Merge branch 'for-4.5-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 10 Feb 2016 19:36:19 +0000 (11:36 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 10 Feb 2016 19:36:19 +0000 (11:36 -0800)
Pull cgroup fixes from Tejun Heo:

 - The destruction paths of cgroup objects are asynchronous and
   multi-staged and some of them ended up destroying parents before
   children leading to failures in cpu and memory controllers.  Ensure
   that parents are always destroyed after children.

 - cpuset mm node migration was performed synchronously while holding
   threadgroup and cgroup mutexes and the recent threadgroup locking
   update resulted in a possible deadlock.  The migration is best effort
   and shouldn't have been performed under those locks to begin with.
   The migration is now performed asynchronously.

 - Minor documentation fix.

* 'for-4.5-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  Documentation: cgroup: Fix 'cgroup-legacy' -> 'cgroup-v1'
  cgroup: make sure a parent css isn't freed before its children
  cgroup: make sure a parent css isn't offlined before its children
  cpuset: make mm migration asynchronous

1397 files changed:
.mailmap
Documentation/ABI/testing/configfs-rdma_cm [new file with mode: 0644]
Documentation/ABI/testing/sysfs-class-infiniband [new file with mode: 0644]
Documentation/Intel-IOMMU.txt
Documentation/cgroup-v2.txt
Documentation/devicetree/bindings/input/gpio-keys.txt
Documentation/devicetree/bindings/interrupt-controller/microchip,pic32-evic.txt [new file with mode: 0644]
Documentation/devicetree/bindings/mips/pic32/microchip,pic32mzda.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/brcm,bcmgenet.txt
Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt
Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
Documentation/devicetree/bindings/net/mdio-mux-gpio.txt
Documentation/devicetree/bindings/net/mdio-mux.txt
Documentation/devicetree/bindings/net/mediatek,mt7620-gsw.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/phy.txt
Documentation/devicetree/bindings/net/ralink,rt2880-net.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/ralink,rt3050-esw.txt [new file with mode: 0644]
Documentation/devicetree/bindings/phy/phy-ath79-usb.txt [new file with mode: 0644]
Documentation/devicetree/bindings/thermal/rockchip-thermal.txt
Documentation/filesystems/proc.txt
Documentation/infiniband/core_locking.txt
Documentation/kernel-parameters.txt
Documentation/kernel-per-CPU-kthreads.txt
Documentation/networking/ip-sysctl.txt
Documentation/sysctl/fs.txt
Documentation/virtual/kvm/api.txt
MAINTAINERS
Makefile
arch/arm/Kconfig.debug
arch/arm/boot/compressed/Makefile
arch/arm/boot/dts/am33xx.dtsi
arch/arm/boot/dts/am4372.dtsi
arch/arm/boot/dts/am437x-gp-evm.dts
arch/arm/boot/dts/am43x-epos-evm.dts
arch/arm/boot/dts/am57xx-cl-som-am57x.dts
arch/arm/boot/dts/am57xx-sbc-am57x.dts
arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts
arch/arm/boot/dts/at91-sama5d2_xplained.dts
arch/arm/boot/dts/at91-sama5d4_xplained.dts
arch/arm/boot/dts/at91-sama5d4ek.dts
arch/arm/boot/dts/at91sam9n12ek.dts
arch/arm/boot/dts/kirkwood-lswvl.dts
arch/arm/boot/dts/kirkwood-lswxl.dts
arch/arm/boot/dts/kirkwood-pogoplug-series-4.dts
arch/arm/boot/dts/logicpd-torpedo-som.dtsi
arch/arm/boot/dts/omap5-board-common.dtsi
arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts
arch/arm/boot/dts/r8a7740-armadillo800eva.dts
arch/arm/boot/dts/sama5d4.dtsi
arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
arch/arm/configs/multi_v7_defconfig
arch/arm/configs/omap2plus_defconfig
arch/arm/include/uapi/asm/unistd.h
arch/arm/kernel/calls.S
arch/arm/mach-omap2/devices.c
arch/arm/mach-omap2/pdata-quirks.c
arch/arm/mach-omap2/sleep34xx.S
arch/arm/mach-omap2/sleep44xx.S
arch/arm/mach-realview/Kconfig
arch/arm/mach-realview/Makefile
arch/arm/mach-realview/platsmp-dt.c
arch/arm/mach-tango/Kconfig
arch/arm/mach-tango/platsmp.c
arch/arm/mach-tegra/Kconfig
arch/arm/mach-tegra/sleep-tegra20.S
arch/arm/mach-tegra/sleep-tegra30.S
arch/arm/mm/dma-mapping.c
arch/arm64/Kconfig.platforms
arch/arm64/Makefile
arch/arm64/boot/dts/Makefile
arch/arm64/boot/dts/arm/juno-base.dtsi
arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi
arch/arm64/boot/dts/nvidia/Makefile [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra132-norrin.dts [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra132.dtsi [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2371-0000.dts [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2371-2180.dts [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2530.dtsi [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2571.dts [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2595.dtsi [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210.dtsi [new file with mode: 0644]
arch/arm64/configs/defconfig
arch/arm64/include/asm/futex.h
arch/arm64/include/asm/kvm_arm.h
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/page.h
arch/arm64/include/asm/pgtable.h
arch/arm64/kernel/head.S
arch/arm64/kernel/image.h
arch/arm64/kvm/hyp/switch.c
arch/arm64/kvm/inject_fault.c
arch/arm64/kvm/sys_regs.c
arch/arm64/mm/dump.c
arch/arm64/mm/kasan_init.c
arch/arm64/mm/pageattr.c
arch/arm64/mm/proc-macros.S
arch/arm64/mm/proc.S
arch/ia64/include/asm/unistd.h
arch/ia64/include/uapi/asm/unistd.h
arch/ia64/kernel/entry.S
arch/m32r/Kconfig
arch/mips/Kbuild.platforms
arch/mips/Kconfig
arch/mips/Makefile
arch/mips/alchemy/common/gpiolib.c
arch/mips/ar7/gpio.c
arch/mips/ath79/common.h
arch/mips/ath79/irq.c
arch/mips/ath79/setup.c
arch/mips/bcm47xx/sprom.c
arch/mips/bcm63xx/nvram.c
arch/mips/bmips/setup.c
arch/mips/boot/compressed/Makefile
arch/mips/boot/compressed/uart-prom.c [new file with mode: 0644]
arch/mips/boot/dts/Makefile
arch/mips/boot/dts/brcm/bcm6328.dtsi
arch/mips/boot/dts/brcm/bcm6368.dtsi
arch/mips/boot/dts/brcm/bcm7125.dtsi
arch/mips/boot/dts/brcm/bcm7346.dtsi
arch/mips/boot/dts/brcm/bcm7358.dtsi
arch/mips/boot/dts/brcm/bcm7360.dtsi
arch/mips/boot/dts/brcm/bcm7362.dtsi
arch/mips/boot/dts/brcm/bcm7420.dtsi
arch/mips/boot/dts/brcm/bcm7425.dtsi
arch/mips/boot/dts/brcm/bcm7435.dtsi
arch/mips/boot/dts/ingenic/ci20.dts
arch/mips/boot/dts/ingenic/jz4780.dtsi
arch/mips/boot/dts/pic32/Makefile [new file with mode: 0644]
arch/mips/boot/dts/pic32/pic32mzda-clk.dtsi [new file with mode: 0644]
arch/mips/boot/dts/pic32/pic32mzda.dtsi [new file with mode: 0644]
arch/mips/boot/dts/pic32/pic32mzda_sk.dts [new file with mode: 0644]
arch/mips/boot/dts/qca/ar9132.dtsi
arch/mips/boot/dts/qca/ar9132_tl_wr1043nd_v1.dts
arch/mips/configs/pic32mzda_defconfig [new file with mode: 0644]
arch/mips/include/asm/cacheops.h
arch/mips/include/asm/cpu-features.h
arch/mips/include/asm/cpu.h
arch/mips/include/asm/elf.h
arch/mips/include/asm/fpu_emulator.h
arch/mips/include/asm/io.h
arch/mips/include/asm/irqflags.h
arch/mips/include/asm/kvm_host.h
arch/mips/include/asm/mach-ath79/ath79.h
arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h [deleted file]
arch/mips/include/asm/mach-pic32/cpu-feature-overrides.h [new file with mode: 0644]
arch/mips/include/asm/mach-pic32/irq.h [new file with mode: 0644]
arch/mips/include/asm/mach-pic32/pic32.h [new file with mode: 0644]
arch/mips/include/asm/mach-pic32/spaces.h [new file with mode: 0644]
arch/mips/include/asm/mach-ralink/irq.h [new file with mode: 0644]
arch/mips/include/asm/mach-ralink/mt7621.h [new file with mode: 0644]
arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h [new file with mode: 0644]
arch/mips/include/asm/mips-cm.h
arch/mips/include/asm/mips-r2-to-r6-emul.h
arch/mips/include/asm/mipsregs.h
arch/mips/include/asm/page.h
arch/mips/include/asm/pgtable.h
arch/mips/include/uapi/asm/inst.h
arch/mips/kernel/cpu-bugs64.c
arch/mips/kernel/cpu-probe.c
arch/mips/kernel/elf.c
arch/mips/kernel/gpio_txx9.c
arch/mips/kernel/ptrace.c
arch/mips/kernel/setup.c
arch/mips/kernel/smp-cps.c
arch/mips/kernel/sync-r4k.c
arch/mips/kernel/traps.c
arch/mips/kvm/callback.c
arch/mips/kvm/dyntrans.c
arch/mips/kvm/emulate.c
arch/mips/kvm/interrupt.c
arch/mips/kvm/locore.S
arch/mips/kvm/mips.c
arch/mips/kvm/opcode.h [deleted file]
arch/mips/kvm/tlb.c
arch/mips/kvm/trap_emul.c
arch/mips/lib/mips-atomic.c
arch/mips/loongson64/Platform
arch/mips/loongson64/loongson-3/hpet.c
arch/mips/loongson64/loongson-3/smp.c
arch/mips/math-emu/cp1emu.c
arch/mips/math-emu/dp_simple.c
arch/mips/math-emu/dp_tint.c
arch/mips/math-emu/dp_tlong.c
arch/mips/math-emu/dsemul.c
arch/mips/math-emu/ieee754.c
arch/mips/math-emu/ieee754.h
arch/mips/math-emu/ieee754dp.c
arch/mips/math-emu/ieee754int.h
arch/mips/math-emu/ieee754sp.c
arch/mips/math-emu/sp_fdp.c
arch/mips/math-emu/sp_simple.c
arch/mips/math-emu/sp_tint.c
arch/mips/math-emu/sp_tlong.c
arch/mips/mm/tlbex.c
arch/mips/pci/Makefile
arch/mips/pci/pci-mt7620.c [new file with mode: 0644]
arch/mips/pic32/Kconfig [new file with mode: 0644]
arch/mips/pic32/Makefile [new file with mode: 0644]
arch/mips/pic32/Platform [new file with mode: 0644]
arch/mips/pic32/common/Makefile [new file with mode: 0644]
arch/mips/pic32/common/irq.c [new file with mode: 0644]
arch/mips/pic32/common/reset.c [new file with mode: 0644]
arch/mips/pic32/pic32mzda/Makefile [new file with mode: 0644]
arch/mips/pic32/pic32mzda/config.c [new file with mode: 0644]
arch/mips/pic32/pic32mzda/early_clk.c [new file with mode: 0644]
arch/mips/pic32/pic32mzda/early_console.c [new file with mode: 0644]
arch/mips/pic32/pic32mzda/early_pin.c [new file with mode: 0644]
arch/mips/pic32/pic32mzda/early_pin.h [new file with mode: 0644]
arch/mips/pic32/pic32mzda/init.c [new file with mode: 0644]
arch/mips/pic32/pic32mzda/pic32mzda.h [new file with mode: 0644]
arch/mips/pic32/pic32mzda/time.c [new file with mode: 0644]
arch/mips/ralink/Kconfig
arch/mips/ralink/Makefile
arch/mips/ralink/Platform
arch/mips/ralink/irq-gic.c [new file with mode: 0644]
arch/mips/ralink/mt7620.c
arch/mips/ralink/mt7621.c [new file with mode: 0644]
arch/mips/ralink/rt288x.c
arch/mips/ralink/rt305x.c
arch/mips/ralink/rt3883.c
arch/mips/ralink/timer-gic.c [new file with mode: 0644]
arch/mips/rb532/gpio.c
arch/mips/txx9/generic/setup.c
arch/powerpc/include/asm/book3s/64/hash.h
arch/powerpc/include/asm/book3s/64/pgtable.h
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/include/asm/systbl.h
arch/powerpc/include/asm/unistd.h
arch/powerpc/include/uapi/asm/unistd.h
arch/powerpc/kernel/eeh_pe.c
arch/powerpc/kernel/misc_64.S
arch/powerpc/kernel/module_64.c
arch/powerpc/kvm/book3s_64_mmu.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/powerpc.c
arch/powerpc/mm/mem.c
arch/powerpc/perf/power8-pmu.c
arch/powerpc/platforms/cell/spufs/file.c
arch/powerpc/platforms/cell/spufs/inode.c
arch/s390/hypfs/inode.c
arch/s390/include/asm/irqflags.h
arch/s390/include/asm/kvm_host.h
arch/s390/include/asm/pci_io.h
arch/s390/include/asm/processor.h
arch/s390/include/asm/ptrace.h
arch/s390/include/uapi/asm/unistd.h
arch/s390/kernel/compat_wrapper.c
arch/s390/kernel/crash_dump.c
arch/s390/kernel/debug.c
arch/s390/kernel/dumpstack.c
arch/s390/kernel/early.c
arch/s390/kernel/ftrace.c
arch/s390/kernel/ipl.c
arch/s390/kernel/kprobes.c
arch/s390/kernel/perf_event.c
arch/s390/kernel/process.c
arch/s390/kernel/ptrace.c
arch/s390/kernel/setup.c
arch/s390/kernel/signal.c
arch/s390/kernel/smp.c
arch/s390/kernel/stacktrace.c
arch/s390/kernel/syscalls.S
arch/s390/kernel/traps.c
arch/s390/kvm/Kconfig
arch/s390/kvm/Makefile
arch/s390/kvm/guestdbg.c
arch/s390/kvm/kvm-s390.c
arch/s390/mm/fault.c
arch/s390/mm/init.c
arch/s390/mm/mmap.c
arch/s390/mm/pgtable.c
arch/s390/numa/numa.c
arch/s390/oprofile/backtrace.c
arch/s390/pci/pci.c
arch/s390/pci/pci_event.c
arch/sh/include/asm/barrier.h
arch/um/include/asm/page.h
arch/x86/Kconfig
arch/x86/crypto/chacha20-ssse3-x86_64.S
arch/x86/include/asm/irq.h
arch/x86/include/asm/pgtable_types.h
arch/x86/include/asm/pmem.h
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/vector.c
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_uncore.c
arch/x86/kernel/cpu/perf_event_intel_uncore.h
arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
arch/x86/kernel/head64.c
arch/x86/kernel/irq.c
arch/x86/mm/hugetlbpage.c
arch/x86/mm/pageattr.c
arch/x86/platform/efi/quirks.c
arch/x86/platform/intel-mid/intel-mid.c
arch/x86/platform/intel-quark/imr.c
block/Makefile
block/blk-iopoll.c [deleted file]
block/blk-merge.c
block/ioctl.c
block/partition-generic.c
crypto/Kconfig
crypto/af_alg.c
crypto/ahash.c
crypto/algif_hash.c
crypto/algif_skcipher.c
crypto/asymmetric_keys/pkcs7_parser.c
crypto/crc32c_generic.c
crypto/crypto_user.c
crypto/shash.c
crypto/skcipher.c
drivers/acpi/acpi_lpss.c
drivers/acpi/apei/erst.c
drivers/acpi/video_detect.c
drivers/amba/Kconfig
drivers/base/devtmpfs.c
drivers/base/platform-msi.c
drivers/base/platform.c
drivers/base/power/common.c
drivers/base/power/domain.c
drivers/base/regmap/regmap-mmio.c
drivers/block/aoe/aoecmd.c
drivers/block/drbd/drbd_bitmap.c
drivers/block/drbd/drbd_debugfs.c
drivers/block/drbd/drbd_int.h
drivers/block/rbd.c
drivers/bus/Kconfig
drivers/bus/vexpress-config.c
drivers/char/hw_random/Kconfig
drivers/char/ipmi/ipmi_si_intf.c
drivers/char/mem.c
drivers/char/mspec.c
drivers/char/ps3flash.c
drivers/clocksource/Kconfig
drivers/clocksource/tcb_clksrc.c
drivers/cpufreq/cpufreq-dt.c
drivers/cpufreq/cpufreq.c
drivers/cpufreq/cpufreq_governor.c
drivers/cpufreq/pxa2xx-cpufreq.c
drivers/cpuidle/coupled.c
drivers/cpuidle/cpuidle.c
drivers/crypto/Kconfig
drivers/crypto/atmel-aes.c
drivers/crypto/atmel-sha.c
drivers/crypto/caam/ctrl.c
drivers/crypto/marvell/cesa.c
drivers/crypto/qat/qat_common/qat_hal.c
drivers/gpu/drm/amd/amdgpu/Makefile
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
drivers/gpu/drm/amd/amdgpu/iceland_smc.c
drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
drivers/gpu/drm/amd/amdgpu/vi.c
drivers/gpu/drm/amd/amdkfd/kfd_process.c
drivers/gpu/drm/amd/powerplay/amd_powerplay.c
drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
drivers/gpu/drm/drm_atomic_helper.c
drivers/gpu/drm/drm_dp_mst_topology.c
drivers/gpu/drm/drm_hashtab.c
drivers/gpu/drm/etnaviv/common.xml.h
drivers/gpu/drm/etnaviv/etnaviv_drv.c
drivers/gpu/drm/etnaviv/etnaviv_drv.h
drivers/gpu/drm/etnaviv/etnaviv_dump.c
drivers/gpu/drm/etnaviv/etnaviv_gem.c
drivers/gpu/drm/etnaviv/etnaviv_gem.h
drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
drivers/gpu/drm/etnaviv/etnaviv_gpu.c
drivers/gpu/drm/etnaviv/etnaviv_gpu.h
drivers/gpu/drm/etnaviv/state_hi.xml.h
drivers/gpu/drm/exynos/exynos_dp_core.c
drivers/gpu/drm/exynos/exynos_drm_dsi.c
drivers/gpu/drm/exynos/exynos_mixer.c
drivers/gpu/drm/i2c/adv7511.c
drivers/gpu/drm/i2c/adv7511.h
drivers/gpu/drm/i915/Kconfig
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/radeon/dce6_afmt.c
drivers/gpu/drm/radeon/evergreen_hdmi.c
drivers/gpu/drm/radeon/evergreend.h
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_atombios.c
drivers/gpu/drm/radeon/radeon_audio.c
drivers/gpu/drm/radeon/radeon_audio.h
drivers/gpu/drm/radeon/radeon_display.c
drivers/gpu/drm/radeon/radeon_gem.c
drivers/gpu/drm/radeon/radeon_object.c
drivers/gpu/drm/radeon/vce_v1_0.c
drivers/gpu/drm/rockchip/Makefile
drivers/gpu/drm/rockchip/dw-mipi-dsi.c
drivers/gpu/drm/rockchip/rockchip_drm_drv.c
drivers/gpu/drm/rockchip/rockchip_drm_fb.c
drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
drivers/gpu/drm/rockchip/rockchip_drm_gem.c
drivers/gpu/drm/rockchip/rockchip_drm_vop.c
drivers/gpu/drm/vc4/vc4_v3d.c
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
drivers/hwmon/dell-smm-hwmon.c
drivers/hwmon/fam15h_power.c
drivers/hwspinlock/hwspinlock_core.c
drivers/i2c/busses/i2c-designware-core.c
drivers/i2c/busses/i2c-piix4.c
drivers/iio/accel/Kconfig
drivers/iio/adc/Kconfig
drivers/iio/adc/ti_am335x_adc.c
drivers/iio/dac/mcp4725.c
drivers/iio/humidity/dht11.c
drivers/iio/imu/adis_buffer.c
drivers/iio/imu/inv_mpu6050/Kconfig
drivers/iio/inkern.c
drivers/iio/light/acpi-als.c
drivers/iio/light/ltr501.c
drivers/iio/pressure/mpl115.c
drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
drivers/infiniband/Kconfig
drivers/infiniband/core/Makefile
drivers/infiniband/core/addr.c
drivers/infiniband/core/cache.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/cma_configfs.c [new file with mode: 0644]
drivers/infiniband/core/core_priv.h
drivers/infiniband/core/cq.c [new file with mode: 0644]
drivers/infiniband/core/device.c
drivers/infiniband/core/fmr_pool.c
drivers/infiniband/core/mad.c
drivers/infiniband/core/mad_priv.h
drivers/infiniband/core/multicast.c
drivers/infiniband/core/roce_gid_mgmt.c
drivers/infiniband/core/sa_query.c
drivers/infiniband/core/sysfs.c
drivers/infiniband/core/ud_header.c
drivers/infiniband/core/umem_odp.c
drivers/infiniband/core/user_mad.c
drivers/infiniband/core/uverbs.h
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/core/uverbs_marshall.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/cxgb3/iwch_cm.c
drivers/infiniband/hw/cxgb3/iwch_cq.c
drivers/infiniband/hw/cxgb3/iwch_mem.c
drivers/infiniband/hw/cxgb3/iwch_provider.c
drivers/infiniband/hw/cxgb3/iwch_provider.h
drivers/infiniband/hw/cxgb3/iwch_qp.c
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/mem.c
drivers/infiniband/hw/cxgb4/provider.c
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/cxgb4/t4.h
drivers/infiniband/hw/cxgb4/user.h
drivers/infiniband/hw/mlx4/ah.c
drivers/infiniband/hw/mlx4/cq.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/mr.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx4/srq.c
drivers/infiniband/hw/mlx5/ah.c
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/odp.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/mlx5/srq.c
drivers/infiniband/hw/mlx5/user.h
drivers/infiniband/hw/mthca/mthca_cq.c
drivers/infiniband/hw/mthca/mthca_provider.c
drivers/infiniband/hw/mthca/mthca_qp.c
drivers/infiniband/hw/nes/nes_cm.c
drivers/infiniband/hw/nes/nes_cm.h
drivers/infiniband/hw/nes/nes_utils.c
drivers/infiniband/hw/nes/nes_verbs.c
drivers/infiniband/hw/nes/nes_verbs.h
drivers/infiniband/hw/ocrdma/ocrdma_ah.c
drivers/infiniband/hw/ocrdma/ocrdma_main.c
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
drivers/infiniband/hw/qib/qib_fs.c
drivers/infiniband/hw/qib/qib_mr.c
drivers/infiniband/hw/qib/qib_qp.c
drivers/infiniband/hw/qib/qib_verbs.c
drivers/infiniband/hw/qib/qib_verbs.h
drivers/infiniband/hw/qib/qib_verbs_mcast.c
drivers/infiniband/hw/usnic/usnic_debugfs.c
drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
drivers/infiniband/hw/usnic/usnic_ib_verbs.c
drivers/infiniband/hw/usnic/usnic_ib_verbs.h
drivers/infiniband/hw/usnic/usnic_vnic.c
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
drivers/infiniband/ulp/iser/iscsi_iser.c
drivers/infiniband/ulp/iser/iscsi_iser.h
drivers/infiniband/ulp/iser/iser_initiator.c
drivers/infiniband/ulp/iser/iser_memory.c
drivers/infiniband/ulp/iser/iser_verbs.c
drivers/infiniband/ulp/isert/ib_isert.c
drivers/infiniband/ulp/isert/ib_isert.h
drivers/infiniband/ulp/isert/isert_proto.h [deleted file]
drivers/infiniband/ulp/srp/ib_srp.c
drivers/infiniband/ulp/srp/ib_srp.h
drivers/infiniband/ulp/srpt/ib_srpt.c
drivers/infiniband/ulp/srpt/ib_srpt.h
drivers/input/joystick/xpad.c
drivers/input/keyboard/gpio_keys.c
drivers/input/touchscreen/atmel_mxt_ts.c
drivers/iommu/amd_iommu.c
drivers/iommu/intel-iommu.c
drivers/iommu/io-pgtable-arm.c
drivers/irqchip/Kconfig
drivers/irqchip/Makefile
drivers/irqchip/irq-atmel-aic-common.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-mxs.c
drivers/irqchip/irq-pic32-evic.c [new file with mode: 0644]
drivers/irqchip/irq-s3c24xx.c
drivers/mailbox/Kconfig
drivers/mailbox/pcc.c
drivers/md/bitmap.c
drivers/md/faulty.c
drivers/md/md-cluster.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/media/dvb-frontends/tda1004x.c
drivers/media/i2c/ir-kbd-i2c.c
drivers/media/i2c/s5k6a3.c
drivers/media/pci/saa7134/saa7134-alsa.c
drivers/media/platform/Kconfig
drivers/media/platform/exynos4-is/Kconfig
drivers/media/platform/exynos4-is/fimc-is.c
drivers/media/platform/exynos4-is/fimc-isp-video.c
drivers/media/platform/exynos4-is/media-dev.c
drivers/media/platform/soc_camera/atmel-isi.c
drivers/media/platform/soc_camera/soc_camera.c
drivers/media/platform/vsp1/vsp1_drv.c
drivers/media/platform/vsp1/vsp1_video.c
drivers/media/v4l2-core/videobuf2-core.c
drivers/media/v4l2-core/videobuf2-v4l2.c
drivers/mmc/core/debugfs.c
drivers/mmc/core/pwrseq_simple.c
drivers/mmc/core/sd.c
drivers/mmc/core/sdio.c
drivers/mmc/core/sdio_cis.c
drivers/mmc/host/mmci.c
drivers/mmc/host/tmio_mmc_dma.c
drivers/mtd/bcm63xxpart.c
drivers/mtd/ubi/cdev.c
drivers/net/dsa/mv88e6xxx.c
drivers/net/ethernet/apm/xgene/xgene_enet_main.c
drivers/net/ethernet/apm/xgene/xgene_enet_main.h
drivers/net/ethernet/aurora/nb8800.c
drivers/net/ethernet/broadcom/Kconfig
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/genet/bcmmii.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/cadence/macb.c
drivers/net/ethernet/cavium/liquidio/lio_main.c
drivers/net/ethernet/ezchip/Kconfig
drivers/net/ethernet/freescale/Makefile
drivers/net/ethernet/freescale/fec.h
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
drivers/net/ethernet/hisilicon/hns/hnae.c
drivers/net/ethernet/hisilicon/hns/hnae.h
drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
drivers/net/ethernet/hisilicon/hns/hns_enet.c
drivers/net/ethernet/hisilicon/hns/hns_enet.h
drivers/net/ethernet/hp/hp100.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/marvell/mv643xx_eth.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/mellanox/mlx4/fw.c
drivers/net/ethernet/mellanox/mlx4/mlx4.h
drivers/net/ethernet/mellanox/mlx4/port.c
drivers/net/ethernet/mellanox/mlx4/qp.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/qp.c
drivers/net/ethernet/mellanox/mlx5/core/srq.c
drivers/net/ethernet/mellanox/mlx5/core/transobj.c
drivers/net/ethernet/mellanox/mlx5/core/transobj.h [deleted file]
drivers/net/ethernet/mellanox/mlx5/core/vport.c
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/moxa/moxart_ether.c
drivers/net/ethernet/moxa/moxart_ether.h
drivers/net/ethernet/neterion/vxge/vxge-main.c
drivers/net/ethernet/rocker/rocker.c
drivers/net/ethernet/sun/sunvnet.c
drivers/net/ethernet/ti/davinci_cpdma.c
drivers/net/fddi/defxx.c
drivers/net/geneve.c
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/irda/bfin_sir.h
drivers/net/macvlan.c
drivers/net/phy/Kconfig
drivers/net/phy/dp83640.c
drivers/net/phy/phy.c
drivers/net/phy/smsc.c
drivers/net/ppp/pptp.c
drivers/net/usb/lan78xx.c
drivers/net/vxlan.c
drivers/net/wireless/ath/ath9k/eeprom.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h
drivers/net/wireless/intel/iwlwifi/iwl-7000.c
drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h
drivers/net/wireless/intel/iwlwifi/mvm/rs.c
drivers/net/wireless/intel/iwlwifi/mvm/tx.c
drivers/net/wireless/intel/iwlwifi/pcie/drv.c
drivers/net/wireless/mac80211_hwsim.c
drivers/net/wireless/ralink/rt2x00/rt2400pci.c
drivers/net/wireless/ralink/rt2x00/rt2500pci.c
drivers/net/wireless/ralink/rt2x00/rt2500usb.c
drivers/net/wireless/ralink/rt2x00/rt2800lib.c
drivers/net/wireless/ralink/rt2x00/rt2x00.h
drivers/net/wireless/ralink/rt2x00/rt2x00config.c
drivers/net/wireless/ralink/rt2x00/rt2x00mac.c
drivers/net/wireless/ralink/rt2x00/rt61pci.c
drivers/net/wireless/ralink/rt2x00/rt73usb.c
drivers/net/wireless/realtek/rtlwifi/regd.c
drivers/net/xen-netfront.c
drivers/ntb/hw/Kconfig
drivers/ntb/hw/Makefile
drivers/ntb/hw/amd/Kconfig [new file with mode: 0644]
drivers/ntb/hw/amd/Makefile [new file with mode: 0644]
drivers/ntb/hw/amd/ntb_hw_amd.c [new file with mode: 0644]
drivers/ntb/hw/amd/ntb_hw_amd.h [new file with mode: 0644]
drivers/ntb/hw/intel/ntb_hw_intel.c
drivers/ntb/hw/intel/ntb_hw_intel.h
drivers/ntb/ntb_transport.c
drivers/ntb/test/Kconfig
drivers/ntb/test/Makefile
drivers/ntb/test/ntb_perf.c [new file with mode: 0644]
drivers/nvdimm/namespace_devs.c
drivers/nvdimm/pfn_devs.c
drivers/of/irq.c
drivers/of/of_mdio.c
drivers/oprofile/oprofilefs.c
drivers/pci/hotplug/acpiphp_glue.c
drivers/platform/x86/ideapad-laptop.c
drivers/platform/x86/intel_telemetry_debugfs.c
drivers/pnp/quirks.c
drivers/ptp/ptp_ixp46x.c
drivers/s390/cio/chp.c
drivers/s390/cio/chp.h
drivers/s390/cio/chsc.c
drivers/s390/crypto/zcrypt_error.h
drivers/s390/crypto/zcrypt_msgtype50.c
drivers/s390/crypto/zcrypt_msgtype6.c
drivers/scsi/3w-xxxx.c
drivers/scsi/Kconfig
drivers/scsi/NCR5380.c
drivers/scsi/NCR5380.h
drivers/scsi/arm/cumana_1.c
drivers/scsi/arm/oak.c
drivers/scsi/atari_NCR5380.c
drivers/scsi/atari_scsi.c
drivers/scsi/be2iscsi/Kconfig
drivers/scsi/be2iscsi/be.h
drivers/scsi/be2iscsi/be_iscsi.c
drivers/scsi/be2iscsi/be_main.c
drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
drivers/scsi/dmx3191d.c
drivers/scsi/dtc.c
drivers/scsi/dtc.h
drivers/scsi/g_NCR5380.c
drivers/scsi/g_NCR5380.h
drivers/scsi/hisi_sas/Kconfig
drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
drivers/scsi/imm.c
drivers/scsi/ipr.c
drivers/scsi/ipr.h
drivers/scsi/mac_scsi.c
drivers/scsi/megaraid/megaraid_mm.c
drivers/scsi/pas16.c
drivers/scsi/pas16.h
drivers/scsi/scsi_devinfo.c
drivers/scsi/sd.c
drivers/scsi/sg.c
drivers/scsi/sr.c
drivers/scsi/storvsc_drv.c
drivers/scsi/sun3_scsi.c
drivers/scsi/t128.c
drivers/scsi/t128.h
drivers/soc/Kconfig
drivers/soc/qcom/spm.c
drivers/soc/tegra/Kconfig [new file with mode: 0644]
drivers/ssb/main.c
drivers/staging/iio/adc/Kconfig
drivers/staging/iio/meter/ade7753.c
drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
drivers/staging/lustre/lustre/llite/dir.c
drivers/staging/lustre/lustre/llite/file.c
drivers/staging/lustre/lustre/llite/llite_internal.h
drivers/staging/lustre/lustre/llite/llite_lib.c
drivers/staging/lustre/lustre/llite/llite_nfs.c
drivers/staging/lustre/lustre/llite/lloop.c
drivers/staging/lustre/lustre/llite/rw.c
drivers/staging/lustre/lustre/llite/rw26.c
drivers/staging/lustre/lustre/llite/vvp_io.c
drivers/staging/lustre/lustre/llite/vvp_page.c
drivers/staging/panel/panel.c
drivers/staging/rdma/Kconfig
drivers/staging/rdma/Makefile
drivers/staging/rdma/amso1100/Kbuild [deleted file]
drivers/staging/rdma/amso1100/Kconfig [deleted file]
drivers/staging/rdma/amso1100/TODO [deleted file]
drivers/staging/rdma/amso1100/c2.c [deleted file]
drivers/staging/rdma/amso1100/c2.h [deleted file]
drivers/staging/rdma/amso1100/c2_ae.c [deleted file]
drivers/staging/rdma/amso1100/c2_ae.h [deleted file]
drivers/staging/rdma/amso1100/c2_alloc.c [deleted file]
drivers/staging/rdma/amso1100/c2_cm.c [deleted file]
drivers/staging/rdma/amso1100/c2_cq.c [deleted file]
drivers/staging/rdma/amso1100/c2_intr.c [deleted file]
drivers/staging/rdma/amso1100/c2_mm.c [deleted file]
drivers/staging/rdma/amso1100/c2_mq.c [deleted file]
drivers/staging/rdma/amso1100/c2_mq.h [deleted file]
drivers/staging/rdma/amso1100/c2_pd.c [deleted file]
drivers/staging/rdma/amso1100/c2_provider.c [deleted file]
drivers/staging/rdma/amso1100/c2_provider.h [deleted file]
drivers/staging/rdma/amso1100/c2_qp.c [deleted file]
drivers/staging/rdma/amso1100/c2_rnic.c [deleted file]
drivers/staging/rdma/amso1100/c2_status.h [deleted file]
drivers/staging/rdma/amso1100/c2_user.h [deleted file]
drivers/staging/rdma/amso1100/c2_vq.c [deleted file]
drivers/staging/rdma/amso1100/c2_vq.h [deleted file]
drivers/staging/rdma/amso1100/c2_wr.h [deleted file]
drivers/staging/rdma/ehca/Kconfig [deleted file]
drivers/staging/rdma/ehca/Makefile [deleted file]
drivers/staging/rdma/ehca/TODO [deleted file]
drivers/staging/rdma/ehca/ehca_av.c [deleted file]
drivers/staging/rdma/ehca/ehca_classes.h [deleted file]
drivers/staging/rdma/ehca/ehca_classes_pSeries.h [deleted file]
drivers/staging/rdma/ehca/ehca_cq.c [deleted file]
drivers/staging/rdma/ehca/ehca_eq.c [deleted file]
drivers/staging/rdma/ehca/ehca_hca.c [deleted file]
drivers/staging/rdma/ehca/ehca_irq.c [deleted file]
drivers/staging/rdma/ehca/ehca_irq.h [deleted file]
drivers/staging/rdma/ehca/ehca_iverbs.h [deleted file]
drivers/staging/rdma/ehca/ehca_main.c [deleted file]
drivers/staging/rdma/ehca/ehca_mcast.c [deleted file]
drivers/staging/rdma/ehca/ehca_mrmw.c [deleted file]
drivers/staging/rdma/ehca/ehca_mrmw.h [deleted file]
drivers/staging/rdma/ehca/ehca_pd.c [deleted file]
drivers/staging/rdma/ehca/ehca_qes.h [deleted file]
drivers/staging/rdma/ehca/ehca_qp.c [deleted file]
drivers/staging/rdma/ehca/ehca_reqs.c [deleted file]
drivers/staging/rdma/ehca/ehca_sqp.c [deleted file]
drivers/staging/rdma/ehca/ehca_tools.h [deleted file]
drivers/staging/rdma/ehca/ehca_uverbs.c [deleted file]
drivers/staging/rdma/ehca/hcp_if.c [deleted file]
drivers/staging/rdma/ehca/hcp_if.h [deleted file]
drivers/staging/rdma/ehca/hcp_phyp.c [deleted file]
drivers/staging/rdma/ehca/hcp_phyp.h [deleted file]
drivers/staging/rdma/ehca/hipz_fns.h [deleted file]
drivers/staging/rdma/ehca/hipz_fns_core.h [deleted file]
drivers/staging/rdma/ehca/hipz_hw.h [deleted file]
drivers/staging/rdma/ehca/ipz_pt_fn.c [deleted file]
drivers/staging/rdma/ehca/ipz_pt_fn.h [deleted file]
drivers/staging/rdma/hfi1/mr.c
drivers/staging/rdma/hfi1/verbs.c
drivers/staging/rdma/hfi1/verbs.h
drivers/staging/rdma/ipath/Kconfig [deleted file]
drivers/staging/rdma/ipath/Makefile [deleted file]
drivers/staging/rdma/ipath/TODO [deleted file]
drivers/staging/rdma/ipath/ipath_common.h [deleted file]
drivers/staging/rdma/ipath/ipath_cq.c [deleted file]
drivers/staging/rdma/ipath/ipath_debug.h [deleted file]
drivers/staging/rdma/ipath/ipath_diag.c [deleted file]
drivers/staging/rdma/ipath/ipath_dma.c [deleted file]
drivers/staging/rdma/ipath/ipath_driver.c [deleted file]
drivers/staging/rdma/ipath/ipath_eeprom.c [deleted file]
drivers/staging/rdma/ipath/ipath_file_ops.c [deleted file]
drivers/staging/rdma/ipath/ipath_fs.c [deleted file]
drivers/staging/rdma/ipath/ipath_iba6110.c [deleted file]
drivers/staging/rdma/ipath/ipath_init_chip.c [deleted file]
drivers/staging/rdma/ipath/ipath_intr.c [deleted file]
drivers/staging/rdma/ipath/ipath_kernel.h [deleted file]
drivers/staging/rdma/ipath/ipath_keys.c [deleted file]
drivers/staging/rdma/ipath/ipath_mad.c [deleted file]
drivers/staging/rdma/ipath/ipath_mmap.c [deleted file]
drivers/staging/rdma/ipath/ipath_mr.c [deleted file]
drivers/staging/rdma/ipath/ipath_qp.c [deleted file]
drivers/staging/rdma/ipath/ipath_rc.c [deleted file]
drivers/staging/rdma/ipath/ipath_registers.h [deleted file]
drivers/staging/rdma/ipath/ipath_ruc.c [deleted file]
drivers/staging/rdma/ipath/ipath_sdma.c [deleted file]
drivers/staging/rdma/ipath/ipath_srq.c [deleted file]
drivers/staging/rdma/ipath/ipath_stats.c [deleted file]
drivers/staging/rdma/ipath/ipath_sysfs.c [deleted file]
drivers/staging/rdma/ipath/ipath_uc.c [deleted file]
drivers/staging/rdma/ipath/ipath_ud.c [deleted file]
drivers/staging/rdma/ipath/ipath_user_pages.c [deleted file]
drivers/staging/rdma/ipath/ipath_user_sdma.c [deleted file]
drivers/staging/rdma/ipath/ipath_user_sdma.h [deleted file]
drivers/staging/rdma/ipath/ipath_verbs.c [deleted file]
drivers/staging/rdma/ipath/ipath_verbs.h [deleted file]
drivers/staging/rdma/ipath/ipath_verbs_mcast.c [deleted file]
drivers/staging/rdma/ipath/ipath_wc_ppc64.c [deleted file]
drivers/staging/rdma/ipath/ipath_wc_x86_64.c [deleted file]
drivers/staging/speakup/Kconfig
drivers/staging/speakup/main.c
drivers/staging/speakup/selection.c
drivers/staging/speakup/serialio.c
drivers/thermal/int340x_thermal/processor_thermal_device.c
drivers/thermal/intel_pch_thermal.c
drivers/thermal/rcar_thermal.c
drivers/thermal/rockchip_thermal.c
drivers/thermal/step_wise.c
drivers/thermal/thermal_core.c
drivers/thermal/thermal_core.h
drivers/tty/n_tty.c
drivers/tty/serial/8250/8250_pci.c
drivers/tty/tty_io.c
drivers/tty/tty_mutex.c
drivers/tty/vt/vt.c
drivers/usb/class/cdc-acm.c
drivers/usb/class/cdc-acm.h
drivers/usb/core/hub.c
drivers/usb/dwc2/core.c
drivers/usb/dwc2/platform.c
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/function/f_printer.c
drivers/usb/gadget/legacy/inode.c
drivers/usb/gadget/udc/atmel_usba_udc.c
drivers/usb/host/Kconfig
drivers/usb/host/xhci-ext-caps.h
drivers/usb/host/xhci-mtk-sch.c
drivers/usb/host/xhci-mtk.c
drivers/usb/host/xhci-pci.c
drivers/usb/host/xhci-plat.c
drivers/usb/host/xhci-ring.c
drivers/usb/host/xhci.c
drivers/usb/host/xhci.h
drivers/usb/musb/ux500.c
drivers/usb/phy/phy-msm-usb.c
drivers/usb/phy/phy-mxs-usb.c
drivers/usb/serial/cp210x.c
drivers/usb/serial/ftdi_sio.c
drivers/usb/serial/ftdi_sio_ids.h
drivers/usb/serial/mxu11x0.c
drivers/usb/serial/option.c
drivers/usb/serial/visor.c
drivers/vfio/vfio.c
drivers/video/fbdev/core/fb_defio.c
drivers/virtio/virtio_pci_common.c
drivers/watchdog/Kconfig
drivers/watchdog/max63xx_wdt.c
drivers/watchdog/pcwd_usb.c
drivers/watchdog/sp805_wdt.c
drivers/xen/tmem.c
fs/9p/vfs_file.c
fs/affs/file.c
fs/afs/flock.c
fs/afs/write.c
fs/attr.c
fs/binfmt_elf.c
fs/binfmt_misc.c
fs/block_dev.c
fs/btrfs/async-thread.c
fs/btrfs/backref.c
fs/btrfs/ctree.h
fs/btrfs/dev-replace.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_map.c
fs/btrfs/extent_map.h
fs/btrfs/file.c
fs/btrfs/free-space-tree.c
fs/btrfs/inode-map.c
fs/btrfs/inode-map.h
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/raid56.c
fs/btrfs/relocation.c
fs/btrfs/scrub.c
fs/btrfs/super.c
fs/btrfs/sysfs.c
fs/btrfs/sysfs.h
fs/btrfs/tests/btrfs-tests.c
fs/btrfs/tests/extent-io-tests.c
fs/btrfs/tests/inode-tests.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/btrfs/xattr.c
fs/cachefiles/interface.c
fs/cachefiles/namei.c
fs/ceph/addr.c
fs/ceph/cache.c
fs/ceph/caps.c
fs/ceph/dir.c
fs/ceph/export.c
fs/ceph/file.c
fs/ceph/inode.c
fs/cifs/cifs_debug.c
fs/cifs/cifs_debug.h
fs/cifs/cifsfs.c
fs/cifs/cifsglob.h
fs/cifs/cifsproto.h
fs/cifs/connect.c
fs/cifs/file.c
fs/cifs/inode.c
fs/cifs/misc.c
fs/cifs/readdir.c
fs/cifs/smb2misc.c
fs/cifs/smb2ops.c
fs/cifs/smb2pdu.c
fs/cifs/smb2pdu.h
fs/cifs/smb2proto.h
fs/cifs/smb2transport.c
fs/cifs/transport.c
fs/coda/coda_linux.h
fs/coda/dir.c
fs/coda/file.c
fs/compat_ioctl.c
fs/configfs/dir.c
fs/configfs/file.c
fs/configfs/inode.c
fs/dax.c
fs/dcache.c
fs/debugfs/inode.c
fs/devpts/inode.c
fs/direct-io.c
fs/dlm/user.c
fs/ecryptfs/inode.c
fs/ecryptfs/mmap.c
fs/efivarfs/file.c
fs/efivarfs/super.c
fs/eventpoll.c
fs/exec.c
fs/exofs/file.c
fs/exportfs/expfs.c
fs/ext2/file.c
fs/ext2/ioctl.c
fs/ext4/crypto.c
fs/ext4/crypto_key.c
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/file.c
fs/ext4/ialloc.c
fs/ext4/inline.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/namei.c
fs/ext4/super.c
fs/ext4/truncate.h
fs/f2fs/data.c
fs/f2fs/file.c
fs/fat/dir.c
fs/fat/file.c
fs/filesystems.c
fs/fuse/dir.c
fs/fuse/file.c
fs/gfs2/file.c
fs/gfs2/inode.c
fs/gfs2/quota.c
fs/hfs/dir.c
fs/hfs/inode.c
fs/hfsplus/dir.c
fs/hfsplus/inode.c
fs/hfsplus/ioctl.c
fs/hostfs/hostfs_kern.c
fs/hpfs/dir.c
fs/hugetlbfs/inode.c
fs/inode.c
fs/ioctl.c
fs/jffs2/build.c
fs/jffs2/file.c
fs/jffs2/fs.c
fs/jffs2/super.c
fs/jfs/file.c
fs/jfs/ioctl.c
fs/jfs/super.c
fs/kernfs/dir.c
fs/libfs.c
fs/locks.c
fs/logfs/file.c
fs/namei.c
fs/namespace.c
fs/ncpfs/dir.c
fs/ncpfs/file.c
fs/nfs/dir.c
fs/nfs/direct.c
fs/nfs/file.c
fs/nfs/filelayout/filelayout.c
fs/nfs/flexfilelayout/flexfilelayout.c
fs/nfs/flexfilelayout/flexfilelayoutdev.c
fs/nfs/inode.c
fs/nfs/internal.h
fs/nfs/nfs42proc.c
fs/nfs/nfs4file.c
fs/nfs/pnfs.c
fs/nfs/pnfs.h
fs/nfs/write.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4recover.c
fs/nfsd/nfsfh.h
fs/nfsd/vfs.c
fs/nilfs2/inode.c
fs/nilfs2/ioctl.c
fs/ntfs/dir.c
fs/ntfs/file.c
fs/ntfs/quota.c
fs/ntfs/super.c
fs/ocfs2/alloc.c
fs/ocfs2/aops.c
fs/ocfs2/cluster/heartbeat.c
fs/ocfs2/dir.c
fs/ocfs2/dlm/dlmrecovery.c
fs/ocfs2/dlmglue.c
fs/ocfs2/file.c
fs/ocfs2/inode.c
fs/ocfs2/ioctl.c
fs/ocfs2/journal.c
fs/ocfs2/localalloc.c
fs/ocfs2/move_extents.c
fs/ocfs2/namei.c
fs/ocfs2/quota_global.c
fs/ocfs2/refcounttree.c
fs/ocfs2/resize.c
fs/ocfs2/suballoc.c
fs/ocfs2/xattr.c
fs/open.c
fs/overlayfs/copy_up.c
fs/overlayfs/dir.c
fs/overlayfs/inode.c
fs/overlayfs/readdir.c
fs/overlayfs/super.c
fs/pipe.c
fs/proc/kcore.c
fs/proc/self.c
fs/proc/task_mmu.c
fs/proc/task_nommu.c
fs/proc/thread_self.c
fs/pstore/inode.c
fs/quota/dquot.c
fs/read_write.c
fs/readdir.c
fs/reiserfs/dir.c
fs/reiserfs/file.c
fs/reiserfs/ioctl.c
fs/reiserfs/super.c
fs/reiserfs/xattr.c
fs/timerfd.c
fs/tracefs/inode.c
fs/ubifs/dir.c
fs/ubifs/file.c
fs/ubifs/xattr.c
fs/udf/file.c
fs/udf/inode.c
fs/udf/super.c
fs/utimes.c
fs/xattr.c
fs/xfs/libxfs/xfs_format.h
fs/xfs/libxfs/xfs_fs.h
fs/xfs/xfs_buf.c
fs/xfs/xfs_file.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_iops.c
fs/xfs/xfs_pnfs.c
fs/xfs/xfs_trans_ail.c
include/acpi/cppc_acpi.h
include/crypto/hash.h
include/crypto/if_alg.h
include/crypto/skcipher.h
include/drm/drm_atomic_helper.h
include/drm/drm_cache.h
include/drm/drm_dp_mst_helper.h
include/drm/drm_fixed.h
include/linux/bcm963xx_nvram.h [new file with mode: 0644]
include/linux/bcm963xx_tag.h [new file with mode: 0644]
include/linux/blk-iopoll.h [deleted file]
include/linux/ceph/ceph_features.h
include/linux/ceph/ceph_frag.h
include/linux/ceph/messenger.h
include/linux/cleancache.h
include/linux/crush/crush.h
include/linux/dax.h
include/linux/fs.h
include/linux/ftrace.h
include/linux/gfp.h
include/linux/hrtimer.h
include/linux/huge_mm.h
include/linux/interrupt.h
include/linux/iommu.h
include/linux/irq_poll.h [new file with mode: 0644]
include/linux/irqdomain.h
include/linux/memcontrol.h
include/linux/mlx4/cmd.h
include/linux/mlx4/device.h
include/linux/mlx4/qp.h
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/mlx5_ifc.h
include/linux/mlx5/qp.h
include/linux/mlx5/transobj.h [new file with mode: 0644]
include/linux/mlx5/vport.h
include/linux/mm.h
include/linux/mm_types.h
include/linux/mmzone.h
include/linux/module.h
include/linux/netdevice.h
include/linux/of.h
include/linux/pagemap.h
include/linux/perf_event.h
include/linux/pfn_t.h
include/linux/pipe_fs_i.h
include/linux/platform_data/sdhci-pic32.h [new file with mode: 0644]
include/linux/pmem.h
include/linux/radix-tree.h
include/linux/raid/pq.h
include/linux/rmap.h
include/linux/sched.h
include/linux/shmem_fs.h
include/linux/sunrpc/svc_rdma.h
include/linux/swiotlb.h
include/linux/thermal.h
include/linux/tty.h
include/linux/workqueue.h
include/media/videobuf2-core.h
include/net/bluetooth/l2cap.h
include/net/dst_metadata.h
include/net/ip6_route.h
include/net/netfilter/nf_conntrack_core.h
include/net/sctp/structs.h
include/net/sock.h
include/net/sock_reuseport.h
include/net/tcp.h
include/rdma/ib_addr.h
include/rdma/ib_cache.h
include/rdma/ib_mad.h
include/rdma/ib_pack.h
include/rdma/ib_pma.h
include/rdma/ib_sa.h
include/rdma/ib_verbs.h
include/scsi/iser.h [new file with mode: 0644]
include/sound/rawmidi.h
include/sound/timer.h
include/trace/events/ext4.h
include/trace/events/fence.h
include/trace/events/huge_memory.h
include/trace/events/irq.h
include/uapi/drm/etnaviv_drm.h
include/uapi/linux/fs.h
ipc/mqueue.c
ipc/sem.c
ipc/util.c
ipc/util.h
kernel/audit_fsnotify.c
kernel/audit_watch.c
kernel/bpf/arraymap.c
kernel/events/core.c
kernel/events/hw_breakpoint.c
kernel/events/ring_buffer.c
kernel/futex.c
kernel/irq/handle.c
kernel/irq/irqdomain.c
kernel/locking/rtmutex.c
kernel/memremap.c
kernel/module.c
kernel/pid.c
kernel/power/Kconfig
kernel/relay.c
kernel/sched/core.c
kernel/sched/fair.c
kernel/sched/idle.c
kernel/seccomp.c
kernel/signal.c
kernel/sysctl.c
kernel/time/hrtimer.c
kernel/time/itimer.c
kernel/time/ntp.c
kernel/time/posix-timers.c
kernel/time/tick-sched.c
kernel/time/timer_list.c
kernel/trace/bpf_trace.c
kernel/trace/trace.c
kernel/trace/trace_stack.c
kernel/workqueue.c
lib/Kconfig
lib/Kconfig.debug
lib/Makefile
lib/debugobjects.c
lib/dump_stack.c
lib/irq_poll.c [new file with mode: 0644]
lib/libcrc32c.c
lib/radix-tree.c
lib/ratelimit.c
lib/scatterlist.c
lib/test-string_helpers.c
mm/Kconfig
mm/backing-dev.c
mm/cleancache.c
mm/filemap.c
mm/gup.c
mm/huge_memory.c
mm/hugetlb.c
mm/internal.h
mm/memblock.c
mm/memcontrol.c
mm/memory.c
mm/mempolicy.c
mm/mincore.c
mm/mlock.c
mm/mmap.c
mm/page_alloc.c
mm/percpu.c
mm/shmem.c
mm/swapfile.c
mm/truncate.c
mm/util.c
mm/vmpressure.c
mm/vmscan.c
mm/vmstat.c
mm/workingset.c
net/9p/trans_fd.c
net/9p/trans_virtio.c
net/bluetooth/6lowpan.c
net/bluetooth/hci_request.c
net/bluetooth/l2cap_core.c
net/bluetooth/l2cap_sock.c
net/bluetooth/smp.c
net/bridge/br.c
net/ceph/auth_x.c
net/ceph/auth_x.h
net/ceph/crush/mapper.c
net/ceph/messenger.c
net/ceph/mon_client.c
net/ceph/osd_client.c
net/ceph/osdmap.c
net/core/dev.c
net/core/sock_reuseport.c
net/ipv4/Kconfig
net/ipv4/fib_trie.c
net/ipv4/inet_diag.c
net/ipv4/ip_fragment.c
net/ipv4/ip_input.c
net/ipv4/ipconfig.c
net/ipv4/netfilter/nf_defrag_ipv4.c
net/ipv4/tcp.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/udp.c
net/ipv6/Kconfig
net/ipv6/datagram.c
net/ipv6/ip6_output.c
net/ipv6/route.c
net/ipv6/sit.c
net/ipv6/udp.c
net/irda/ircomm/ircomm_param.c
net/iucv/af_iucv.c
net/mac80211/ibss.c
net/mac80211/main.c
net/mac80211/mesh.c
net/mac80211/mesh.h
net/mac80211/mlme.c
net/mac80211/offchannel.c
net/mac80211/scan.c
net/mac80211/sta_info.c
net/mac80211/status.c
net/mac80211/util.c
net/netfilter/ipset/ip_set_hash_netiface.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_helper.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_tables_netdev.c
net/netfilter/nfnetlink_cttimeout.c
net/netfilter/nft_byteorder.c
net/netfilter/nft_ct.c
net/netfilter/xt_TCPMSS.c
net/netlink/af_netlink.c
net/rds/ib.c
net/rds/iw.c
net/rfkill/core.c
net/sched/sch_drr.c
net/sctp/input.c
net/sctp/proc.c
net/sctp/sm_sideeffect.c
net/sctp/socket.c
net/sctp/transport.c
net/sunrpc/cache.c
net/sunrpc/rpc_pipe.c
net/sunrpc/xprt.c
net/sunrpc/xprtrdma/Makefile
net/sunrpc/xprtrdma/frwr_ops.c
net/sunrpc/xprtrdma/svc_rdma.c
net/sunrpc/xprtrdma/svc_rdma_backchannel.c [new file with mode: 0644]
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
net/sunrpc/xprtrdma/svc_rdma_sendto.c
net/sunrpc/xprtrdma/svc_rdma_transport.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h
net/switchdev/switchdev.c
net/tipc/subscr.c
net/unix/af_unix.c
net/wireless/reg.c
scripts/mod/modpost.c
scripts/prune-kernel [new file with mode: 0755]
security/inode.c
security/integrity/ima/ima_main.c
security/keys/key.c
security/selinux/selinuxfs.c
sound/core/Kconfig
sound/core/compress_offload.c
sound/core/control.c
sound/core/hrtimer.c
sound/core/oss/pcm_oss.c
sound/core/pcm_compat.c
sound/core/rawmidi.c
sound/core/seq/oss/seq_oss_init.c
sound/core/seq/oss/seq_oss_synth.c
sound/core/seq/seq_clientmgr.c
sound/core/seq/seq_compat.c
sound/core/seq/seq_ports.c
sound/core/seq/seq_timer.c
sound/core/seq/seq_virmidi.c
sound/core/timer.c
sound/drivers/dummy.c
sound/firewire/bebob/bebob_stream.c
sound/hda/hdac_i915.c
sound/isa/Kconfig
sound/pci/Kconfig
sound/pci/emu10k1/emu10k1_main.c
sound/pci/hda/hda_bind.c
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_cirrus.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/sparc/Kconfig
sound/spi/at73c213.c
sound/usb/quirks.c
tools/lib/traceevent/event-parse.c
tools/perf/Makefile.perf
tools/perf/arch/x86/tests/intel-cqm.c
tools/perf/config/Makefile
tools/perf/tests/make
tools/perf/ui/browsers/annotate.c
tools/perf/util/hist.c
tools/perf/util/session.c
tools/perf/util/stat.c
tools/perf/util/symbol.c
tools/perf/util/trace-event-parse.c
tools/testing/nvdimm/test/iomap.c
tools/testing/selftests/timers/valid-adjtimex.c
tools/virtio/asm/barrier.h
tools/virtio/linux/compiler.h [new file with mode: 0644]
tools/virtio/linux/kernel.h
tools/virtio/ringtest/Makefile [new file with mode: 0644]
tools/virtio/ringtest/README [new file with mode: 0644]
tools/virtio/ringtest/main.c [new file with mode: 0644]
tools/virtio/ringtest/main.h [new file with mode: 0644]
tools/virtio/ringtest/ring.c [new file with mode: 0644]
tools/virtio/ringtest/run-on-all.sh [new file with mode: 0755]
tools/virtio/ringtest/virtio_ring_0_9.c [new file with mode: 0644]
tools/virtio/ringtest/virtio_ring_poll.c [new file with mode: 0644]

index b1e9a97..7e6c533 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -21,6 +21,7 @@ Andrey Ryabinin <ryabinin.a.a@gmail.com> <a.ryabinin@samsung.com>
 Andrew Morton <akpm@linux-foundation.org>
 Andrew Vasquez <andrew.vasquez@qlogic.com>
 Andy Adamson <andros@citi.umich.edu>
+Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com>
 Archit Taneja <archit@ti.com>
 Arnaud Patard <arnaud.patard@rtp-net.org>
 Arnd Bergmann <arnd@arndb.de>
diff --git a/Documentation/ABI/testing/configfs-rdma_cm b/Documentation/ABI/testing/configfs-rdma_cm
new file mode 100644 (file)
index 0000000..5c389aa
--- /dev/null
@@ -0,0 +1,22 @@
+What:          /config/rdma_cm
+Date:          November 29, 2015
+KernelVersion:  4.4.0
+Description:   Interface is used to configure RDMA-capable HCAs in respect to
+               RDMA-CM attributes.
+
+               Attributes are visible only when configfs is mounted. To mount
+               configfs in /config directory use:
+               # mount -t configfs none /config/
+
+               In order to set parameters related to a specific HCA, a directory
+               for this HCA has to be created:
+               mkdir -p /config/rdma_cm/<hca>
+
+
+What:          /config/rdma_cm/<hca>/ports/<port-num>/default_roce_mode
+Date:          November 29, 2015
+KernelVersion:  4.4.0
+Description:   RDMA-CM based connections from HCA <hca> at port <port-num>
+               will be initiated with this RoCE type as default.
+               The possible RoCE types are either "IB/RoCE v1" or "RoCE v2".
+               This parameter has RW access.
diff --git a/Documentation/ABI/testing/sysfs-class-infiniband b/Documentation/ABI/testing/sysfs-class-infiniband
new file mode 100644 (file)
index 0000000..a86abe6
--- /dev/null
@@ -0,0 +1,16 @@
+What:          /sys/class/infiniband/<hca>/ports/<port-number>/gid_attrs/ndevs/<gid-index>
+Date:          November 29, 2015
+KernelVersion: 4.4.0
+Contact:       linux-rdma@vger.kernel.org
+Description:   The net-device's name associated with the GID resides
+               at index <gid-index>.
+
+What:          /sys/class/infiniband/<hca>/ports/<port-number>/gid_attrs/types/<gid-index>
+Date:          November 29, 2015
+KernelVersion: 4.4.0
+Contact:       linux-rdma@vger.kernel.org
+Description:   The RoCE type of the associated GID resides at index <gid-index>.
+               This could either be "IB/RoCE v1" for IB and RoCE v1 based GIDs
+               or "RoCE v2" for RoCE v2 based GIDs.
+
+
index 7b57fc0..49585b6 100644 (file)
@@ -3,7 +3,7 @@ Linux IOMMU Support
 
 The architecture spec can be obtained from the below location.
 
-http://www.intel.com/technology/virtualization/
+http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/vt-directed-io-spec.pdf
 
 This guide gives a quick cheat sheet for some basic understanding.
 
index 9ae148a..ff49cf9 100644 (file)
@@ -843,6 +843,10 @@ PAGE_SIZE multiple when read back.
                Amount of memory used to cache filesystem data,
                including tmpfs and shared memory.
 
+         sock
+
+               Amount of memory used in network transmission buffers
+
          file_mapped
 
                Amount of cached filesystem data mapped with mmap()
index cf1333d..2164123 100644 (file)
@@ -6,6 +6,7 @@ Required properties:
 Optional properties:
        - autorepeat: Boolean, Enable auto repeat feature of Linux input
          subsystem.
+       - label: String, name of the input device.
 
 Each button (key) is represented as a sub-node of "gpio-keys":
 Subnode properties:
diff --git a/Documentation/devicetree/bindings/interrupt-controller/microchip,pic32-evic.txt b/Documentation/devicetree/bindings/interrupt-controller/microchip,pic32-evic.txt
new file mode 100644 (file)
index 0000000..c3a1b37
--- /dev/null
@@ -0,0 +1,67 @@
+Microchip PIC32 Interrupt Controller
+====================================
+
+The Microchip PIC32 contains an Enhanced Vectored Interrupt Controller (EVIC).
+It handles all internal and external interrupts. This controller exists outside
+of the CPU and is the arbitrator of all interrupts (including interrupts from
+the CPU itself) before they are presented to the CPU.
+
+External interrupts have a software configurable edge polarity. Non external
+interrupts have a type and polarity that is determined by the source of the
+interrupt.
+
+Required properties
+-------------------
+
+- compatible: Should be "microchip,pic32mzda-evic"
+- reg: Specifies physical base address and size of register range.
+- interrupt-controller: Identifies the node as an interrupt controller.
+- #interrupt-cells: Specifies the number of cells used to encode an interrupt
+  source connected to this controller. The value shall be 2 and interrupt
+  descriptor shall have the following format:
+
+       <hw_irq irq_type>
+
+  hw_irq - represents the hardware interrupt number as in the data sheet.
+  irq_type - is used to describe the type and polarity of an interrupt. For
+  internal interrupts use IRQ_TYPE_EDGE_RISING for non persistent interrupts and
+  IRQ_TYPE_LEVEL_HIGH for persistent interrupts. For external interrupts use
+  IRQ_TYPE_EDGE_RISING or IRQ_TYPE_EDGE_FALLING to select the desired polarity.
+
+Optional properties
+-------------------
+- microchip,external-irqs: u32 array of external interrupts with software
+  polarity configuration. This array corresponds to the bits in the INTCON
+  SFR.
+
+Example
+-------
+
+evic: interrupt-controller@1f810000 {
+       compatible = "microchip,pic32mzda-evic";
+       interrupt-controller;
+       #interrupt-cells = <2>;
+       reg = <0x1f810000 0x1000>;
+       microchip,external-irqs = <3 8 13 18 23>;
+};
+
+Each device/peripheral must request its interrupt line with the associated type
+and polarity.
+
+Internal interrupt DTS snippet
+------------------------------
+
+device@1f800000 {
+       ...
+       interrupts = <113 IRQ_TYPE_LEVEL_HIGH>;
+       ...
+};
+
+External interrupt DTS snippet
+------------------------------
+
+device@1f800000 {
+       ...
+       interrupts = <3 IRQ_TYPE_EDGE_RISING>;
+       ...
+};
diff --git a/Documentation/devicetree/bindings/mips/pic32/microchip,pic32mzda.txt b/Documentation/devicetree/bindings/mips/pic32/microchip,pic32mzda.txt
new file mode 100644 (file)
index 0000000..1c8dbc4
--- /dev/null
@@ -0,0 +1,31 @@
+* Microchip PIC32MZDA Platforms
+
+PIC32MZDA Starter Kit
+Required root node properties:
+    - compatible = "microchip,pic32mzda-sk", "microchip,pic32mzda"
+
+CPU nodes:
+----------
+A "cpus" node is required.  Required properties:
+ - #address-cells: Must be 1.
+ - #size-cells: Must be 0.
+A CPU sub-node is also required.  Required properties:
+ - device_type: Must be "cpu".
+ - compatible: Must be "mti,mips14KEc".
+Example:
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               cpu0: cpu@0 {
+                       device_type = "cpu";
+                       compatible = "mti,mips14KEc";
+               };
+       };
+
+Boot protocol
+--------------
+In accordance with Unified Hosting Interface Reference Manual (MD01069), the
+bootloader must pass the following arguments to the kernel:
+ - $a0: -2.
+ - $a1: KSEG0 address of the flattened device-tree blob.
index 451fef2..10587bd 100644 (file)
@@ -68,7 +68,7 @@ ethernet@f0b60000 {
                phy1: ethernet-phy@1 {
                        max-speed = <1000>;
                        reg = <0x1>;
-                       compatible = "brcm,28nm-gphy", "ethernet-phy-ieee802.3-c22";
+                       compatible = "ethernet-phy-ieee802.3-c22";
                };
        };
 };
@@ -115,7 +115,7 @@ ethernet@f0ba0000 {
                phy0: ethernet-phy@0 {
                        max-speed = <1000>;
                        reg = <0x0>;
-                       compatible = "brcm,bcm53125", "ethernet-phy-ieee802.3-c22";
+                       compatible = "ethernet-phy-ieee802.3-c22";
                };
        };
 };
index 80411b2..ecacfa4 100644 (file)
@@ -4,8 +4,6 @@ Required properties:
 - compatible: should be "hisilicon,hns-dsaf-v1" or "hisilicon,hns-dsaf-v2".
   "hisilicon,hns-dsaf-v1" is for hip05.
   "hisilicon,hns-dsaf-v2" is for Hi1610 and Hi1612.
-- dsa-name: dsa fabric name who provide this interface.
-  should be "dsafX", X is the dsaf id.
 - mode: dsa fabric mode string. only support one of dsaf modes like these:
                "2port-64vf",
                "6port-16rss",
@@ -26,9 +24,8 @@ Required properties:
 
 Example:
 
-dsa: dsa@c7000000 {
+dsaf0: dsa@c7000000 {
        compatible = "hisilicon,hns-dsaf-v1";
-       dsa_name = "dsaf0";
        mode = "6port-16rss";
        interrupt-parent = <&mbigen_dsa>;
        reg = <0x0 0xC0000000 0x0 0x420000
index 41d19be..e6a9d1c 100644 (file)
@@ -4,8 +4,9 @@ Required properties:
 - compatible: "hisilicon,hns-nic-v1" or "hisilicon,hns-nic-v2".
   "hisilicon,hns-nic-v1" is for hip05.
   "hisilicon,hns-nic-v2" is for Hi1610 and Hi1612.
-- ae-name: accelerator name who provides this interface,
-  is simply a name referring to the name of name in the accelerator node.
+- ae-handle: accelerator engine handle for hns,
+  specifies a reference to the associated hardware driver node.
+  see Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
 - port-id: is the index of port provided by DSAF (the accelerator). DSAF can
   connect to 8 PHYs. Port 0 to 1 are both used for adminstration purpose. They
   are called debug ports.
@@ -41,7 +42,7 @@ Example:
 
        ethernet@0{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <0>;
                local-mac-address = [a2 14 e4 4b 56 76];
        };
index aeea50c..d0cb869 100644 (file)
@@ -6,12 +6,17 @@ Required properties:
 - interrupts: interrupt for the device
 - phy: See ethernet.txt file in the same directory.
 - phy-mode: See ethernet.txt file in the same directory
-- clocks: a pointer to the reference clock for this device.
+- clocks: List of clocks for this device. At least one clock is
+  mandatory for the core clock. If several clocks are given, then the
+  clock-names property must be used to identify them.
 
 Optional properties:
 - tx-csum-limit: maximum mtu supported by port that allow TX checksum.
   Value is presented in bytes. If not used, by default 1600B is set for
   "marvell,armada-370-neta" and 9800B for others.
+- clock-names: List of names corresponding to clocks property; shall be
+  "core" for core clock and "bus" for the optional bus clock.
+
 
 Example:
 
index 7938411..694987d 100644 (file)
@@ -38,7 +38,6 @@ Example :
 
                        phy11: ethernet-phy@1 {
                                reg = <1>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -48,7 +47,6 @@ Example :
                        };
                        phy12: ethernet-phy@2 {
                                reg = <2>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -58,7 +56,6 @@ Example :
                        };
                        phy13: ethernet-phy@3 {
                                reg = <3>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -68,7 +65,6 @@ Example :
                        };
                        phy14: ethernet-phy@4 {
                                reg = <4>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -85,7 +81,6 @@ Example :
 
                        phy21: ethernet-phy@1 {
                                reg = <1>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -95,7 +90,6 @@ Example :
                        };
                        phy22: ethernet-phy@2 {
                                reg = <2>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -105,7 +99,6 @@ Example :
                        };
                        phy23: ethernet-phy@3 {
                                reg = <3>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -115,7 +108,6 @@ Example :
                        };
                        phy24: ethernet-phy@4 {
                                reg = <4>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
index f65606f..491f5bd 100644 (file)
@@ -47,7 +47,6 @@ Example :
 
                        phy11: ethernet-phy@1 {
                                reg = <1>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -57,7 +56,6 @@ Example :
                        };
                        phy12: ethernet-phy@2 {
                                reg = <2>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -67,7 +65,6 @@ Example :
                        };
                        phy13: ethernet-phy@3 {
                                reg = <3>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -77,7 +74,6 @@ Example :
                        };
                        phy14: ethernet-phy@4 {
                                reg = <4>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -94,7 +90,6 @@ Example :
 
                        phy21: ethernet-phy@1 {
                                reg = <1>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -104,7 +99,6 @@ Example :
                        };
                        phy22: ethernet-phy@2 {
                                reg = <2>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -114,7 +108,6 @@ Example :
                        };
                        phy23: ethernet-phy@3 {
                                reg = <3>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -124,7 +117,6 @@ Example :
                        };
                        phy24: ethernet-phy@4 {
                                reg = <4>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
diff --git a/Documentation/devicetree/bindings/net/mediatek,mt7620-gsw.txt b/Documentation/devicetree/bindings/net/mediatek,mt7620-gsw.txt
new file mode 100644 (file)
index 0000000..aa63130
--- /dev/null
@@ -0,0 +1,26 @@
+Mediatek Gigabit Switch
+=======================
+
+The mediatek gigabit switch can be found on Mediatek SoCs (mt7620, mt7621).
+
+Required properties:
+- compatible: Should be "mediatek,mt7620-gsw" or "mediatek,mt7621-gsw"
+- reg: Address and length of the register set for the device
+- interrupt-parent: Should be the phandle for the interrupt controller
+  that services interrupts for this device
+- interrupts: Should contain the gigabit switch's interrupt
+- resets: Should contain the gigabit switch's resets
+- reset-names: Should contain the reset names "gsw"
+
+Example:
+
+gsw@10110000 {
+       compatible = "mediatek,mt7620-gsw";
+       reg = <0x10110000 8000>;
+
+       resets = <&rstctrl 23>;
+       reset-names = "gsw";
+
+       interrupt-parent = <&intc>;
+       interrupts = <17>;
+};
index 525e165..bc1c3c8 100644 (file)
@@ -17,8 +17,7 @@ Optional Properties:
   "ethernet-phy-ieee802.3-c22" or "ethernet-phy-ieee802.3-c45" for
   PHYs that implement IEEE802.3 clause 22 or IEEE802.3 clause 45
   specifications. If neither of these are specified, the default is to
-  assume clause 22. The compatible list may also contain other
-  elements.
+  assume clause 22.
 
   If the phy's identifier is known then the list may contain an entry
   of the form: "ethernet-phy-idAAAA.BBBB" where
@@ -28,6 +27,9 @@ Optional Properties:
             4 hex digits. This is the chip vendor OUI bits 19:24,
             followed by 10 bits of a vendor specific ID.
 
+  The compatible list should not contain other values than those
+  listed here.
+
 - max-speed: Maximum PHY supported speed (10, 100, 1000...)
 
 - broken-turn-around: If set, indicates the PHY device does not correctly
diff --git a/Documentation/devicetree/bindings/net/ralink,rt2880-net.txt b/Documentation/devicetree/bindings/net/ralink,rt2880-net.txt
new file mode 100644 (file)
index 0000000..88b095d
--- /dev/null
@@ -0,0 +1,61 @@
+Ralink Frame Engine Ethernet controller
+=======================================
+
+The Ralink frame engine ethernet controller can be found on Ralink and
+Mediatek SoCs (RT288x, RT3x5x, RT366x, RT388x, rt5350, mt7620, mt7621, mt76x8).
+
+Depending on the SoC, there are a number of ports connected to the CPU port
+directly and/or via a (gigabit-)switch.
+
+* Ethernet controller node
+
+Required properties:
+- compatible: Should be one of "ralink,rt2880-eth", "ralink,rt3050-eth",
+  "ralink,rt3883-eth", "ralink,rt5350-eth", "mediatek,mt7620-eth",
+  "mediatek,mt7621-eth"
+- reg: Address and length of the register set for the device
+- interrupt-parent: Should be the phandle for the interrupt controller
+  that services interrupts for this device
+- interrupts: Should contain the frame engine's interrupt
+- resets: Should contain the frame engine's resets
+- reset-names: Should contain the reset names "fe". If a switch is present
+  "esw" is also required.
+
+
+* Ethernet port node
+
+Required properties:
+- compatible: Should be "ralink,eth-port"
+- reg: The number of the physical port
+- phy-handle: reference to the node describing the phy
+
+Example:
+
+mdio-bus {
+       ...
+       phy0: ethernet-phy@0 {
+               phy-mode = "mii";
+               reg = <0>;
+       };
+};
+
+ethernet@400000 {
+       compatible = "ralink,rt2880-eth";
+       reg = <0x00400000 10000>;
+
+       #address-cells = <1>;
+       #size-cells = <0>;
+
+       resets = <&rstctrl 18>;
+       reset-names = "fe";
+
+       interrupt-parent = <&cpuintc>;
+       interrupts = <5>;
+
+       port@0 {
+               compatible = "ralink,eth-port";
+               reg = <0>;
+               phy-handle = <&phy0>;
+       };
+
+};
diff --git a/Documentation/devicetree/bindings/net/ralink,rt3050-esw.txt b/Documentation/devicetree/bindings/net/ralink,rt3050-esw.txt
new file mode 100644 (file)
index 0000000..2e79bd3
--- /dev/null
@@ -0,0 +1,32 @@
+Ralink Fast Ethernet Embedded Switch
+====================================
+
+The ralink fast ethernet embedded switch can be found on Ralink and Mediatek
+SoCs (RT3x5x, RT5350, MT76x8).
+
+Required properties:
+- compatible: Should be "ralink,rt3050-esw"
+- reg: Address and length of the register set for the device
+- interrupt-parent: Should be the phandle for the interrupt controller
+  that services interrupts for this device
+- interrupts: Should contain the embedded switch's interrupt
+- resets: Should contain the embedded switch's resets
+- reset-names: Should contain the reset names "esw"
+
+Optional properties:
+- ralink,portmap: can be used to choose if the default switch setup is
+  llllw or wllll
+- ralink,led_polarity: override the active high/low settings of the leds
+
+Example:
+
+esw@10110000 {
+       compatible = "ralink,rt3050-esw";
+       reg = <0x10110000 8000>;
+
+       resets = <&rstctrl 23>;
+       reset-names = "esw";
+
+       interrupt-parent = <&intc>;
+       interrupts = <17>;
+};
diff --git a/Documentation/devicetree/bindings/phy/phy-ath79-usb.txt b/Documentation/devicetree/bindings/phy/phy-ath79-usb.txt
new file mode 100644 (file)
index 0000000..cafe219
--- /dev/null
@@ -0,0 +1,18 @@
+* Atheros AR71XX/9XXX USB PHY
+
+Required properties:
+- compatible: "qca,ar7100-usb-phy"
+- #phy-cells: should be 0
+- reset-names: "usb-phy"[, "usb-suspend-override"]
+- resets: references to the reset controllers
+
+Example:
+
+       usb-phy {
+               compatible = "qca,ar7100-usb-phy";
+
+               reset-names = "usb-phy", "usb-suspend-override";
+               resets = <&rst 4>, <&rst 3>;
+
+               #phy-cells = <0>;
+       };
index 0dfa60d..08efe6b 100644 (file)
@@ -2,8 +2,10 @@
 
 Required properties:
 - compatible : should be "rockchip,<name>-tsadc"
+   "rockchip,rk3228-tsadc": found on RK3228 SoCs
    "rockchip,rk3288-tsadc": found on RK3288 SoCs
    "rockchip,rk3368-tsadc": found on RK3368 SoCs
+   "rockchip,rk3399-tsadc": found on RK3399 SoCs
 - reg : physical base address of the controller and length of memory mapped
        region.
 - interrupts : The interrupt number to the cpu. The interrupt specifier format
index fde9fd0..843b045 100644 (file)
@@ -240,8 +240,8 @@ Table 1-2: Contents of the status files (as of 4.1)
  RssFile                     size of resident file mappings
  RssShmem                    size of resident shmem memory (includes SysV shm,
                              mapping of tmpfs and shared anonymous mappings)
- VmData                      size of data, stack, and text segments
- VmStk                       size of data, stack, and text segments
+ VmData                      size of private data segments
+ VmStk                       size of stack segments
  VmExe                       size of text segment
  VmLib                       size of shared library code
  VmPTE                       size of page table entries
@@ -356,7 +356,7 @@ address           perms offset  dev   inode      pathname
 a7cb1000-a7cb2000 ---p 00000000 00:00 0
 a7cb2000-a7eb2000 rw-p 00000000 00:00 0
 a7eb2000-a7eb3000 ---p 00000000 00:00 0
-a7eb3000-a7ed5000 rw-p 00000000 00:00 0          [stack:1001]
+a7eb3000-a7ed5000 rw-p 00000000 00:00 0
 a7ed5000-a8008000 r-xp 00000000 03:00 4222       /lib/libc.so.6
 a8008000-a800a000 r--p 00133000 03:00 4222       /lib/libc.so.6
 a800a000-a800b000 rw-p 00135000 03:00 4222       /lib/libc.so.6
@@ -388,7 +388,6 @@ is not associated with a file:
 
  [heap]                   = the heap of the program
  [stack]                  = the stack of the main process
- [stack:1001]             = the stack of the thread with tid 1001
  [vdso]                   = the "virtual dynamic shared object",
                             the kernel system call handler
 
@@ -396,10 +395,8 @@ is not associated with a file:
 
 The /proc/PID/task/TID/maps is a view of the virtual memory from the viewpoint
 of the individual tasks of a process. In this file you will see a mapping marked
-as [stack] if that task sees it as a stack. This is a key difference from the
-content of /proc/PID/maps, where you will see all mappings that are being used
-as stack by all of those tasks. Hence, for the example above, the task-level
-map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this:
+as [stack] if that task sees it as a stack. Hence, for the example above, the
+task-level map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this:
 
 08048000-08049000 r-xp 00000000 03:00 8312       /opt/test
 08049000-0804a000 rw-p 00001000 03:00 8312       /opt/test
index e167854..4b1f36b 100644 (file)
@@ -15,7 +15,6 @@ Sleeping and interrupt context
     modify_ah
     query_ah
     destroy_ah
-    bind_mw
     post_send
     post_recv
     poll_cq
@@ -31,7 +30,6 @@ Sleeping and interrupt context
     ib_modify_ah
     ib_query_ah
     ib_destroy_ah
-    ib_bind_mw
     ib_post_send
     ib_post_recv
     ib_req_notify_cq
index cfb2c0f..9a53c92 100644 (file)
@@ -1454,6 +1454,41 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        In such case C2/C3 won't be used again.
                        idle=nomwait: Disable mwait for CPU C-states
 
+       ieee754=        [MIPS] Select IEEE Std 754 conformance mode
+                       Format: { strict | legacy | 2008 | relaxed }
+                       Default: strict
+
+                       Choose which programs will be accepted for execution
+                       based on the IEEE 754 NaN encoding(s) supported by
+                       the FPU and the NaN encoding requested with the value
+                       of an ELF file header flag individually set by each
+                       binary.  Hardware implementations are permitted to
+                       support either or both of the legacy and the 2008 NaN
+                       encoding mode.
+
+                       Available settings are as follows:
+                       strict  accept binaries that request a NaN encoding
+                               supported by the FPU
+                       legacy  only accept legacy-NaN binaries, if supported
+                               by the FPU
+                       2008    only accept 2008-NaN binaries, if supported
+                               by the FPU
+                       relaxed accept any binaries regardless of whether
+                               supported by the FPU
+
+                       The FPU emulator is always able to support both NaN
+                       encodings, so if no FPU hardware is present or it has
+                       been disabled with 'nofpu', then the settings of
+                       'legacy' and '2008' strap the emulator accordingly,
+                       'relaxed' straps the emulator for both legacy-NaN and
+                       2008-NaN, whereas 'strict' enables legacy-NaN only on
+                       legacy processors and both NaN encodings on MIPS32 or
+                       MIPS64 CPUs.
+
+                       The setting for ABS.fmt/NEG.fmt instruction execution
+                       mode generally follows that for the NaN encoding,
+                       except where unsupported by hardware.
+
        ignore_loglevel [KNL]
                        Ignore loglevel setting - this will print /all/
                        kernel messages to the console. Useful for debugging.
@@ -1461,6 +1496,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        could change it dynamically, usually by
                        /sys/module/printk/parameters/ignore_loglevel.
 
+       ignore_rlimit_data
+                       Ignore RLIMIT_DATA setting for data mappings,
+                       print warning at first misuse.  Can be changed via
+                       /sys/module/kernel/parameters/ignore_rlimit_data.
+
        ihash_entries=  [KNL]
                        Set number of hash buckets for inode cache.
 
@@ -4195,6 +4235,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        The default value of this parameter is determined by
                        the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
 
+       workqueue.debug_force_rr_cpu
+                       Workqueue used to implicitly guarantee that work
+                       items queued without explicit CPU specified are put
+                       on the local CPU.  This guarantee is no longer true
+                       and while local CPU is still preferred work items
+                       may be put on foreign CPUs.  This debug option
+                       forces round-robin CPU selection to flush out
+                       usages which depend on the now broken guarantee.
+                       When enabled, memory and cache locality will be
+                       impacted.
+
        x2apic_phys     [X86-64,APIC] Use x2apic physical mode instead of
                        default x2apic cluster mode on platforms
                        supporting x2apic.
index f4cbfe0..edec3a3 100644 (file)
@@ -90,7 +90,7 @@ BLOCK_SOFTIRQ:  Do all of the following:
        from being initiated from tasks that might run on the CPU to
        be de-jittered.  (It is OK to force this CPU offline and then
        bring it back online before you start your application.)
-BLOCK_IOPOLL_SOFTIRQ:  Do all of the following:
+IRQ_POLL_SOFTIRQ:  Do all of the following:
 1.     Force block-device interrupts onto some other CPU.
 2.     Initiate any block I/O and block-I/O polling on other CPUs.
 3.     Once your application has started, prevent CPU-hotplug operations
index ceb44a0..73b36d7 100644 (file)
@@ -594,7 +594,7 @@ tcp_fastopen - INTEGER
 
 tcp_syn_retries - INTEGER
        Number of times initial SYNs for an active TCP connection attempt
-       will be retransmitted. Should not be higher than 255. Default value
+       will be retransmitted. Should not be higher than 127. Default value
        is 6, which corresponds to 63seconds till the last retransmission
        with the current initial RTO of 1second. With this the final timeout
        for an active TCP connection attempt will happen after 127seconds.
index 88152f2..302b5ed 100644 (file)
@@ -32,6 +32,8 @@ Currently, these files are in /proc/sys/fs:
 - nr_open
 - overflowuid
 - overflowgid
+- pipe-user-pages-hard
+- pipe-user-pages-soft
 - protected_hardlinks
 - protected_symlinks
 - suid_dumpable
@@ -159,6 +161,27 @@ The default is 65534.
 
 ==============================================================
 
+pipe-user-pages-hard:
+
+Maximum total number of pages a non-privileged user may allocate for pipes.
+Once this limit is reached, no new pipes may be allocated until usage goes
+below the limit again. When set to 0, no limit is applied, which is the default
+setting.
+
+==============================================================
+
+pipe-user-pages-soft:
+
+Maximum total number of pages a non-privileged user may allocate for pipes
+before the pipe size gets limited to a single page. Once this limit is reached,
+new pipes will be limited to a single page in size for this user in order to
+limit total memory usage, and trying to increase them using fcntl() will be
+denied until usage goes below the limit again. The default value allows
+allocating up to 1024 pipes at their default size. When set to 0, no limit is
+applied.
+
+==============================================================
+
 protected_hardlinks:
 
 A long-standing class of security issues is the hardlink-based
index 053f613..07e4cdf 100644 (file)
@@ -3025,7 +3025,7 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
 and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
 which is the maximum number of possibly pending cpu-local interrupts.
 
-4.90 KVM_SMI
+4.96 KVM_SMI
 
 Capability: KVM_CAP_X86_SMM
 Architectures: x86
index b8a717c..7f1fa4f 100644 (file)
@@ -223,9 +223,7 @@ F:  drivers/scsi/aacraid/
 
 ABI/API
 L:     linux-api@vger.kernel.org
-F:     Documentation/ABI/
 F:     include/linux/syscalls.h
-F:     include/uapi/
 F:     kernel/sys_ni.c
 
 ABIT UGURU 1,2 HARDWARE MONITOR DRIVER
@@ -686,13 +684,6 @@ M: Michael Hanselmann <linux-kernel@hansmi.ch>
 S:     Supported
 F:     drivers/macintosh/ams/
 
-AMSO1100 RNIC DRIVER
-M:     Tom Tucker <tom@opengridcomputing.com>
-M:     Steve Wise <swise@opengridcomputing.com>
-L:     linux-rdma@vger.kernel.org
-S:     Maintained
-F:     drivers/infiniband/hw/amso1100/
-
 ANALOG DEVICES INC AD9389B DRIVER
 M:     Hans Verkuil <hans.verkuil@cisco.com>
 L:     linux-media@vger.kernel.org
@@ -967,6 +958,8 @@ M:  Rob Herring <robh@kernel.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-highbank/
+F:     arch/arm/boot/dts/highbank.dts
+F:     arch/arm/boot/dts/ecx-*.dts*
 
 ARM/CAVIUM NETWORKS CNS3XXX MACHINE SUPPORT
 M:     Krzysztof Halasa <khalasa@piap.pl>
@@ -1042,6 +1035,7 @@ M:        Barry Song <baohua@kernel.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/baohua/linux.git
 S:     Maintained
+F:     arch/arm/boot/dts/prima2*
 F:     arch/arm/mach-prima2/
 F:     drivers/clk/sirf/
 F:     drivers/clocksource/timer-prima2.c
@@ -1143,6 +1137,10 @@ W:       http://www.hisilicon.com
 S:     Supported
 T:     git git://github.com/hisilicon/linux-hisi.git
 F:     arch/arm/mach-hisi/
+F:     arch/arm/boot/dts/hi3*
+F:     arch/arm/boot/dts/hip*
+F:     arch/arm/boot/dts/hisi*
+F:     arch/arm64/boot/dts/hisilicon/
 
 ARM/HP JORNADA 7XX MACHINE SUPPORT
 M:     Kristoffer Ericson <kristoffer.ericson@gmail.com>
@@ -1219,6 +1217,7 @@ M:        Santosh Shilimkar <ssantosh@kernel.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-keystone/
+F:     arch/arm/boot/dts/k2*
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git
 
 ARM/TEXAS INSTRUMENT KEYSTONE CLOCK FRAMEWORK
@@ -1287,6 +1286,7 @@ L:        linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-berlin/
 F:     arch/arm/boot/dts/berlin*
+F:     arch/arm64/boot/dts/marvell/berlin*
 
 
 ARM/Marvell Dove/MV78xx0/Orion SOC support
@@ -1425,6 +1425,7 @@ S:        Maintained
 F:     arch/arm/boot/dts/qcom-*.dts
 F:     arch/arm/boot/dts/qcom-*.dtsi
 F:     arch/arm/mach-qcom/
+F:     arch/arm64/boot/dts/qcom/*
 F:     drivers/soc/qcom/
 F:     drivers/tty/serial/msm_serial.h
 F:     drivers/tty/serial/msm_serial.c
@@ -1484,6 +1485,8 @@ L:        linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/boot/dts/s3c*
+F:     arch/arm/boot/dts/s5p*
+F:     arch/arm/boot/dts/samsung*
 F:     arch/arm/boot/dts/exynos*
 F:     arch/arm64/boot/dts/exynos/
 F:     arch/arm/plat-samsung/
@@ -1563,6 +1566,7 @@ S:        Maintained
 F:     arch/arm/mach-socfpga/
 F:     arch/arm/boot/dts/socfpga*
 F:     arch/arm/configs/socfpga_defconfig
+F:     arch/arm64/boot/dts/altera/
 W:     http://www.rocketboards.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux.git
 
@@ -1716,7 +1720,7 @@ M:        Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/boot/dts/vexpress*
-F:     arch/arm64/boot/dts/arm/vexpress*
+F:     arch/arm64/boot/dts/arm/
 F:     arch/arm/mach-vexpress/
 F:     */*/vexpress*
 F:     */*/*/vexpress*
@@ -2343,6 +2347,7 @@ F:        arch/arm/mach-bcm/
 F:     arch/arm/boot/dts/bcm113*
 F:     arch/arm/boot/dts/bcm216*
 F:     arch/arm/boot/dts/bcm281*
+F:     arch/arm64/boot/dts/broadcom/
 F:     arch/arm/configs/bcm_defconfig
 F:     drivers/mmc/host/sdhci-bcm-kona.c
 F:     drivers/clocksource/bcm_kona_timer.c
@@ -2420,6 +2425,8 @@ F:        arch/mips/kernel/*bmips*
 F:     arch/mips/boot/dts/brcm/bcm*.dts*
 F:     drivers/irqchip/irq-bcm7*
 F:     drivers/irqchip/irq-brcmstb*
+F:     include/linux/bcm963xx_nvram.h
+F:     include/linux/bcm963xx_tag.h
 
 BROADCOM TG3 GIGABIT ETHERNET DRIVER
 M:     Prashant Sreedharan <prashant@broadcom.com>
@@ -3443,7 +3450,7 @@ S:        Maintained
 F:     drivers/usb/dwc2/
 
 DESIGNWARE USB3 DRD IP DRIVER
-M:     Felipe Balbi <balbi@ti.com>
+M:     Felipe Balbi <balbi@kernel.org>
 L:     linux-usb@vger.kernel.org
 L:     linux-omap@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
@@ -4182,13 +4189,6 @@ W:       http://aeschi.ch.eu.org/efs/
 S:     Orphan
 F:     fs/efs/
 
-EHCA (IBM GX bus InfiniBand adapter) DRIVER
-M:     Hoang-Nam Nguyen <hnguyen@de.ibm.com>
-M:     Christoph Raisch <raisch@de.ibm.com>
-L:     linux-rdma@vger.kernel.org
-S:     Supported
-F:     drivers/infiniband/hw/ehca/
-
 EHEA (IBM pSeries eHEA 10Gb ethernet adapter) DRIVER
 M:     Thadeu Lima de Souza Cascardo <cascardo@linux.vnet.ibm.com>
 L:     netdev@vger.kernel.org
@@ -5780,10 +5780,8 @@ INTEL TELEMETRY DRIVER
 M:     Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>
 L:     platform-driver-x86@vger.kernel.org
 S:     Maintained
-F:     drivers/platform/x86/intel_telemetry_core.c
 F:     arch/x86/include/asm/intel_telemetry.h
-F:     drivers/platform/x86/intel_telemetry_pltdrv.c
-F:     drivers/platform/x86/intel_telemetry_debugfs.c
+F:     drivers/platform/x86/intel_telemetry*
 
 IOC3 ETHERNET DRIVER
 M:     Ralf Baechle <ralf@linux-mips.org>
@@ -5809,12 +5807,6 @@ M:       Juanjo Ciarlante <jjciarla@raiz.uncu.edu.ar>
 S:     Maintained
 F:     net/ipv4/netfilter/ipt_MASQUERADE.c
 
-IPATH DRIVER
-M:     Mike Marciniszyn <infinipath@intel.com>
-L:     linux-rdma@vger.kernel.org
-S:     Maintained
-F:     drivers/staging/rdma/ipath/
-
 IPMI SUBSYSTEM
 M:     Corey Minyard <minyard@acm.org>
 L:     openipmi-developer@lists.sourceforge.net (moderated for non-subscribers)
@@ -6218,6 +6210,14 @@ F:       arch/arm64/include/uapi/asm/kvm*
 F:     arch/arm64/include/asm/kvm*
 F:     arch/arm64/kvm/
 
+KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips)
+M:     James Hogan <james.hogan@imgtec.com>
+L:     linux-mips@linux-mips.org
+S:     Supported
+F:     arch/mips/include/uapi/asm/kvm*
+F:     arch/mips/include/asm/kvm*
+F:     arch/mips/kvm/
+
 KEXEC
 M:     Eric Biederman <ebiederm@xmission.com>
 W:     http://kernel.org/pub/linux/utils/kernel/kexec/
@@ -6315,6 +6315,12 @@ S:       Maintained
 F:     net/l3mdev
 F:     include/net/l3mdev.h
 
+LANTIQ MIPS ARCHITECTURE
+M:     John Crispin <blogic@openwrt.org>
+L:     linux-mips@linux-mips.org
+S:     Maintained
+F:     arch/mips/lantiq
+
 LAPB module
 L:     linux-x25@vger.kernel.org
 S:     Orphan
@@ -7151,27 +7157,45 @@ W:      https://linuxtv.org
 S:     Odd Fixes
 F:     drivers/media/radio/radio-miropcm20*
 
-Mellanox MLX5 core VPI driver
-M:     Eli Cohen <eli@mellanox.com>
+MELLANOX MLX4 core VPI driver
+M:     Yishai Hadas <yishaih@mellanox.com>
 L:     netdev@vger.kernel.org
 L:     linux-rdma@vger.kernel.org
 W:     http://www.mellanox.com
 Q:     http://patchwork.ozlabs.org/project/netdev/list/
+S:     Supported
+F:     drivers/net/ethernet/mellanox/mlx4/
+F:     include/linux/mlx4/
+
+MELLANOX MLX4 IB driver
+M:     Yishai Hadas <yishaih@mellanox.com>
+L:     linux-rdma@vger.kernel.org
+W:     http://www.mellanox.com
 Q:     http://patchwork.kernel.org/project/linux-rdma/list/
-T:     git git://openfabrics.org/~eli/connect-ib.git
+S:     Supported
+F:     drivers/infiniband/hw/mlx4/
+F:     include/linux/mlx4/
+
+MELLANOX MLX5 core VPI driver
+M:     Matan Barak <matanb@mellanox.com>
+M:     Leon Romanovsky <leonro@mellanox.com>
+L:     netdev@vger.kernel.org
+L:     linux-rdma@vger.kernel.org
+W:     http://www.mellanox.com
+Q:     http://patchwork.ozlabs.org/project/netdev/list/
 S:     Supported
 F:     drivers/net/ethernet/mellanox/mlx5/core/
 F:     include/linux/mlx5/
 
-Mellanox MLX5 IB driver
-M:     Eli Cohen <eli@mellanox.com>
+MELLANOX MLX5 IB driver
+M:     Matan Barak <matanb@mellanox.com>
+M:     Leon Romanovsky <leonro@mellanox.com>
 L:     linux-rdma@vger.kernel.org
 W:     http://www.mellanox.com
 Q:     http://patchwork.kernel.org/project/linux-rdma/list/
-T:     git git://openfabrics.org/~eli/connect-ib.git
 S:     Supported
-F:     include/linux/mlx5/
 F:     drivers/infiniband/hw/mlx5/
+F:     include/linux/mlx5/
 
 MELEXIS MLX90614 DRIVER
 M:     Crt Mori <cmo@melexis.com>
@@ -7338,7 +7362,7 @@ F:        drivers/tty/isicom.c
 F:     include/linux/isicom.h
 
 MUSB MULTIPOINT HIGH SPEED DUAL-ROLE CONTROLLER
-M:     Felipe Balbi <balbi@ti.com>
+M:     Felipe Balbi <balbi@kernel.org>
 L:     linux-usb@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
 S:     Maintained
@@ -7702,6 +7726,12 @@ W:       https://github.com/jonmason/ntb/wiki
 T:     git git://github.com/jonmason/ntb.git
 F:     drivers/ntb/hw/intel/
 
+NTB AMD DRIVER
+M:     Xiangliang Yu <Xiangliang.Yu@amd.com>
+L:     linux-ntb@googlegroups.com
+S:     Supported
+F:     drivers/ntb/hw/amd/
+
 NTFS FILESYSTEM
 M:     Anton Altaparmakov <anton@tuxera.com>
 L:     linux-ntfs-dev@lists.sourceforge.net
@@ -7901,7 +7931,7 @@ F:        drivers/media/platform/omap3isp/
 F:     drivers/staging/media/omap4iss/
 
 OMAP USB SUPPORT
-M:     Felipe Balbi <balbi@ti.com>
+M:     Felipe Balbi <balbi@kernel.org>
 L:     linux-usb@vger.kernel.org
 L:     linux-omap@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
@@ -8780,6 +8810,7 @@ L:        linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:     git git://github.com/hzhuang1/linux.git
 T:     git git://github.com/rjarzmik/linux.git
 S:     Maintained
+F:     arch/arm/boot/dts/pxa*
 F:     arch/arm/mach-pxa/
 F:     drivers/dma/pxa*
 F:     drivers/pcmcia/pxa2xx*
@@ -8809,6 +8840,7 @@ L:        linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:     git git://github.com/hzhuang1/linux.git
 T:     git git://git.linaro.org/people/ycmiao/pxa-linux.git
 S:     Maintained
+F:     arch/arm/boot/dts/mmp*
 F:     arch/arm/mach-mmp/
 
 PXA MMCI DRIVER
@@ -8975,6 +9007,12 @@ L:       linux-fbdev@vger.kernel.org
 S:     Maintained
 F:     drivers/video/fbdev/aty/aty128fb.c
 
+RALINK MIPS ARCHITECTURE
+M:     John Crispin <blogic@openwrt.org>
+L:     linux-mips@linux-mips.org
+S:     Maintained
+F:     arch/mips/ralink
+
 RALINK RT2X00 WIRELESS LAN DRIVER
 P:     rt2x00 project
 M:     Stanislaw Gruszka <sgruszka@redhat.com>
@@ -10114,6 +10152,7 @@ S:      Supported
 F:     drivers/media/pci/solo6x10/
 
 SOFTWARE RAID (Multiple Disks) SUPPORT
+M:     Shaohua Li <shli@kernel.org>
 L:     linux-raid@vger.kernel.org
 T:     git git://neil.brown.name/md
 S:     Supported
@@ -10129,7 +10168,7 @@ F:      drivers/net/ethernet/natsemi/sonic.*
 
 SONICS SILICON BACKPLANE DRIVER (SSB)
 M:     Michael Buesch <m@bues.ch>
-L:     netdev@vger.kernel.org
+L:     linux-wireless@vger.kernel.org
 S:     Maintained
 F:     drivers/ssb/
 F:     include/linux/ssb/
@@ -10247,6 +10286,7 @@ L:      spear-devel@list.st.com
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:     http://www.st.com/spear
 S:     Maintained
+F:     arch/arm/boot/dts/spear*
 F:     arch/arm/mach-spear/
 
 SPEAR CLOCK FRAMEWORK SUPPORT
@@ -10453,9 +10493,11 @@ S:     Maintained
 F:     drivers/net/ethernet/dlink/sundance.c
 
 SUPERH
+M:     Yoshinori Sato <ysato@users.sourceforge.jp>
+M:     Rich Felker <dalias@libc.org>
 L:     linux-sh@vger.kernel.org
 Q:     http://patchwork.kernel.org/project/linux-sh/list/
-S:     Orphan
+S:     Maintained
 F:     Documentation/sh/
 F:     arch/sh/
 F:     drivers/sh/
@@ -11272,7 +11314,7 @@ F:      Documentation/usb/ehci.txt
 F:     drivers/usb/host/ehci*
 
 USB GADGET/PERIPHERAL SUBSYSTEM
-M:     Felipe Balbi <balbi@ti.com>
+M:     Felipe Balbi <balbi@kernel.org>
 L:     linux-usb@vger.kernel.org
 W:     http://www.linux-usb.org/gadget
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
@@ -11348,7 +11390,7 @@ S:      Maintained
 F:     drivers/net/usb/pegasus.*
 
 USB PHY LAYER
-M:     Felipe Balbi <balbi@ti.com>
+M:     Felipe Balbi <balbi@kernel.org>
 L:     linux-usb@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
 S:     Maintained
@@ -12087,7 +12129,7 @@ F:      drivers/net/hamradio/*scc.c
 F:     drivers/net/hamradio/z8530.h
 
 ZBUD COMPRESSED PAGE ALLOCATOR
-M:     Seth Jennings <sjennings@variantweb.net>
+M:     Seth Jennings <sjenning@redhat.com>
 L:     linux-mm@kvack.org
 S:     Maintained
 F:     mm/zbud.c
@@ -12142,7 +12184,7 @@ F:      include/linux/zsmalloc.h
 F:     Documentation/vm/zsmalloc.txt
 
 ZSWAP COMPRESSED SWAP CACHING
-M:     Seth Jennings <sjennings@variantweb.net>
+M:     Seth Jennings <sjenning@redhat.com>
 L:     linux-mm@kvack.org
 S:     Maintained
 F:     mm/zswap.c
index abfb3e8..6828408 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
-PATCHLEVEL = 4
+PATCHLEVEL = 5
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc3
 NAME = Blurry Fish Butt
 
 # *DOCUMENTATION*
index 5c0e5cc..c6b6175 100644 (file)
@@ -153,10 +153,9 @@ choice
                  mobile SoCs in the Kona family of chips (e.g. bcm28155,
                  bcm11351, etc...)
 
-       config DEBUG_BCM63XX
+       config DEBUG_BCM63XX_UART
                bool "Kernel low-level debugging on BCM63XX UART"
                depends on ARCH_BCM_63XX
-               select DEBUG_UART_BCM63XX
 
        config DEBUG_BERLIN_UART
                bool "Marvell Berlin SoC Debug UART"
@@ -1414,7 +1413,7 @@ config DEBUG_LL_INCLUDE
        default "debug/vf.S" if DEBUG_VF_UART
        default "debug/vt8500.S" if DEBUG_VT8500_UART0
        default "debug/zynq.S" if DEBUG_ZYNQ_UART0 || DEBUG_ZYNQ_UART1
-       default "debug/bcm63xx.S" if DEBUG_UART_BCM63XX
+       default "debug/bcm63xx.S" if DEBUG_BCM63XX_UART
        default "debug/digicolor.S" if DEBUG_DIGICOLOR_UA0
        default "mach/debug-macro.S"
 
@@ -1428,10 +1427,6 @@ config DEBUG_UART_8250
                ARCH_IOP13XX || ARCH_IOP32X || ARCH_IOP33X || ARCH_IXP4XX || \
                ARCH_RPC
 
-# Compatibility options for BCM63xx
-config DEBUG_UART_BCM63XX
-       def_bool ARCH_BCM_63XX
-
 config DEBUG_UART_PHYS
        hex "Physical base address of debug UART"
        default 0x00100a00 if DEBUG_NETX_UART
@@ -1529,7 +1524,7 @@ config DEBUG_UART_PHYS
        default 0xfffb0000 if DEBUG_OMAP1UART1 || DEBUG_OMAP7XXUART1
        default 0xfffb0800 if DEBUG_OMAP1UART2 || DEBUG_OMAP7XXUART2
        default 0xfffb9800 if DEBUG_OMAP1UART3 || DEBUG_OMAP7XXUART3
-       default 0xfffe8600 if DEBUG_UART_BCM63XX
+       default 0xfffe8600 if DEBUG_BCM63XX_UART
        default 0xfffff700 if ARCH_IOP33X
        depends on ARCH_EP93XX || \
                DEBUG_LL_UART_8250 || DEBUG_LL_UART_PL01X || \
@@ -1542,7 +1537,7 @@ config DEBUG_UART_PHYS
                DEBUG_RMOBILE_SCIFA0 || DEBUG_RMOBILE_SCIFA1 || \
                DEBUG_RMOBILE_SCIFA4 || DEBUG_S3C24XX_UART || \
                DEBUG_S3C64XX_UART || \
-               DEBUG_UART_BCM63XX || DEBUG_ASM9260_UART || \
+               DEBUG_BCM63XX_UART || DEBUG_ASM9260_UART || \
                DEBUG_SIRFSOC_UART || DEBUG_DIGICOLOR_UA0 || \
                DEBUG_AT91_UART
 
@@ -1588,7 +1583,7 @@ config DEBUG_UART_VIRT
        default 0xfb10c000 if DEBUG_REALVIEW_PB1176_PORT
        default 0xfc40ab00 if DEBUG_BRCMSTB_UART
        default 0xfc705000 if DEBUG_ZTE_ZX
-       default 0xfcfe8600 if DEBUG_UART_BCM63XX
+       default 0xfcfe8600 if DEBUG_BCM63XX_UART
        default 0xfd000000 if DEBUG_SPEAR3XX || DEBUG_SPEAR13XX
        default 0xfd012000 if DEBUG_MVEBU_UART0_ALTERNATE && ARCH_MV78XX0
        default 0xfd883000 if DEBUG_ALPINE_UART0
@@ -1638,7 +1633,7 @@ config DEBUG_UART_VIRT
                DEBUG_NETX_UART || \
                DEBUG_QCOM_UARTDM || DEBUG_S3C24XX_UART || \
                DEBUG_S3C64XX_UART || \
-               DEBUG_UART_BCM63XX || DEBUG_ASM9260_UART || \
+               DEBUG_BCM63XX_UART || DEBUG_ASM9260_UART || \
                DEBUG_SIRFSOC_UART || DEBUG_DIGICOLOR_UA0
 
 config DEBUG_UART_8250_SHIFT
index 4c23a68..7a6a58e 100644 (file)
@@ -106,6 +106,15 @@ ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
 endif
 
+# -fstack-protector-strong triggers protection checks in this code,
+# but it is being used too early to link to meaningful stack_chk logic.
+nossp_flags := $(call cc-option, -fno-stack-protector)
+CFLAGS_atags_to_fdt.o := $(nossp_flags)
+CFLAGS_fdt.o := $(nossp_flags)
+CFLAGS_fdt_ro.o := $(nossp_flags)
+CFLAGS_fdt_rw.o := $(nossp_flags)
+CFLAGS_fdt_wip.o := $(nossp_flags)
+
 ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj)
 asflags-y := -DZIMAGE
 
index 04885f9..1fafaad 100644 (file)
                        ti,mbox-num-users = <4>;
                        ti,mbox-num-fifos = <8>;
                        mbox_wkupm3: wkup_m3 {
+                               ti,mbox-send-noirq;
                                ti,mbox-tx = <0 0 0>;
                                ti,mbox-rx = <0 0 3>;
                        };
index df955ba..92068fb 100644 (file)
@@ -73,7 +73,7 @@
        global_timer: timer@48240200 {
                compatible = "arm,cortex-a9-global-timer";
                reg = <0x48240200 0x100>;
-               interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+               interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
                interrupt-parent = <&gic>;
                clocks = <&mpu_periphclk>;
        };
@@ -81,7 +81,7 @@
        local_timer: timer@48240600 {
                compatible = "arm,cortex-a9-twd-timer";
                reg = <0x48240600 0x100>;
-               interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>;
+               interrupts = <GIC_PPI 13 IRQ_TYPE_EDGE_RISING>;
                interrupt-parent = <&gic>;
                clocks = <&mpu_periphclk>;
        };
                        ti,mbox-num-users = <4>;
                        ti,mbox-num-fifos = <8>;
                        mbox_wkupm3: wkup_m3 {
+                               ti,mbox-send-noirq;
                                ti,mbox-tx = <0 0 0>;
                                ti,mbox-rx = <0 0 3>;
                        };
index 64d4332..ecd09ab 100644 (file)
                pinctrl-names = "default";
                pinctrl-0 = <&pixcir_ts_pins>;
                reg = <0x5c>;
-               interrupt-parent = <&gpio3>;
-               interrupts = <22 0>;
 
                attb-gpio = <&gpio3 22 GPIO_ACTIVE_HIGH>;
 
                 * 0x264 represents the offset of padconf register of
                 * gpio3_22 from am43xx_pinmux base.
                 */
-               interrupts-extended = <&gpio3 22 IRQ_TYPE_NONE>,
+               interrupts-extended = <&gpio3 22 IRQ_TYPE_EDGE_FALLING>,
                                      <&am43xx_pinmux 0x264>;
                interrupt-names = "tsc", "wakeup";
 
index 746fd2b..d580e2b 100644 (file)
                pinctrl-0 = <&pixcir_ts_pins>;
                reg = <0x5c>;
                interrupt-parent = <&gpio1>;
-               interrupts = <17 0>;
+               interrupts = <17 IRQ_TYPE_EDGE_FALLING>;
 
                attb-gpio = <&gpio1 17 GPIO_ACTIVE_HIGH>;
 
index c538826..8d93882 100644 (file)
                        DRA7XX_CORE_IOPAD(0x35b8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d20.rgmii1_rd3 */
                        DRA7XX_CORE_IOPAD(0x35bc, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d21.rgmii1_rd2 */
                        DRA7XX_CORE_IOPAD(0x35c0, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d22.rgmii1_rd1 */
-                       DRA7XX_CORE_IOPAD(0x35c4, PIN_INPUT_PULLUP | MUX_MODE3) /* vin2a_d23.rgmii1_rd0 */
+                       DRA7XX_CORE_IOPAD(0x35c4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d23.rgmii1_rd0 */
                >;
        };
 
        pinctrl-names = "default";
        pinctrl-0 = <&qspi1_pins>;
 
-       spi-max-frequency = <20000000>;
+       spi-max-frequency = <48000000>;
 
        spi_flash: spi_flash@0 {
                #address-cells = <1>;
                #size-cells = <1>;
                compatible = "spansion,m25p80", "jedec,spi-nor";
                reg = <0>;                              /* CS0 */
-               spi-max-frequency = <20000000>;
+               spi-max-frequency = <48000000>;
 
                partition@0 {
                        label = "uboot";
 
 &cpsw_emac0 {
        phy_id = <&davinci_mdio>, <0>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-txid";
        dual_emac_res_vlan = <0>;
 };
 
 &cpsw_emac1 {
        phy_id = <&davinci_mdio>, <1>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-txid";
        dual_emac_res_vlan = <1>;
 };
 
 };
 
 &usb2 {
-       dr_mode = "peripheral";
+       dr_mode = "host";
 };
 
 &mcasp3 {
index 77bb8e1..988e996 100644 (file)
@@ -25,8 +25,8 @@
 &dra7_pmx_core {
        uart3_pins_default: uart3_pins_default {
                pinctrl-single,pins = <
-                       DRA7XX_CORE_IOPAD(0x37f8, PIN_INPUT_SLEW | MUX_MODE2)   /* uart2_ctsn.uart3_rxd */
-                       DRA7XX_CORE_IOPAD(0x37fc, PIN_INPUT_SLEW | MUX_MODE1)   /* uart2_rtsn.uart3_txd */
+                       DRA7XX_CORE_IOPAD(0x3648, PIN_INPUT_SLEW | MUX_MODE0)   /* uart3_rxd */
+                       DRA7XX_CORE_IOPAD(0x364c, PIN_INPUT_SLEW | MUX_MODE0)   /* uart3_txd */
                >;
        };
 
        pinctrl-0 = <&i2c5_pins_default>;
        clock-frequency = <400000>;
 
-       eeprom_base: atmel@50 {
+       eeprom_base: atmel@54 {
                compatible = "atmel,24c08";
-               reg = <0x50>;
+               reg = <0x54>;
                pagesize = <16>;
        };
 
index 13cf69a..fb9e1bb 100644 (file)
                                nand-on-flash-bbt;
 
                                partitions {
+                                       compatible = "fixed-partitions";
                                        #address-cells = <1>;
                                        #size-cells = <1>;
 
index 77ddff0..e683856 100644 (file)
 
                        macb0: ethernet@f8008000 {
                                pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_macb0_default>;
+                               pinctrl-0 = <&pinctrl_macb0_default &pinctrl_macb0_phy_irq>;
                                phy-mode = "rmii";
                                status = "okay";
+
+                               ethernet-phy@1 {
+                                       reg = <0x1>;
+                                       interrupt-parent = <&pioA>;
+                                       interrupts = <73 IRQ_TYPE_LEVEL_LOW>;
+                               };
                        };
 
                        pdmic@f8018000 {
                                        bias-disable;
                                };
 
+                               pinctrl_macb0_phy_irq: macb0_phy_irq {
+                                       pinmux = <PIN_PC9__GPIO>;
+                               };
+
                                pinctrl_pdmic_default: pdmic_default {
                                        pinmux = <PIN_PB26__PDMIC_DAT>,
                                                <PIN_PB27__PDMIC_CLK>;
index 131614f..569026e 100644 (file)
                        macb0: ethernet@f8020000 {
                                phy-mode = "rmii";
                                status = "okay";
+                               pinctrl-names = "default";
+                               pinctrl-0 = <&pinctrl_macb0_rmii &pinctrl_macb0_phy_irq>;
 
                                phy0: ethernet-phy@1 {
                                        interrupt-parent = <&pioE>;
-                                       interrupts = <1 IRQ_TYPE_EDGE_FALLING>;
+                                       interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
                                        reg = <1>;
                                };
                        };
                                                atmel,pins =
                                                        <AT91_PIOE 8 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
                                        };
+                                       pinctrl_macb0_phy_irq: macb0_phy_irq_0 {
+                                               atmel,pins =
+                                                       <AT91_PIOE 1 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
+                                       };
                                };
                        };
                };
index 2d4a331..4e98cda 100644 (file)
                        };
 
                        macb0: ethernet@f8020000 {
+                               pinctrl-0 = <&pinctrl_macb0_rmii &pinctrl_macb0_phy_irq>;
                                phy-mode = "rmii";
                                status = "okay";
+
+                               ethernet-phy@1 {
+                                       reg = <0x1>;
+                                       interrupt-parent = <&pioE>;
+                                       interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
+                               };
                        };
 
                        mmc1: mmc@fc000000 {
 
                        pinctrl@fc06a000 {
                                board {
+                                       pinctrl_macb0_phy_irq: macb0_phy_irq {
+                                               atmel,pins =
+                                                       <AT91_PIOE 1 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
+                                       };
                                        pinctrl_mmc0_cd: mmc0_cd {
                                                atmel,pins =
                                                        <AT91_PIOE 5 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
index ca4ddf8..626c67d 100644 (file)
        };
 
        panel: panel {
-               compatible = "qd,qd43003c0-40", "simple-panel";
+               compatible = "qiaodian,qd43003c0-40", "simple-panel";
                backlight = <&backlight>;
                power-supply = <&panel_reg>;
                #address-cells = <1>;
index 09eed3c..36eec73 100644 (file)
@@ -1,7 +1,8 @@
 /*
  * Device Tree file for Buffalo Linkstation LS-WVL/VL
  *
- * Copyright (C) 2015, rogershimizu@gmail.com
+ * Copyright (C) 2015, 2016
+ * Roger Shimizu <rogershimizu@gmail.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
                button@1 {
                        label = "Function Button";
                        linux,code = <KEY_OPTION>;
-                       gpios = <&gpio0 45 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 13 GPIO_ACTIVE_LOW>;
                };
 
                button@2 {
                        label = "Power-on Switch";
                        linux,code = <KEY_RESERVED>;
                        linux,input-type = <5>;
-                       gpios = <&gpio0 46 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 14 GPIO_ACTIVE_LOW>;
                };
 
                button@3 {
                        label = "Power-auto Switch";
                        linux,code = <KEY_ESC>;
                        linux,input-type = <5>;
-                       gpios = <&gpio0 47 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 15 GPIO_ACTIVE_LOW>;
                };
        };
 
 
                led@1 {
                        label = "lswvl:red:alarm";
-                       gpios = <&gpio0 36 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 4 GPIO_ACTIVE_HIGH>;
                };
 
                led@2 {
                        label = "lswvl:red:func";
-                       gpios = <&gpio0 37 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 5 GPIO_ACTIVE_HIGH>;
                };
 
                led@3 {
                        label = "lswvl:amber:info";
-                       gpios = <&gpio0 38 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 6 GPIO_ACTIVE_HIGH>;
                };
 
                led@4 {
                        label = "lswvl:blue:func";
-                       gpios = <&gpio0 39 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 7 GPIO_ACTIVE_HIGH>;
                };
 
                led@5 {
                        label = "lswvl:blue:power";
-                       gpios = <&gpio0 40 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 8 GPIO_ACTIVE_LOW>;
                        default-state = "keep";
                };
 
                led@6 {
                        label = "lswvl:red:hdderr0";
-                       gpios = <&gpio0 34 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
                };
 
                led@7 {
                        label = "lswvl:red:hdderr1";
-                       gpios = <&gpio0 35 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 3 GPIO_ACTIVE_HIGH>;
                };
        };
 
                                3250 1
                                5000 0>;
 
-               alarm-gpios = <&gpio0 43 GPIO_ACTIVE_HIGH>;
+               alarm-gpios = <&gpio1 11 GPIO_ACTIVE_HIGH>;
        };
 
        restart_poweroff {
index f5db16a..b13ec20 100644 (file)
@@ -1,7 +1,8 @@
 /*
  * Device Tree file for Buffalo Linkstation LS-WXL/WSXL
  *
- * Copyright (C) 2015, rogershimizu@gmail.com
+ * Copyright (C) 2015, 2016
+ * Roger Shimizu <rogershimizu@gmail.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
                button@1 {
                        label = "Function Button";
                        linux,code = <KEY_OPTION>;
-                       gpios = <&gpio1 41 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 9 GPIO_ACTIVE_LOW>;
                };
 
                button@2 {
                        label = "Power-on Switch";
                        linux,code = <KEY_RESERVED>;
                        linux,input-type = <5>;
-                       gpios = <&gpio1 42 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 10 GPIO_ACTIVE_LOW>;
                };
 
                button@3 {
                        label = "Power-auto Switch";
                        linux,code = <KEY_ESC>;
                        linux,input-type = <5>;
-                       gpios = <&gpio1 43 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 11 GPIO_ACTIVE_LOW>;
                };
        };
 
 
                led@1 {
                        label = "lswxl:blue:func";
-                       gpios = <&gpio1 36 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
                };
 
                led@2 {
                        label = "lswxl:red:alarm";
-                       gpios = <&gpio1 49 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 17 GPIO_ACTIVE_LOW>;
                };
 
                led@3 {
 
                led@4 {
                        label = "lswxl:blue:power";
-                       gpios = <&gpio1 8 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 7 GPIO_ACTIVE_HIGH>;
+                       default-state = "keep";
                };
 
                led@5 {
                        label = "lswxl:red:func";
-                       gpios = <&gpio1 5 GPIO_ACTIVE_LOW>;
-                       default-state = "keep";
+                       gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
                };
 
                led@6 {
                        label = "lswxl:red:hdderr0";
-                       gpios = <&gpio1 2 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio0 8 GPIO_ACTIVE_HIGH>;
                };
 
                led@7 {
                        label = "lswxl:red:hdderr1";
-                       gpios = <&gpio1 3 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 14 GPIO_ACTIVE_HIGH>;
                };
        };
 
                pinctrl-0 = <&pmx_fan_low &pmx_fan_high &pmx_fan_lock>;
                pinctrl-names = "default";
 
-               gpios = <&gpio0 47 GPIO_ACTIVE_LOW
-                        &gpio0 48 GPIO_ACTIVE_LOW>;
+               gpios = <&gpio1 16 GPIO_ACTIVE_LOW
+                        &gpio1 15 GPIO_ACTIVE_LOW>;
 
                gpio-fan,speed-map = <0 3
                                1500 2
                                3250 1
                                5000 0>;
 
-               alarm-gpios = <&gpio1 49 GPIO_ACTIVE_HIGH>;
+               alarm-gpios = <&gpio1 8 GPIO_ACTIVE_HIGH>;
        };
 
        restart_poweroff {
                        enable-active-high;
                        regulator-always-on;
                        regulator-boot-on;
-                       gpio = <&gpio0 37 GPIO_ACTIVE_HIGH>;
+                       gpio = <&gpio1 5 GPIO_ACTIVE_HIGH>;
                };
                hdd_power0: regulator@2 {
                        compatible = "regulator-fixed";
index 1db6f2c..8082d64 100644 (file)
        chip-delay = <40>;
        status = "okay";
        partitions {
+               compatible = "fixed-partitions";
                #address-cells = <1>;
                #size-cells = <1>;
 
index 7fed0bd..0080532 100644 (file)
        clock-frequency = <400000>;
 };
 
-&i2c2 {
-       clock-frequency = <400000>;
-};
-
-&i2c3 {
-       clock-frequency = <400000>;
-};
-
 /*
  * Only found on the wireless SOM. For the SOM without wireless, the pins for
  * MMC3 can be routed with jumpers to the second MMC slot on the devkit and
                interrupt-parent = <&gpio5>;
                interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; /* gpio 152 */
                ref-clock-frequency = <26000000>;
+               tcxo-clock-frequency = <26000000>;
        };
 };
 
index 888412c..902657d 100644 (file)
        };
 };
 
+&gpio8 {
+       /* TI trees use GPIO instead of msecure, see also muxing */
+       p234 {
+               gpio-hog;
+               gpios = <10 GPIO_ACTIVE_HIGH>;
+               output-high;
+               line-name = "gpio8_234/msecure";
+       };
+};
+
 &omap5_pmx_core {
        pinctrl-names = "default";
        pinctrl-0 = <
                >;
        };
 
+       /* TI trees use GPIO mode; msecure mode does not work reliably? */
+       palmas_msecure_pins: palmas_msecure_pins {
+               pinctrl-single,pins = <
+                       OMAP5_IOPAD(0x180, PIN_OUTPUT | MUX_MODE6) /* gpio8_234 */
+               >;
+       };
+
        usbhost_pins: pinmux_usbhost_pins {
                pinctrl-single,pins = <
                        OMAP5_IOPAD(0x0c4, PIN_INPUT | MUX_MODE0) /* usbb2_hsic_strobe */
                        &usbhost_wkup_pins
        >;
 
+       palmas_sys_nirq_pins: pinmux_palmas_sys_nirq_pins {
+               pinctrl-single,pins = <
+                       OMAP5_IOPAD(0x068, PIN_INPUT_PULLUP | MUX_MODE0) /* sys_nirq1 */
+               >;
+       };
+
        usbhost_wkup_pins: pinmux_usbhost_wkup_pins {
                pinctrl-single,pins = <
                        OMAP5_IOPAD(0x05a, PIN_OUTPUT | MUX_MODE0) /* fref_clk1_out, USB hub clk */
                interrupt-controller;
                #interrupt-cells = <2>;
                ti,system-power-controller;
+               pinctrl-names = "default";
+               pinctrl-0 = <&palmas_sys_nirq_pins &palmas_msecure_pins>;
 
                extcon_usb3: palmas_usb {
                        compatible = "ti,palmas-usb-vid";
                        #clock-cells = <0>;
                };
 
+               rtc {
+                       compatible = "ti,palmas-rtc";
+                       interrupt-parent = <&palmas>;
+                       interrupts = <8 IRQ_TYPE_NONE>;
+                       ti,backup-battery-chargeable;
+                       ti,backup-battery-charge-high-current;
+               };
+
                palmas_pmic {
                        compatible = "ti,palmas-pmic";
                        interrupt-parent = <&palmas>;
index 3daec91..4207882 100644 (file)
@@ -1,7 +1,8 @@
 /*
  * Device Tree file for Buffalo Linkstation LS-WTGL
  *
- * Copyright (C) 2015, Roger Shimizu <rogershimizu@gmail.com>
+ * Copyright (C) 2015, 2016
+ * Roger Shimizu <rogershimizu@gmail.com>
  *
  * This file is dual-licensed: you can use it either under the terms
  * of the GPL or the X11 license, at your option. Note that this dual
@@ -69,8 +70,6 @@
 
                internal-regs {
                        pinctrl: pinctrl@10000 {
-                               pinctrl-0 = <&pmx_usb_power &pmx_power_hdd
-                                       &pmx_fan_low &pmx_fan_high &pmx_fan_lock>;
                                pinctrl-names = "default";
 
                                pmx_led_power: pmx-leds {
                led@1 {
                        label = "lswtgl:blue:power";
                        gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
+                       default-state = "keep";
                };
 
                led@2 {
                                3250 1
                                5000 0>;
 
-               alarm-gpios = <&gpio0 2 GPIO_ACTIVE_HIGH>;
+               alarm-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>;
        };
 
        restart_poweroff {
index 78a21f2..c548cab 100644 (file)
 };
 
 &extal1_clk {
-       clock-frequency = <25000000>;
+       clock-frequency = <24000000>;
 };
 &extal2_clk {
        clock-frequency = <48000000>;
index b8032bc..db1151c 100644 (file)
                        dbgu: serial@fc069000 {
                                compatible = "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
                                reg = <0xfc069000 0x200>;
-                               interrupts = <2 IRQ_TYPE_LEVEL_HIGH 7>;
+                               interrupts = <45 IRQ_TYPE_LEVEL_HIGH 7>;
                                pinctrl-names = "default";
                                pinctrl-0 = <&pinctrl_dbgu>;
                                clocks = <&dbgu_clk>;
index d0c7438..27a333e 100644 (file)
                        };
                        mmcsd_default_mode: mmcsd_default {
                                mmcsd_default_cfg1 {
-                                       /* MCCLK */
-                                       pins = "GPIO8_B10";
-                                       ste,output = <0>;
-                               };
-                               mmcsd_default_cfg2 {
-                                       /* MCCMDDIR, MCDAT0DIR, MCDAT31DIR, MCDATDIR2 */
-                                       pins = "GPIO10_C11", "GPIO15_A12",
-                                       "GPIO16_C13", "GPIO23_D15";
-                                       ste,output = <1>;
-                               };
-                               mmcsd_default_cfg3 {
-                                       /* MCCMD, MCDAT3-0, MCMSFBCLK */
-                                       pins = "GPIO9_A10", "GPIO11_B11",
-                                       "GPIO12_A11", "GPIO13_C12",
-                                       "GPIO14_B12", "GPIO24_C15";
-                                       ste,input = <1>;
+                                       /*
+                                        * MCCLK, MCCMDDIR, MCDAT0DIR, MCDAT31DIR, MCDATDIR2
+                                        * MCCMD, MCDAT3-0, MCMSFBCLK
+                                        */
+                                       pins = "GPIO8_B10", "GPIO9_A10", "GPIO10_C11", "GPIO11_B11",
+                                              "GPIO12_A11", "GPIO13_C12", "GPIO14_B12", "GPIO15_A12",
+                                              "GPIO16_C13", "GPIO23_D15", "GPIO24_C15";
+                                       ste,output = <2>;
                                };
                        };
                };
                        clock-names = "mclk", "apb_pclk";
                        interrupt-parent = <&vica>;
                        interrupts = <22>;
-                       max-frequency = <48000000>;
+                       max-frequency = <400000>;
                        bus-width = <4>;
                        cap-mmc-highspeed;
                        cap-sd-highspeed;
+                       full-pwr-cycle;
+                       /*
+                        * The STw4811 circuit used with the Nomadik strictly
+                        * requires that all of these signal direction pins be
+                        * routed and used for its 4-bit levelshifter.
+                        */
+                       st,sig-dir-dat0;
+                       st,sig-dir-dat2;
+                       st,sig-dir-dat31;
+                       st,sig-dir-cmd;
+                       st,sig-pin-fbclk;
                        pinctrl-names = "default";
                        pinctrl-0 = <&mmcsd_default_mux>, <&mmcsd_default_mode>;
                        vmmc-supply = <&vmmc_regulator>;
index 314f6be..8e8b2ac 100644 (file)
@@ -426,6 +426,7 @@ CONFIG_SUNXI_WATCHDOG=y
 CONFIG_IMX2_WDT=y
 CONFIG_TEGRA_WATCHDOG=m
 CONFIG_MESON_WATCHDOG=y
+CONFIG_DW_WATCHDOG=y
 CONFIG_DIGICOLOR_WATCHDOG=y
 CONFIG_MFD_AS3711=y
 CONFIG_MFD_AS3722=y
index c5e1943..a715174 100644 (file)
@@ -50,6 +50,7 @@ CONFIG_SOC_AM33XX=y
 CONFIG_SOC_AM43XX=y
 CONFIG_SOC_DRA7XX=y
 CONFIG_ARM_THUMBEE=y
+CONFIG_ARM_KERNMEM_PERMS=y
 CONFIG_ARM_ERRATA_411920=y
 CONFIG_ARM_ERRATA_430973=y
 CONFIG_SMP=y
@@ -177,6 +178,7 @@ CONFIG_TI_CPTS=y
 CONFIG_AT803X_PHY=y
 CONFIG_SMSC_PHY=y
 CONFIG_USB_USBNET=m
+CONFIG_USB_NET_SMSC75XX=m
 CONFIG_USB_NET_SMSC95XX=m
 CONFIG_USB_ALI_M5632=y
 CONFIG_USB_AN2720=y
@@ -354,6 +356,11 @@ CONFIG_USB_MUSB_DSPS=m
 CONFIG_USB_INVENTRA_DMA=y
 CONFIG_USB_TI_CPPI41_DMA=y
 CONFIG_USB_DWC3=m
+CONFIG_USB_SERIAL=m
+CONFIG_USB_SERIAL_GENERIC=y
+CONFIG_USB_SERIAL_SIMPLE=m
+CONFIG_USB_SERIAL_FTDI_SIO=m
+CONFIG_USB_SERIAL_PL2303=m
 CONFIG_USB_TEST=m
 CONFIG_AM335X_PHY_USB=y
 CONFIG_USB_GADGET=m
@@ -387,6 +394,7 @@ CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=m
 CONFIG_LEDS_GPIO=m
 CONFIG_LEDS_PWM=m
+CONFIG_LEDS_PCA963X=m
 CONFIG_LEDS_TRIGGERS=y
 CONFIG_LEDS_TRIGGER_TIMER=m
 CONFIG_LEDS_TRIGGER_ONESHOT=m
@@ -449,6 +457,8 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_SPLIT=y
+CONFIG_DEBUG_INFO_DWARF4=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
index ede692f..5dd2528 100644 (file)
 #define __NR_userfaultfd               (__NR_SYSCALL_BASE+388)
 #define __NR_membarrier                        (__NR_SYSCALL_BASE+389)
 #define __NR_mlock2                    (__NR_SYSCALL_BASE+390)
+#define __NR_copy_file_range           (__NR_SYSCALL_BASE+391)
 
 /*
  * The following SWIs are ARM private.
index ac368bb..dfc7cd6 100644 (file)
                CALL(sys_userfaultfd)
                CALL(sys_membarrier)
                CALL(sys_mlock2)
+               CALL(sys_copy_file_range)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
index 9cda974..d7f1d69 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/pinctrl/machine.h>
-#include <linux/platform_data/mailbox-omap.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/map.h>
@@ -66,32 +65,6 @@ static int __init omap3_l3_init(void)
 }
 omap_postcore_initcall(omap3_l3_init);
 
-#if defined(CONFIG_OMAP2PLUS_MBOX) || defined(CONFIG_OMAP2PLUS_MBOX_MODULE)
-static inline void __init omap_init_mbox(void)
-{
-       struct omap_hwmod *oh;
-       struct platform_device *pdev;
-       struct omap_mbox_pdata *pdata;
-
-       oh = omap_hwmod_lookup("mailbox");
-       if (!oh) {
-               pr_err("%s: unable to find hwmod\n", __func__);
-               return;
-       }
-       if (!oh->dev_attr) {
-               pr_err("%s: hwmod doesn't have valid attrs\n", __func__);
-               return;
-       }
-
-       pdata = (struct omap_mbox_pdata *)oh->dev_attr;
-       pdev = omap_device_build("omap-mailbox", -1, oh, pdata, sizeof(*pdata));
-       WARN(IS_ERR(pdev), "%s: could not build device, err %ld\n",
-                                               __func__, PTR_ERR(pdev));
-}
-#else
-static inline void omap_init_mbox(void) { }
-#endif /* CONFIG_OMAP2PLUS_MBOX */
-
 static inline void omap_init_sti(void) {}
 
 #if defined(CONFIG_SPI_OMAP24XX) || defined(CONFIG_SPI_OMAP24XX_MODULE)
@@ -229,7 +202,6 @@ static int __init omap2_init_devices(void)
                 * please keep these calls, and their implementations above,
                 * in alphabetical order so they're easier to sort through.
                 */
-               omap_init_mbox();
                omap_init_mcspi();
                omap_init_sham();
                omap_init_aes();
index e781e4f..a935d28 100644 (file)
@@ -23,6 +23,8 @@
 #include <linux/platform_data/pinctrl-single.h>
 #include <linux/platform_data/iommu-omap.h>
 #include <linux/platform_data/wkup_m3.h>
+#include <linux/platform_data/pwm_omap_dmtimer.h>
+#include <plat/dmtimer.h>
 
 #include "common.h"
 #include "common-board-devices.h"
@@ -449,6 +451,24 @@ void omap_auxdata_legacy_init(struct device *dev)
        dev->platform_data = &twl_gpio_auxdata;
 }
 
+/* Dual mode timer PWM callbacks platdata */
+#if IS_ENABLED(CONFIG_OMAP_DM_TIMER)
+struct pwm_omap_dmtimer_pdata pwm_dmtimer_pdata = {
+       .request_by_node = omap_dm_timer_request_by_node,
+       .free = omap_dm_timer_free,
+       .enable = omap_dm_timer_enable,
+       .disable = omap_dm_timer_disable,
+       .get_fclk = omap_dm_timer_get_fclk,
+       .start = omap_dm_timer_start,
+       .stop = omap_dm_timer_stop,
+       .set_load = omap_dm_timer_set_load,
+       .set_match = omap_dm_timer_set_match,
+       .set_pwm = omap_dm_timer_set_pwm,
+       .set_prescaler = omap_dm_timer_set_prescaler,
+       .write_counter = omap_dm_timer_write_counter,
+};
+#endif
+
 /*
  * Few boards still need auxdata populated before we populate
  * the dev entries in of_platform_populate().
@@ -502,6 +522,9 @@ static struct of_dev_auxdata omap_auxdata_lookup[] __initdata = {
        OF_DEV_AUXDATA("ti,am4372-wkup-m3", 0x44d00000, "44d00000.wkup_m3",
                       &wkup_m3_data),
 #endif
+#if IS_ENABLED(CONFIG_OMAP_DM_TIMER)
+       OF_DEV_AUXDATA("ti,omap-dmtimer-pwm", 0, NULL, &pwm_dmtimer_pdata),
+#endif
 #if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5)
        OF_DEV_AUXDATA("ti,omap4-iommu", 0x4a066000, "4a066000.mmu",
                       &omap4_iommu_pdata),
index eafd120..1b9f052 100644 (file)
@@ -86,13 +86,18 @@ ENTRY(enable_omap3630_toggle_l2_on_restore)
        stmfd   sp!, {lr}       @ save registers on stack
        /* Setup so that we will disable and enable l2 */
        mov     r1, #0x1
-       adrl    r2, l2dis_3630  @ may be too distant for plain adr
-       str     r1, [r2]
+       adrl    r3, l2dis_3630_offset   @ may be too distant for plain adr
+       ldr     r2, [r3]                @ value for offset
+       str     r1, [r2, r3]            @ write to l2dis_3630
        ldmfd   sp!, {pc}       @ restore regs and return
 ENDPROC(enable_omap3630_toggle_l2_on_restore)
 
-       .text
-/* Function to call rom code to save secure ram context */
+/*
+ * Function to call rom code to save secure ram context. This gets
+ * relocated to SRAM, so it can be all in .data section. Otherwise
+ * we need to initialize api_params separately.
+ */
+       .data
        .align  3
 ENTRY(save_secure_ram_context)
        stmfd   sp!, {r4 - r11, lr}     @ save registers on stack
@@ -126,6 +131,8 @@ ENDPROC(save_secure_ram_context)
 ENTRY(save_secure_ram_context_sz)
        .word   . - save_secure_ram_context
 
+       .text
+
 /*
  * ======================
  * == Idle entry point ==
@@ -289,12 +296,6 @@ wait_sdrc_ready:
        bic     r5, r5, #0x40
        str     r5, [r4]
 
-/*
- * PC-relative stores lead to undefined behaviour in Thumb-2: use a r7 as a
- * base instead.
- * Be careful not to clobber r7 when maintaing this code.
- */
-
 is_dll_in_lock_mode:
        /* Is dll in lock mode? */
        ldr     r4, sdrc_dlla_ctrl
@@ -302,11 +303,7 @@ is_dll_in_lock_mode:
        tst     r5, #0x4
        bne     exit_nonoff_modes       @ Return if locked
        /* wait till dll locks */
-       adr     r7, kick_counter
 wait_dll_lock_timed:
-       ldr     r4, wait_dll_lock_counter
-       add     r4, r4, #1
-       str     r4, [r7, #wait_dll_lock_counter - kick_counter]
        ldr     r4, sdrc_dlla_status
        /* Wait 20uS for lock */
        mov     r6, #8
@@ -330,9 +327,6 @@ kick_dll:
        orr     r6, r6, #(1<<3)         @ enable dll
        str     r6, [r4]
        dsb
-       ldr     r4, kick_counter
-       add     r4, r4, #1
-       str     r4, [r7]                @ kick_counter
        b       wait_dll_lock_timed
 
 exit_nonoff_modes:
@@ -360,15 +354,6 @@ sdrc_dlla_status:
        .word   SDRC_DLLA_STATUS_V
 sdrc_dlla_ctrl:
        .word   SDRC_DLLA_CTRL_V
-       /*
-        * When exporting to userspace while the counters are in SRAM,
-        * these 2 words need to be at the end to facilitate retrival!
-        */
-kick_counter:
-       .word   0
-wait_dll_lock_counter:
-       .word   0
-
 ENTRY(omap3_do_wfi_sz)
        .word   . - omap3_do_wfi
 
@@ -437,7 +422,9 @@ ENTRY(omap3_restore)
        cmp     r2, #0x0        @ Check if target power state was OFF or RET
        bne     logic_l1_restore
 
-       ldr     r0, l2dis_3630
+       adr     r1, l2dis_3630_offset   @ address for offset
+       ldr     r0, [r1]                @ value for offset
+       ldr     r0, [r1, r0]            @ value at l2dis_3630
        cmp     r0, #0x1        @ should we disable L2 on 3630?
        bne     skipl2dis
        mrc     p15, 0, r0, c1, c0, 1
@@ -449,12 +436,14 @@ skipl2dis:
        and     r1, #0x700
        cmp     r1, #0x300
        beq     l2_inv_gp
+       adr     r0, l2_inv_api_params_offset
+       ldr     r3, [r0]
+       add     r3, r3, r0              @ r3 points to dummy parameters
        mov     r0, #40                 @ set service ID for PPA
        mov     r12, r0                 @ copy secure Service ID in r12
        mov     r1, #0                  @ set task id for ROM code in r1
        mov     r2, #4                  @ set some flags in r2, r6
        mov     r6, #0xff
-       adr     r3, l2_inv_api_params   @ r3 points to dummy parameters
        dsb                             @ data write barrier
        dmb                             @ data memory barrier
        smc     #1                      @ call SMI monitor (smi #1)
@@ -488,8 +477,8 @@ skipl2dis:
        b       logic_l1_restore
 
        .align
-l2_inv_api_params:
-       .word   0x1, 0x00
+l2_inv_api_params_offset:
+       .long   l2_inv_api_params - .
 l2_inv_gp:
        /* Execute smi to invalidate L2 cache */
        mov r12, #0x1                   @ set up to invalidate L2
@@ -506,7 +495,9 @@ l2_inv_gp:
        mov     r12, #0x2
        smc     #0                      @ Call SMI monitor (smieq)
 logic_l1_restore:
-       ldr     r1, l2dis_3630
+       adr     r0, l2dis_3630_offset   @ address for offset
+       ldr     r1, [r0]                @ value for offset
+       ldr     r1, [r0, r1]            @ value at l2dis_3630
        cmp     r1, #0x1                @ Test if L2 re-enable needed on 3630
        bne     skipl2reen
        mrc     p15, 0, r1, c1, c0, 1
@@ -535,9 +526,17 @@ control_stat:
        .word   CONTROL_STAT
 control_mem_rta:
        .word   CONTROL_MEM_RTA_CTRL
+l2dis_3630_offset:
+       .long   l2dis_3630 - .
+
+       .data
 l2dis_3630:
        .word   0
 
+       .data
+l2_inv_api_params:
+       .word   0x1, 0x00
+
 /*
  * Internal functions
  */
index 9b09d85..c7a3b4a 100644 (file)
        dsb
 .endm
 
-ppa_zero_params:
-       .word           0x0
-
-ppa_por_params:
-       .word           1, 0
-
 #ifdef CONFIG_ARCH_OMAP4
 
 /*
@@ -266,7 +260,9 @@ ENTRY(omap4_cpu_resume)
        beq     skip_ns_smp_enable
 ppa_actrl_retry:
        mov     r0, #OMAP4_PPA_CPU_ACTRL_SMP_INDEX
-       adr     r3, ppa_zero_params             @ Pointer to parameters
+       adr     r1, ppa_zero_params_offset
+       ldr     r3, [r1]
+       add     r3, r3, r1                      @ Pointer to ppa_zero_params
        mov     r1, #0x0                        @ Process ID
        mov     r2, #0x4                        @ Flag
        mov     r6, #0xff
@@ -303,7 +299,9 @@ skip_ns_smp_enable:
        ldr     r0, =OMAP4_PPA_L2_POR_INDEX
        ldr     r1, =OMAP44XX_SAR_RAM_BASE
        ldr     r4, [r1, #L2X0_PREFETCH_CTRL_OFFSET]
-       adr     r3, ppa_por_params
+       adr     r1, ppa_por_params_offset
+       ldr     r3, [r1]
+       add     r3, r3, r1                      @ Pointer to ppa_por_params
        str     r4, [r3, #0x04]
        mov     r1, #0x0                        @ Process ID
        mov     r2, #0x4                        @ Flag
@@ -328,6 +326,8 @@ skip_l2en:
 #endif
 
        b       cpu_resume                      @ Jump to generic resume
+ppa_por_params_offset:
+       .long   ppa_por_params - .
 ENDPROC(omap4_cpu_resume)
 #endif /* CONFIG_ARCH_OMAP4 */
 
@@ -380,4 +380,13 @@ ENTRY(omap_do_wfi)
        nop
 
        ldmfd   sp!, {pc}
+ppa_zero_params_offset:
+       .long   ppa_zero_params - .
 ENDPROC(omap_do_wfi)
+
+       .data
+ppa_zero_params:
+       .word           0
+
+ppa_por_params:
+       .word           1, 0
index def40a0..70ab4a2 100644 (file)
@@ -1,5 +1,6 @@
 menuconfig ARCH_REALVIEW
-       bool "ARM Ltd. RealView family" if ARCH_MULTI_V5 || ARCH_MULTI_V6 || ARCH_MULTI_V7
+       bool "ARM Ltd. RealView family"
+       depends on ARCH_MULTI_V5 || ARCH_MULTI_V6 || ARCH_MULTI_V7
        select ARM_AMBA
        select ARM_TIMER_SP804
        select COMMON_CLK_VERSATILE
index 8be6632..dae8d86 100644 (file)
@@ -4,10 +4,9 @@
 ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
        -I$(srctree)/arch/arm/plat-versatile/include
 
-
+obj-y                                  := core.o
 obj-$(CONFIG_REALVIEW_DT)              += realview-dt.o
 obj-$(CONFIG_SMP)                      += platsmp-dt.o
-obj-y                                  := core.o
 
 ifdef CONFIG_ATAGS
 obj-$(CONFIG_MACH_REALVIEW_EB)         += realview_eb.o
index 6558539..6964e88 100644 (file)
@@ -80,7 +80,7 @@ static void __init realview_smp_prepare_cpus(unsigned int max_cpus)
                     virt_to_phys(versatile_secondary_startup));
 }
 
-struct smp_operations realview_dt_smp_ops __initdata = {
+static const struct smp_operations realview_dt_smp_ops __initconst = {
        .smp_prepare_cpus       = realview_smp_prepare_cpus,
        .smp_secondary_init     = versatile_secondary_init,
        .smp_boot_secondary     = versatile_boot_secondary,
index d6a3714..ebe15b9 100644 (file)
@@ -1,5 +1,6 @@
 config ARCH_TANGO
-       bool "Sigma Designs Tango4 (SMP87xx)" if ARCH_MULTI_V7
+       bool "Sigma Designs Tango4 (SMP87xx)"
+       depends on ARCH_MULTI_V7
        # Cortex-A9 MPCore r3p0, PL310 r3p2
        select ARCH_HAS_HOLES_MEMORYMODEL
        select ARM_ERRATA_754322
index a18d5a3..a21f55e 100644 (file)
@@ -9,7 +9,7 @@ static int tango_boot_secondary(unsigned int cpu, struct task_struct *idle)
        return 0;
 }
 
-static struct smp_operations tango_smp_ops __initdata = {
+static const struct smp_operations tango_smp_ops __initconst = {
        .smp_boot_secondary     = tango_boot_secondary,
 };
 
index a90f355..0fa8b84 100644 (file)
@@ -13,57 +13,5 @@ menuconfig ARCH_TEGRA
        select ARCH_HAS_RESET_CONTROLLER
        select RESET_CONTROLLER
        select SOC_BUS
-       select USB_ULPI if USB_PHY
-       select USB_ULPI_VIEWPORT if USB_PHY
        help
          This enables support for NVIDIA Tegra based systems.
-
-if ARCH_TEGRA
-
-config ARCH_TEGRA_2x_SOC
-       bool "Enable support for Tegra20 family"
-       select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP
-       select ARM_ERRATA_720789
-       select ARM_ERRATA_754327 if SMP
-       select ARM_ERRATA_764369 if SMP
-       select PINCTRL_TEGRA20
-       select PL310_ERRATA_727915 if CACHE_L2X0
-       select PL310_ERRATA_769419 if CACHE_L2X0
-       select TEGRA_TIMER
-       help
-         Support for NVIDIA Tegra AP20 and T20 processors, based on the
-         ARM CortexA9MP CPU and the ARM PL310 L2 cache controller
-
-config ARCH_TEGRA_3x_SOC
-       bool "Enable support for Tegra30 family"
-       select ARM_ERRATA_754322
-       select ARM_ERRATA_764369 if SMP
-       select PINCTRL_TEGRA30
-       select PL310_ERRATA_769419 if CACHE_L2X0
-       select TEGRA_TIMER
-       help
-         Support for NVIDIA Tegra T30 processor family, based on the
-         ARM CortexA9MP CPU and the ARM PL310 L2 cache controller
-
-config ARCH_TEGRA_114_SOC
-       bool "Enable support for Tegra114 family"
-       select ARM_ERRATA_798181 if SMP
-       select ARM_L1_CACHE_SHIFT_6
-       select HAVE_ARM_ARCH_TIMER
-       select PINCTRL_TEGRA114
-       select TEGRA_TIMER
-       help
-         Support for NVIDIA Tegra T114 processor family, based on the
-         ARM CortexA15MP CPU
-
-config ARCH_TEGRA_124_SOC
-       bool "Enable support for Tegra124 family"
-       select ARM_L1_CACHE_SHIFT_6
-       select HAVE_ARM_ARCH_TIMER
-       select PINCTRL_TEGRA124
-       select TEGRA_TIMER
-       help
-         Support for NVIDIA Tegra T124 processor family, based on the
-         ARM CortexA15MP CPU
-
-endif
index e6b684e..f5d1966 100644 (file)
@@ -231,8 +231,11 @@ ENDPROC(tegra20_cpu_is_resettable_soon)
  * tegra20_tear_down_core in IRAM
  */
 ENTRY(tegra20_sleep_core_finish)
+       mov     r4, r0
        /* Flush, disable the L1 data cache and exit SMP */
+       mov     r0, #TEGRA_FLUSH_CACHE_ALL
        bl      tegra_disable_clean_inv_dcache
+       mov     r0, r4
 
        mov32   r3, tegra_shut_off_mmu
        add     r3, r3, r0
index 9a2f0b0..16e5ff0 100644 (file)
@@ -242,8 +242,11 @@ ENDPROC(tegra30_cpu_shutdown)
  * tegra30_tear_down_core in IRAM
  */
 ENTRY(tegra30_sleep_core_finish)
+       mov     r4, r0
        /* Flush, disable the L1 data cache and exit SMP */
+       mov     r0, #TEGRA_FLUSH_CACHE_ALL
        bl      tegra_disable_clean_inv_dcache
+       mov     r0, r4
 
        /*
         * Preload all the address literals that are needed for the
index 534a60a..0eca381 100644 (file)
@@ -1200,10 +1200,7 @@ error:
        while (i--)
                if (pages[i])
                        __free_pages(pages[i], 0);
-       if (array_size <= PAGE_SIZE)
-               kfree(pages);
-       else
-               vfree(pages);
+       kvfree(pages);
        return NULL;
 }
 
@@ -1211,7 +1208,6 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages,
                               size_t size, struct dma_attrs *attrs)
 {
        int count = size >> PAGE_SHIFT;
-       int array_size = count * sizeof(struct page *);
        int i;
 
        if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) {
@@ -1222,10 +1218,7 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages,
                                __free_pages(pages[i], 0);
        }
 
-       if (array_size <= PAGE_SIZE)
-               kfree(pages);
-       else
-               vfree(pages);
+       kvfree(pages);
        return 0;
 }
 
index 2c40041..21074f6 100644 (file)
@@ -105,18 +105,6 @@ config ARCH_TEGRA
        help
          This enables support for the NVIDIA Tegra SoC family.
 
-config ARCH_TEGRA_132_SOC
-       bool "NVIDIA Tegra132 SoC"
-       depends on ARCH_TEGRA
-       select PINCTRL_TEGRA124
-       select USB_ULPI if USB_PHY
-       select USB_ULPI_VIEWPORT if USB_PHY
-       help
-         Enable support for NVIDIA Tegra132 SoC, based on the Denver
-         ARMv8 CPU.  The Tegra132 SoC is similar to the Tegra124 SoC,
-         but contains an NVIDIA Denver CPU complex in place of
-         Tegra124's "4+1" Cortex-A15 CPU complex.
-
 config ARCH_SPRD
        bool "Spreadtrum SoC platform"
        help
index cd822d8..307237c 100644 (file)
@@ -27,6 +27,8 @@ $(warning LSE atomics not supported by binutils)
 endif
 
 KBUILD_CFLAGS  += -mgeneral-regs-only $(lseinstr)
+KBUILD_CFLAGS  += -fno-asynchronous-unwind-tables
+KBUILD_CFLAGS  += $(call cc-option, -mpc-relative-literal-loads)
 KBUILD_AFLAGS  += $(lseinstr)
 
 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
index 76e7510..f832b8a 100644 (file)
@@ -9,6 +9,7 @@ dts-dirs += freescale
 dts-dirs += hisilicon
 dts-dirs += marvell
 dts-dirs += mediatek
+dts-dirs += nvidia
 dts-dirs += qcom
 dts-dirs += renesas
 dts-dirs += rockchip
index dd5158e..e5b59ca 100644 (file)
                             <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 91 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>,
index da7b6e6..933cba3 100644 (file)
@@ -23,9 +23,8 @@ soc0: soc@000000000 {
                };
        };
 
-       dsa: dsa@c7000000 {
+       dsaf0: dsa@c7000000 {
                compatible = "hisilicon,hns-dsaf-v1";
-               dsa_name = "dsaf0";
                mode = "6port-16rss";
                interrupt-parent = <&mbigen_dsa>;
 
@@ -127,7 +126,7 @@ soc0: soc@000000000 {
 
        eth0: ethernet@0{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <0>;
                local-mac-address = [00 00 00 01 00 58];
                status = "disabled";
@@ -135,14 +134,14 @@ soc0: soc@000000000 {
        };
        eth1: ethernet@1{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <1>;
                status = "disabled";
                dma-coherent;
        };
        eth2: ethernet@2{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <2>;
                local-mac-address = [00 00 00 01 00 5a];
                status = "disabled";
@@ -150,7 +149,7 @@ soc0: soc@000000000 {
        };
        eth3: ethernet@3{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <3>;
                local-mac-address = [00 00 00 01 00 5b];
                status = "disabled";
@@ -158,7 +157,7 @@ soc0: soc@000000000 {
        };
        eth4: ethernet@4{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <4>;
                local-mac-address = [00 00 00 01 00 5c];
                status = "disabled";
@@ -166,7 +165,7 @@ soc0: soc@000000000 {
        };
        eth5: ethernet@5{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <5>;
                local-mac-address = [00 00 00 01 00 5d];
                status = "disabled";
@@ -174,7 +173,7 @@ soc0: soc@000000000 {
        };
        eth6: ethernet@6{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <6>;
                local-mac-address = [00 00 00 01 00 5e];
                status = "disabled";
@@ -182,7 +181,7 @@ soc0: soc@000000000 {
        };
        eth7: ethernet@7{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <7>;
                local-mac-address = [00 00 00 01 00 5f];
                status = "disabled";
diff --git a/arch/arm64/boot/dts/nvidia/Makefile b/arch/arm64/boot/dts/nvidia/Makefile
new file mode 100644 (file)
index 0000000..a7e865d
--- /dev/null
@@ -0,0 +1,7 @@
+dtb-$(CONFIG_ARCH_TEGRA_132_SOC) += tegra132-norrin.dtb
+dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-p2371-0000.dtb
+dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-p2371-2180.dtb
+dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-p2571.dtb
+
+always         := $(dtb-y)
+clean-files    := *.dtb
diff --git a/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts b/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts
new file mode 100644 (file)
index 0000000..62f33fc
--- /dev/null
@@ -0,0 +1,1132 @@
+/dts-v1/;
+
+#include <dt-bindings/input/input.h>
+#include "tegra132.dtsi"
+
+/ {
+       model = "NVIDIA Tegra132 Norrin";
+       compatible = "nvidia,norrin", "nvidia,tegra132", "nvidia,tegra124";
+
+       aliases {
+               rtc0 = "/i2c@0,7000d000/as3722@40";
+               rtc1 = "/rtc@0,7000e000";
+       };
+
+       chosen { };
+
+       memory {
+               device_type = "memory";
+               reg = <0x0 0x80000000 0x0 0x80000000>;
+       };
+
+       host1x@0,50000000 {
+               hdmi@0,54280000 {
+                       status = "disabled";
+
+                       vdd-supply = <&vdd_3v3_hdmi>;
+                       pll-supply = <&vdd_hdmi_pll>;
+                       hdmi-supply = <&vdd_5v0_hdmi>;
+
+                       nvidia,ddc-i2c-bus = <&hdmi_ddc>;
+                       nvidia,hpd-gpio =
+                               <&gpio TEGRA_GPIO(N, 7) GPIO_ACTIVE_HIGH>;
+               };
+
+               sor@0,54540000 {
+                       status = "okay";
+
+                       nvidia,dpaux = <&dpaux>;
+                       nvidia,panel = <&panel>;
+               };
+
+               dpaux: dpaux@0,545c0000 {
+                       vdd-supply = <&vdd_3v3_panel>;
+                       status = "okay";
+               };
+       };
+
+       gpu@0,57000000 {
+               status = "okay";
+
+               vdd-supply = <&vdd_gpu>;
+       };
+
+       pinmux@0,70000868 {
+               pinctrl-names = "default";
+               pinctrl-0 = <&pinmux_default>;
+
+               pinmux_default: pinmux@0 {
+                       dap_mclk1_pw4 {
+                               nvidia,pins = "dap_mclk1_pw4";
+                               nvidia,function = "extperiph1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_din_pa4 {
+                               nvidia,pins = "dap2_din_pa4";
+                               nvidia,function = "i2s1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       dap2_dout_pa5 {
+                               nvidia,pins = "dap2_dout_pa5",
+                                             "dap2_fs_pa2",
+                                             "dap2_sclk_pa3";
+                               nvidia,function = "i2s1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap3_dout_pp2 {
+                               nvidia,pins = "dap3_dout_pp2";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_pwm_px0 {
+                               nvidia,pins = "dvfs_pwm_px0",
+                                             "dvfs_clk_px2";
+                               nvidia,function = "cldvfs";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       ulpi_clk_py0 {
+                               nvidia,pins = "ulpi_clk_py0",
+                                             "ulpi_nxt_py2",
+                                             "ulpi_stp_py3";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       ulpi_dir_py1 {
+                               nvidia,pins = "ulpi_dir_py1";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       cam_i2c_scl_pbb1 {
+                               nvidia,pins = "cam_i2c_scl_pbb1",
+                                             "cam_i2c_sda_pbb2";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+                       };
+                       gen2_i2c_scl_pt5 {
+                               nvidia,pins = "gen2_i2c_scl_pt5",
+                                             "gen2_i2c_sda_pt6";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+                       };
+                       pj7 {
+                               nvidia,pins = "pj7";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_in_pk6 {
+                               nvidia,pins = "spdif_in_pk6";
+                               nvidia,function = "spdif";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk7 {
+                               nvidia,pins = "pk7";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       pg4 {
+                               nvidia,pins = "pg4",
+                                             "pg5",
+                                             "pg6",
+                                             "pi3";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       pg7 {
+                               nvidia,pins = "pg7";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       ph1 {
+                               nvidia,pins = "ph1";
+                               nvidia,function = "pwm1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk0 {
+                               nvidia,pins = "pk0",
+                                             "kb_row15_ps7",
+                                             "clk_32k_out_pa0";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sdmmc1_clk_pz0 {
+                               nvidia,pins = "sdmmc1_clk_pz0";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sdmmc1_cmd_pz1 {
+                               nvidia,pins = "sdmmc1_cmd_pz1",
+                                             "sdmmc1_dat0_py7",
+                                             "sdmmc1_dat1_py6",
+                                             "sdmmc1_dat2_py5",
+                                             "sdmmc1_dat3_py4";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sdmmc3_clk_pa6 {
+                               nvidia,pins = "sdmmc3_clk_pa6";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sdmmc3_cmd_pa7 {
+                               nvidia,pins = "sdmmc3_cmd_pa7",
+                                             "sdmmc3_dat0_pb7",
+                                             "sdmmc3_dat1_pb6",
+                                             "sdmmc3_dat2_pb5",
+                                             "sdmmc3_dat3_pb4",
+                                             "kb_col4_pq4",
+                                             "sdmmc3_clk_lb_out_pee4",
+                                             "sdmmc3_clk_lb_in_pee5",
+                                             "sdmmc3_cd_n_pv2";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sdmmc4_clk_pcc4 {
+                               nvidia,pins = "sdmmc4_clk_pcc4";
+                               nvidia,function = "sdmmc4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sdmmc4_cmd_pt7 {
+                               nvidia,pins = "sdmmc4_cmd_pt7",
+                                             "sdmmc4_dat0_paa0",
+                                             "sdmmc4_dat1_paa1",
+                                             "sdmmc4_dat2_paa2",
+                                             "sdmmc4_dat3_paa3",
+                                             "sdmmc4_dat4_paa4",
+                                             "sdmmc4_dat5_paa5",
+                                             "sdmmc4_dat6_paa6",
+                                             "sdmmc4_dat7_paa7";
+                               nvidia,function = "sdmmc4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       mic_det_l {
+                               nvidia,pins = "kb_row7_pr7";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       kb_row10_ps2 {
+                               nvidia,pins = "kb_row10_ps2";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       kb_row9_ps1 {
+                               nvidia,pins = "kb_row9_ps1";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_scl_pz6 {
+                               nvidia,pins = "pwr_i2c_scl_pz6",
+                                             "pwr_i2c_sda_pz7";
+                               nvidia,function = "i2cpwr";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+                       };
+                       jtag_rtck {
+                               nvidia,pins = "jtag_rtck";
+                               nvidia,function = "rtck";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_in {
+                               nvidia,pins = "clk_32k_in";
+                               nvidia,function = "clk";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       core_pwr_req {
+                               nvidia,pins = "core_pwr_req";
+                               nvidia,function = "pwron";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       cpu_pwr_req {
+                               nvidia,pins = "cpu_pwr_req";
+                               nvidia,function = "cpu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       kb_col0_ap {
+                               nvidia,pins = "kb_col0_pq0";
+                               nvidia,function = "rsvd4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       en_vdd_sd {
+                               nvidia,pins = "kb_row0_pr0";
+                               nvidia,function = "rsvd4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       lid_open {
+                               nvidia,pins = "kb_row4_pr4";
+                               nvidia,function = "rsvd3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       pwr_int_n {
+                               nvidia,pins = "pwr_int_n";
+                               nvidia,function = "pmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       reset_out_n {
+                               nvidia,pins = "reset_out_n";
+                               nvidia,function = "reset_out_n";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk3_out_pee0 {
+                               nvidia,pins = "clk3_out_pee0";
+                               nvidia,function = "extperiph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_scl_pc4 {
+                               nvidia,pins = "gen1_i2c_scl_pc4",
+                                             "gen1_i2c_sda_pc5";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+                       };
+                       hdmi_cec_pee3 {
+                               nvidia,pins = "hdmi_cec_pee3";
+                               nvidia,function = "cec";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       hdmi_int_pn7 {
+                               nvidia,pins = "hdmi_int_pn7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       ddc_scl_pv4 {
+                               nvidia,pins = "ddc_scl_pv4",
+                                             "ddc_sda_pv5";
+                               nvidia,function = "i2c4";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,rcv-sel = <TEGRA_PIN_ENABLE>;
+                       };
+                       usb_vbus_en0_pn4 {
+                               nvidia,pins = "usb_vbus_en0_pn4",
+                                             "usb_vbus_en1_pn5",
+                                             "usb_vbus_en2_pff1";
+                               nvidia,function = "usb";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       drive_sdio1 {
+                               nvidia,pins = "drive_sdio1";
+                               nvidia,high-speed-mode = <TEGRA_PIN_ENABLE>;
+                               nvidia,schmitt = <TEGRA_PIN_DISABLE>;
+                               nvidia,pull-down-strength = <36>;
+                               nvidia,pull-up-strength = <20>;
+                               nvidia,slew-rate-rising = <TEGRA_PIN_SLEW_RATE_SLOW>;
+                               nvidia,slew-rate-falling = <TEGRA_PIN_SLEW_RATE_SLOW>;
+                       };
+                       drive_sdio3 {
+                               nvidia,pins = "drive_sdio3";
+                               nvidia,high-speed-mode = <TEGRA_PIN_ENABLE>;
+                               nvidia,schmitt = <TEGRA_PIN_DISABLE>;
+                               nvidia,pull-down-strength = <22>;
+                               nvidia,pull-up-strength = <36>;
+                               nvidia,slew-rate-rising = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+                               nvidia,slew-rate-falling = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+                       };
+                       drive_gma {
+                               nvidia,pins = "drive_gma";
+                               nvidia,high-speed-mode = <TEGRA_PIN_ENABLE>;
+                               nvidia,schmitt = <TEGRA_PIN_DISABLE>;
+                               nvidia,pull-down-strength = <2>;
+                               nvidia,pull-up-strength = <1>;
+                               nvidia,slew-rate-rising = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+                               nvidia,slew-rate-falling = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+                               nvidia,drive-type = <1>;
+                       };
+                       ac_ok {
+                               nvidia,pins = "pj0";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       codec_irq_l {
+                               nvidia,pins = "ph4";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       lcd_bl_en {
+                               nvidia,pins = "ph2";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_irq_l {
+                               nvidia,pins = "gpio_w3_aud_pw3";
+                               nvidia,function = "spi6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       tpm_davint_l {
+                               nvidia,pins = "ph6";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       ts_irq_l {
+                               nvidia,pins = "pk2";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       /* pk4 pad on the "gmi" function: pulled down, not tristated, input off */
+                       ts_reset_l {
+                               nvidia,pins = "pk4";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       ts_shdn_l {
+                               nvidia,pins = "pk1";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       ph7 {
+                               nvidia,pins = "ph7";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sensor_irq_l {
+                               nvidia,pins = "pi6";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       wifi_en {
+                               nvidia,pins = "gpio_x7_aud_px7";
+                               nvidia,function = "rsvd4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       chromeos_write_protect {
+                               nvidia,pins = "kb_row1_pr1";
+                               nvidia,function = "rsvd4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       hp_det_l {
+                               nvidia,pins = "pi7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       soc_warm_reset_l {
+                               nvidia,pins = "pi5";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+               };
+       };
+
+       serial@0,70006000 {
+               status = "okay";
+       };
+
+       /* Enabled here; the backlight node's "pwms" property refers to this label. */
+       pwm: pwm@0,7000a000 {
+               status = "okay";
+       };
+
+       /* HDMI DDC: I2C bus enabled with clock-frequency 100000 (100 kHz) */
+       hdmi_ddc: i2c@0,7000c700 {
+               status = "okay";
+               clock-frequency = <100000>;
+       };
+
+       i2c@0,7000d000 {
+               status = "okay";
+               clock-frequency = <400000>;
+
+               as3722: pmic@40 {
+                       compatible = "ams,as3722";
+                       reg = <0x40>;
+                       interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
+
+                       ams,system-power-controller;
+
+                       #interrupt-cells = <2>;
+                       interrupt-controller;
+
+                       #gpio-cells = <2>;
+                       gpio-controller;
+
+                       pinctrl-names = "default";
+                       pinctrl-0 = <&as3722_default>;
+
+                       as3722_default: pinmux@0 {
+                               gpio0 {
+                                       pins = "gpio0";
+                                       function = "gpio";
+                                       bias-pull-down;
+                               };
+
+                               gpio1 {
+                                       pins = "gpio1";
+                                       function = "gpio";
+                                       bias-pull-up;
+                               };
+
+                               gpio2_4_7 {
+                                       pins = "gpio2", "gpio4", "gpio7";
+                                       function = "gpio";
+                                       bias-pull-up;
+                               };
+
+                               gpio3 {
+                                       pins = "gpio3";
+                                       function = "gpio";
+                                       bias-high-impedance;
+                               };
+
+                               gpio5 {
+                                       pins = "gpio5";
+                                       function = "clk32k-out";
+                                       bias-pull-down;
+                               };
+
+                               gpio6 {
+                                       pins = "gpio6";
+                                       function = "clk32k-out";
+                                       bias-pull-down;
+                               };
+                       };
+
+                       regulators {
+                               vsup-sd2-supply = <&vdd_5v0_sys>;
+                               vsup-sd3-supply = <&vdd_5v0_sys>;
+                               vsup-sd4-supply = <&vdd_5v0_sys>;
+                               vsup-sd5-supply = <&vdd_5v0_sys>;
+                               vin-ldo0-supply = <&vdd_1v35_lp0>;
+                               vin-ldo1-6-supply = <&vdd_3v3_sys>;
+                               vin-ldo2-5-7-supply = <&vddio_1v8>;
+                               vin-ldo3-4-supply = <&vdd_3v3_sys>;
+                               vin-ldo9-10-supply = <&vdd_5v0_sys>;
+                               vin-ldo11-supply = <&vdd_3v3_run>;
+
+                               sd0 {
+                                       regulator-name = "+VDD_CPU_AP";
+                                       regulator-min-microvolt = <700000>;
+                                       regulator-max-microvolt = <1350000>;
+                                       regulator-max-microamp = <3500000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                                       ams,ext-control = <2>;
+                               };
+
+                               sd1 {
+                                       regulator-name = "+VDD_CORE";
+                                       regulator-min-microvolt = <700000>;
+                                       regulator-max-microvolt = <1350000>;
+                                       regulator-max-microamp = <4000000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                                       ams,ext-control = <1>;
+                               };
+
+                               vdd_1v35_lp0: sd2 {
+                                       regulator-name = "+1.35V_LP0(sd2)";
+                                       regulator-min-microvolt = <1350000>;
+                                       regulator-max-microvolt = <1350000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                               };
+
+                               sd3 {
+                                       regulator-name = "+1.35V_LP0(sd3)";
+                                       regulator-min-microvolt = <1350000>;
+                                       regulator-max-microvolt = <1350000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                               };
+
+                               vdd_1v05_run: sd4 {
+                                       regulator-name = "+1.05V_RUN";
+                                       regulator-min-microvolt = <1050000>;
+                                       regulator-max-microvolt = <1050000>;
+                               };
+
+                               vddio_1v8: sd5 {
+                                       regulator-name = "+1.8V_VDDIO";
+                                       regulator-min-microvolt = <1800000>;
+                                       regulator-max-microvolt = <1800000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                               };
+
+                               vdd_gpu: sd6 {
+                                       regulator-name = "+VDD_GPU_AP";
+                                       regulator-min-microvolt = <800000>;
+                                       regulator-max-microvolt = <1200000>;
+                                       regulator-min-microamp = <3500000>;
+                                       regulator-max-microamp = <3500000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                               };
+
+                               ldo0 {
+                                       regulator-name = "+1.05_RUN_AVDD";
+                                       regulator-min-microvolt = <1050000>;
+                                       regulator-max-microvolt = <1050000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                                       ams,ext-control = <1>;
+                               };
+
+                               ldo1 {
+                                       regulator-name = "+1.8V_RUN_CAM";
+                                       regulator-min-microvolt = <1800000>;
+                                       regulator-max-microvolt = <1800000>;
+                               };
+
+                               ldo2 {
+                                       regulator-name = "+1.2V_GEN_AVDD";
+                                       regulator-min-microvolt = <1200000>;
+                                       regulator-max-microvolt = <1200000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                               };
+
+                               ldo3 {
+                                       regulator-name = "+1.00V_LP0_VDD_RTC";
+                                       regulator-min-microvolt = <1000000>;
+                                       regulator-max-microvolt = <1000000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                                       ams,enable-tracking;
+                               };
+
+                               vdd_run_cam: ldo4 {
+                                       regulator-name = "+2.8V_RUN_CAM";
+                                       regulator-min-microvolt = <2800000>;
+                                       regulator-max-microvolt = <2800000>;
+                               };
+
+                               ldo5 {
+                                       regulator-name = "+1.2V_RUN_CAM_FRONT";
+                                       regulator-min-microvolt = <1200000>;
+                                       regulator-max-microvolt = <1200000>;
+                               };
+
+                               vddio_sdmmc3: ldo6 {
+                                       regulator-name = "+VDDIO_SDMMC3";
+                                       regulator-min-microvolt = <1800000>;
+                                       regulator-max-microvolt = <3300000>;
+                               };
+
+                               ldo7 {
+                                       regulator-name = "+1.05V_RUN_CAM_REAR";
+                                       regulator-min-microvolt = <1050000>;
+                                       regulator-max-microvolt = <1050000>;
+                               };
+
+                               ldo9 {
+                                       regulator-name = "+2.8V_RUN_TOUCH";
+                                       regulator-min-microvolt = <2800000>;
+                                       regulator-max-microvolt = <2800000>;
+                               };
+
+                               ldo10 {
+                                       regulator-name = "+2.8V_RUN_CAM_AF";
+                                       regulator-min-microvolt = <2800000>;
+                                       regulator-max-microvolt = <2800000>;
+                               };
+
+                               ldo11 {
+                                       regulator-name = "+1.8V_RUN_VPP_FUSE";
+                                       regulator-min-microvolt = <1800000>;
+                                       regulator-max-microvolt = <1800000>;
+                               };
+                       };
+               };
+       };
+
+       spi@0,7000d400 {
+               status = "okay";
+
+               ec: cros-ec@0 {
+                       compatible = "google,cros-ec-spi";
+                       spi-max-frequency = <3000000>;
+                       interrupt-parent = <&gpio>;
+                       interrupts = <TEGRA_GPIO(C, 7) IRQ_TYPE_LEVEL_LOW>;
+                       reg = <0>;
+
+                       google,cros-ec-spi-msg-delay = <2000>;
+
+                       i2c_20: i2c-tunnel {
+                               compatible = "google,cros-ec-i2c-tunnel";
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+
+                               google,remote-bus = <0>;
+
+                               /*
+                                * NOTE(review): the second "interrupts" cell below is
+                                * GPIO_ACTIVE_HIGH, a GPIO polarity flag, whereas the
+                                * parent cros-ec node uses an IRQ_TYPE_* trigger there;
+                                * confirm the intended trigger type. The node also has
+                                * reg = <0x9> but no unit address in its name.
+                                */
+                               charger: bq24735 {
+                                       compatible = "ti,bq24735";
+                                       reg = <0x9>;
+                                       interrupt-parent = <&gpio>;
+                                       interrupts = <TEGRA_GPIO(J, 0)
+                                                       GPIO_ACTIVE_HIGH>;
+                                       ti,ac-detect-gpios = <&gpio
+                                                       TEGRA_GPIO(J, 0)
+                                                       GPIO_ACTIVE_HIGH>;
+                               };
+
+                               battery: smart-battery {
+                                       compatible = "sbs,sbs-battery";
+                                       reg = <0xb>;
+                                       battery-name = "battery";
+                                       sbs,i2c-retry-count = <2>;
+                                       sbs,poll-retry-count = <10>;
+                               /*      power-supplies = <&charger>; */
+                               };
+                       };
+
+                       keyboard-controller {
+                               compatible = "google,cros-ec-keyb";
+                               keypad,num-rows = <8>;
+                               keypad,num-columns = <13>;
+                               google,needs-ghost-filter;
+                               linux,keymap =
+                                       <MATRIX_KEY(0x00, 0x01, KEY_LEFTMETA)
+                                        MATRIX_KEY(0x00, 0x02, KEY_F1)
+                                        MATRIX_KEY(0x00, 0x03, KEY_B)
+                                        MATRIX_KEY(0x00, 0x04, KEY_F10)
+                                        MATRIX_KEY(0x00, 0x06, KEY_N)
+                                        MATRIX_KEY(0x00, 0x08, KEY_EQUAL)
+                                        MATRIX_KEY(0x00, 0x0a, KEY_RIGHTALT)
+
+                                        MATRIX_KEY(0x01, 0x01, KEY_ESC)
+                                        MATRIX_KEY(0x01, 0x02, KEY_F4)
+                                        MATRIX_KEY(0x01, 0x03, KEY_G)
+                                        MATRIX_KEY(0x01, 0x04, KEY_F7)
+                                        MATRIX_KEY(0x01, 0x06, KEY_H)
+                                        MATRIX_KEY(0x01, 0x08, KEY_APOSTROPHE)
+                                        MATRIX_KEY(0x01, 0x09, KEY_F9)
+                                        MATRIX_KEY(0x01, 0x0b, KEY_BACKSPACE)
+
+                                        MATRIX_KEY(0x02, 0x00, KEY_LEFTCTRL)
+                                        MATRIX_KEY(0x02, 0x01, KEY_TAB)
+                                        MATRIX_KEY(0x02, 0x02, KEY_F3)
+                                        MATRIX_KEY(0x02, 0x03, KEY_T)
+                                        MATRIX_KEY(0x02, 0x04, KEY_F6)
+                                        MATRIX_KEY(0x02, 0x05, KEY_RIGHTBRACE)
+                                        MATRIX_KEY(0x02, 0x06, KEY_Y)
+                                        MATRIX_KEY(0x02, 0x07, KEY_102ND)
+                                        MATRIX_KEY(0x02, 0x08, KEY_LEFTBRACE)
+                                        MATRIX_KEY(0x02, 0x09, KEY_F8)
+
+                                        MATRIX_KEY(0x03, 0x01, KEY_GRAVE)
+                                        MATRIX_KEY(0x03, 0x02, KEY_F2)
+                                        MATRIX_KEY(0x03, 0x03, KEY_5)
+                                        MATRIX_KEY(0x03, 0x04, KEY_F5)
+                                        MATRIX_KEY(0x03, 0x06, KEY_6)
+                                        MATRIX_KEY(0x03, 0x08, KEY_MINUS)
+                                        MATRIX_KEY(0x03, 0x0b, KEY_BACKSLASH)
+
+                                        MATRIX_KEY(0x04, 0x00, KEY_RIGHTCTRL)
+                                        MATRIX_KEY(0x04, 0x01, KEY_A)
+                                        MATRIX_KEY(0x04, 0x02, KEY_D)
+                                        MATRIX_KEY(0x04, 0x03, KEY_F)
+                                        MATRIX_KEY(0x04, 0x04, KEY_S)
+                                        MATRIX_KEY(0x04, 0x05, KEY_K)
+                                        MATRIX_KEY(0x04, 0x06, KEY_J)
+                                        MATRIX_KEY(0x04, 0x08, KEY_SEMICOLON)
+                                        MATRIX_KEY(0x04, 0x09, KEY_L)
+                                        MATRIX_KEY(0x04, 0x0a, KEY_BACKSLASH)
+                                        MATRIX_KEY(0x04, 0x0b, KEY_ENTER)
+
+                                        MATRIX_KEY(0x05, 0x01, KEY_Z)
+                                        MATRIX_KEY(0x05, 0x02, KEY_C)
+                                        MATRIX_KEY(0x05, 0x03, KEY_V)
+                                        MATRIX_KEY(0x05, 0x04, KEY_X)
+                                        MATRIX_KEY(0x05, 0x05, KEY_COMMA)
+                                        MATRIX_KEY(0x05, 0x06, KEY_M)
+                                        MATRIX_KEY(0x05, 0x07, KEY_LEFTSHIFT)
+                                        MATRIX_KEY(0x05, 0x08, KEY_SLASH)
+                                        MATRIX_KEY(0x05, 0x09, KEY_DOT)
+                                        MATRIX_KEY(0x05, 0x0b, KEY_SPACE)
+
+                                        MATRIX_KEY(0x06, 0x01, KEY_1)
+                                        MATRIX_KEY(0x06, 0x02, KEY_3)
+                                        MATRIX_KEY(0x06, 0x03, KEY_4)
+                                        MATRIX_KEY(0x06, 0x04, KEY_2)
+                                        MATRIX_KEY(0x06, 0x05, KEY_8)
+                                        MATRIX_KEY(0x06, 0x06, KEY_7)
+                                        MATRIX_KEY(0x06, 0x08, KEY_0)
+                                        MATRIX_KEY(0x06, 0x09, KEY_9)
+                                        MATRIX_KEY(0x06, 0x0a, KEY_LEFTALT)
+                                        MATRIX_KEY(0x06, 0x0b, KEY_DOWN)
+                                        MATRIX_KEY(0x06, 0x0c, KEY_RIGHT)
+
+                                        MATRIX_KEY(0x07, 0x01, KEY_Q)
+                                        MATRIX_KEY(0x07, 0x02, KEY_E)
+                                        MATRIX_KEY(0x07, 0x03, KEY_R)
+                                        MATRIX_KEY(0x07, 0x04, KEY_W)
+                                        MATRIX_KEY(0x07, 0x05, KEY_I)
+                                        MATRIX_KEY(0x07, 0x06, KEY_U)
+                                        MATRIX_KEY(0x07, 0x07, KEY_RIGHTSHIFT)
+                                        MATRIX_KEY(0x07, 0x08, KEY_P)
+                                        MATRIX_KEY(0x07, 0x09, KEY_O)
+                                        MATRIX_KEY(0x07, 0x0b, KEY_UP)
+                                        MATRIX_KEY(0x07, 0x0c, KEY_LEFT)>;
+                       };
+               };
+       };
+
+       /* Board-level power-management controller settings. */
+       pmc@0,7000e400 {
+               nvidia,invert-interrupt;
+               /* NOTE(review): 0 presumably selects LP0 per the Tegra PMC binding; confirm */
+               nvidia,suspend-mode = <0>;
+               #wake-cells = <3>;
+               nvidia,cpu-pwr-good-time = <500>;
+               nvidia,cpu-pwr-off-time = <300>;
+               nvidia,core-pwr-good-time = <641 3845>;
+               nvidia,core-pwr-off-time = <61036>;
+               nvidia,core-power-req-active-high;
+               nvidia,sys-clock-req-active-high;
+               /* GPIO (I, 5): the pinmux configures the "pi5" pad as soc_warm_reset_l */
+               nvidia,reset-gpio = <&gpio TEGRA_GPIO(I, 5) GPIO_ACTIVE_LOW>;
+       };
+
+       /* WIFI/BT module: this SDHCI controller is left disabled here */
+       sdhci@0,700b0000 {
+               status = "disabled";
+       };
+
+       /* external SD/MMC */
+       sdhci@0,700b0400 {
+               cd-gpios = <&gpio TEGRA_GPIO(V, 2) GPIO_ACTIVE_LOW>;
+               power-gpios = <&gpio TEGRA_GPIO(R, 0) GPIO_ACTIVE_HIGH>;
+               wp-gpios = <&gpio TEGRA_GPIO(Q, 4) GPIO_ACTIVE_HIGH>;
+               status = "okay";
+               bus-width = <4>;
+               vqmmc-supply = <&vddio_sdmmc3>;
+       };
+
+       /* EMMC 4.51 */
+       sdhci@0,700b0600 {
+               status = "okay";
+               bus-width = <8>;
+               non-removable;
+       };
+
+       usb@0,7d000000 {
+               status = "okay";
+       };
+
+       usb-phy@0,7d000000 {
+               status = "okay";
+               vbus-supply = <&vdd_usb1_vbus>;
+       };
+
+       usb@0,7d004000 {
+               status = "okay";
+       };
+
+       usb-phy@0,7d004000 {
+               status = "okay";
+               vbus-supply = <&vdd_run_cam>;
+       };
+
+       usb@0,7d008000 {
+               status = "okay";
+       };
+
+       usb-phy@0,7d008000 {
+               status = "okay";
+               vbus-supply = <&vdd_usb3_vbus>;
+       };
+
+       backlight: backlight {
+               compatible = "pwm-backlight";
+
+               enable-gpios = <&gpio TEGRA_GPIO(H, 2) GPIO_ACTIVE_HIGH>;
+               power-supply = <&vdd_led>;
+               pwms = <&pwm 1 1000000>;
+
+               brightness-levels = <0 4 8 16 32 64 128 255>;
+               default-brightness-level = <6>;
+
+               backlight-boot-off;
+       };
+
+       clocks {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               clk32k_in: clock@0 { /* fixed-rate 32768 Hz clock source */
+                       compatible = "fixed-clock";
+                       reg = <0>;
+                       #clock-cells = <0>;
+                       clock-frequency = <32768>;
+               };
+       };
+
+       gpio-keys {
+               compatible = "gpio-keys";
+
+               lid {
+                       label = "Lid";
+                       gpios = <&gpio TEGRA_GPIO(R, 4) GPIO_ACTIVE_LOW>;
+                       linux,input-type = <5>; /* presumably EV_SW (switch event) - confirm against input-event-codes.h */
+                       linux,code = <0>; /* presumably SW_LID - confirm against input-event-codes.h */
+                       debounce-interval = <1>;
+                       gpio-key,wakeup;
+               };
+
+               power {
+                       label = "Power";
+                       gpios = <&gpio TEGRA_GPIO(Q, 0) GPIO_ACTIVE_LOW>;
+                       linux,code = <KEY_POWER>;
+                       debounce-interval = <10>;
+                       gpio-key,wakeup;
+               };
+       };
+
+       panel: panel {
+               compatible = "innolux,n116bge", "simple-panel";
+               backlight = <&backlight>;
+               ddc-i2c-bus = <&dpaux>;
+       };
+
+       regulators {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               vdd_mux: regulator@0 {
+                       compatible = "regulator-fixed";
+                       reg = <0>;
+                       regulator-name = "+VDD_MUX";
+                       regulator-min-microvolt = <19000000>;
+                       regulator-max-microvolt = <19000000>;
+                       regulator-always-on;
+                       regulator-boot-on;
+               };
+
+               vdd_5v0_sys: regulator@1 {
+                       compatible = "regulator-fixed";
+                       reg = <1>;
+                       regulator-name = "+5V_SYS";
+                       regulator-min-microvolt = <5000000>;
+                       regulator-max-microvolt = <5000000>;
+                       regulator-always-on;
+                       regulator-boot-on;
+                       vin-supply = <&vdd_mux>;
+               };
+
+               vdd_3v3_sys: regulator@2 {
+                       compatible = "regulator-fixed";
+                       reg = <2>;
+                       regulator-name = "+3.3V_SYS";
+                       regulator-min-microvolt = <3300000>;
+                       regulator-max-microvolt = <3300000>;
+                       regulator-always-on;
+                       regulator-boot-on;
+                       vin-supply = <&vdd_mux>;
+               };
+
+               vdd_3v3_run: regulator@3 {
+                       compatible = "regulator-fixed";
+                       reg = <3>;
+                       regulator-name = "+3.3V_RUN";
+                       regulator-min-microvolt = <3300000>;
+                       regulator-max-microvolt = <3300000>;
+                       regulator-always-on;
+                       regulator-boot-on;
+                       gpio = <&as3722 1 GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+                       vin-supply = <&vdd_3v3_sys>;
+               };
+
+               vdd_3v3_hdmi: regulator@4 {
+                       compatible = "regulator-fixed";
+                       reg = <4>;
+                       regulator-name = "+3.3V_AVDD_HDMI_AP_GATED";
+                       regulator-min-microvolt = <3300000>;
+                       regulator-max-microvolt = <3300000>;
+                       vin-supply = <&vdd_3v3_run>;
+               };
+
+               vdd_led: regulator@5 {
+                       compatible = "regulator-fixed";
+                       reg = <5>;
+                       regulator-name = "+VDD_LED";
+                       regulator-min-microvolt = <3300000>;
+                       regulator-max-microvolt = <3300000>;
+                       gpio = <&gpio TEGRA_GPIO(P, 2) GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+                       vin-supply = <&vdd_mux>;
+               };
+
+               vdd_usb1_vbus: regulator@6 {
+                       compatible = "regulator-fixed";
+                       reg = <6>;
+                       regulator-name = "+5V_USB_HS";
+                       regulator-min-microvolt = <5000000>;
+                       regulator-max-microvolt = <5000000>;
+                       gpio = <&gpio TEGRA_GPIO(N, 4) GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+                       gpio-open-drain;
+                       vin-supply = <&vdd_5v0_sys>;
+               };
+
+               vdd_usb3_vbus: regulator@7 {
+                       compatible = "regulator-fixed";
+                       reg = <7>;
+                       regulator-name = "+5V_USB_SS";
+                       regulator-min-microvolt = <5000000>;
+                       regulator-max-microvolt = <5000000>;
+                       gpio = <&gpio TEGRA_GPIO(N, 5) GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+                       gpio-open-drain;
+                       vin-supply = <&vdd_5v0_sys>;
+               };
+
+               vdd_3v3_panel: regulator@8 {
+                       compatible = "regulator-fixed";
+                       reg = <8>;
+                       regulator-name = "+3.3V_PANEL";
+                       regulator-min-microvolt = <3300000>;
+                       regulator-max-microvolt = <3300000>;
+                       gpio = <&as3722 4 GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+                       vin-supply = <&vdd_3v3_sys>;
+               };
+
+               vdd_hdmi_pll: regulator@9 {
+                       compatible = "regulator-fixed";
+                       reg = <9>;
+                       regulator-name = "+1.05V_RUN_AVDD_HDMI_PLL_AP_GATE";
+                       regulator-min-microvolt = <1050000>;
+                       regulator-max-microvolt = <1050000>;
+                       gpio = <&gpio TEGRA_GPIO(H, 7) GPIO_ACTIVE_LOW>;
+                       vin-supply = <&vdd_1v05_run>;
+               };
+
+               vdd_5v0_hdmi: regulator@10 {
+                       compatible = "regulator-fixed";
+                       reg = <10>;
+                       regulator-name = "+5V_HDMI_CON";
+                       regulator-min-microvolt = <5000000>;
+                       regulator-max-microvolt = <5000000>;
+                       gpio = <&gpio TEGRA_GPIO(K, 6) GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+                       vin-supply = <&vdd_5v0_sys>;
+               };
+
+               vdd_5v0_ts: regulator@11 {
+                       compatible = "regulator-fixed";
+                       reg = <11>;
+                       regulator-name = "+5V_VDD_TS";
+                       regulator-min-microvolt = <5000000>;
+                       regulator-max-microvolt = <5000000>;
+                       regulator-always-on;
+                       regulator-boot-on;
+                       gpio = <&gpio TEGRA_GPIO(K, 1) GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+               };
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra132.dtsi b/arch/arm64/boot/dts/nvidia/tegra132.dtsi
new file mode 100644 (file)
index 0000000..e8bb460
--- /dev/null
@@ -0,0 +1,990 @@
+#include <dt-bindings/clock/tegra124-car.h>
+#include <dt-bindings/gpio/tegra-gpio.h>
+#include <dt-bindings/memory/tegra124-mc.h>
+#include <dt-bindings/pinctrl/pinctrl-tegra.h>
+#include <dt-bindings/pinctrl/pinctrl-tegra-xusb.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+       compatible = "nvidia,tegra132", "nvidia,tegra124";
+       interrupt-parent = <&lic>;
+       #address-cells = <2>;
+       #size-cells = <2>;
+
+       pcie-controller@0,01003000 {
+               compatible = "nvidia,tegra124-pcie";
+               device_type = "pci";
+               reg = <0x0 0x01003000 0x0 0x00000800   /* PADS registers */
+                      0x0 0x01003800 0x0 0x00000800   /* AFI registers */
+                      0x0 0x02000000 0x0 0x10000000>; /* configuration space */
+               reg-names = "pads", "afi", "cs";
+               interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>, /* controller interrupt */
+                            <GIC_SPI 99 IRQ_TYPE_LEVEL_HIGH>; /* MSI interrupt */
+               interrupt-names = "intr", "msi";
+
+               #interrupt-cells = <1>;
+               interrupt-map-mask = <0 0 0 0>;
+               interrupt-map = <0 0 0 0 &gic GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>;
+
+               bus-range = <0x00 0xff>;
+               #address-cells = <3>;
+               #size-cells = <2>;
+
+               ranges = <0x82000000 0 0x01000000 0x0 0x01000000 0 0x00001000   /* port 0 configuration space */
+                         0x82000000 0 0x01001000 0x0 0x01001000 0 0x00001000   /* port 1 configuration space */
+                         0x81000000 0 0x0        0x0 0x12000000 0 0x00010000   /* downstream I/O (64 KiB) */
+                         0x82000000 0 0x13000000 0x0 0x13000000 0 0x0d000000   /* non-prefetchable memory (208 MiB) */
+                         0xc2000000 0 0x20000000 0x0 0x20000000 0 0x20000000>; /* prefetchable memory (512 MiB) */
+
+               clocks = <&tegra_car TEGRA124_CLK_PCIE>,
+                        <&tegra_car TEGRA124_CLK_AFI>,
+                        <&tegra_car TEGRA124_CLK_PLL_E>,
+                        <&tegra_car TEGRA124_CLK_CML0>;
+               clock-names = "pex", "afi", "pll_e", "cml";
+               resets = <&tegra_car 70>,
+                        <&tegra_car 72>,
+                        <&tegra_car 74>;
+               reset-names = "pex", "afi", "pcie_x";
+               status = "disabled";
+
+               phys = <&padctl TEGRA_XUSB_PADCTL_PCIE>;
+               phy-names = "pcie";
+
+               pci@1,0 {
+                       device_type = "pci";
+                       assigned-addresses = <0x82000800 0 0x01000000 0 0x1000>;
+                       reg = <0x000800 0 0 0 0>;
+                       status = "disabled";
+
+                       #address-cells = <3>;
+                       #size-cells = <2>;
+                       ranges;
+
+                       nvidia,num-lanes = <2>;
+               };
+
+               pci@2,0 {
+                       device_type = "pci";
+                       assigned-addresses = <0x82001000 0 0x01001000 0 0x1000>;
+                       reg = <0x001000 0 0 0 0>;
+                       status = "disabled";
+
+                       #address-cells = <3>;
+                       #size-cells = <2>;
+                       ranges;
+
+                       nvidia,num-lanes = <1>;
+               };
+       };
+
+       host1x@0,50000000 {
+               compatible = "nvidia,tegra124-host1x", "simple-bus";
+               reg = <0x0 0x50000000 0x0 0x00034000>;
+               interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>, /* syncpt */
+                            <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>; /* general */
+               clocks = <&tegra_car TEGRA124_CLK_HOST1X>;
+               clock-names = "host1x";
+               resets = <&tegra_car 28>;
+               reset-names = "host1x";
+
+               #address-cells = <2>;
+               #size-cells = <2>;
+
+               ranges = <0 0x54000000 0 0x54000000 0 0x01000000>;
+
+               dc@0,54200000 {
+                       compatible = "nvidia,tegra124-dc";
+                       reg = <0x0 0x54200000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA124_CLK_DISP1>,
+                                <&tegra_car TEGRA124_CLK_PLL_P>;
+                       clock-names = "dc", "parent";
+                       resets = <&tegra_car 27>;
+                       reset-names = "dc";
+
+                       iommus = <&mc TEGRA_SWGROUP_DC>;
+
+                       nvidia,head = <0>;
+               };
+
+               dc@0,54240000 {
+                       compatible = "nvidia,tegra124-dc";
+                       reg = <0x0 0x54240000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA124_CLK_DISP2>,
+                                <&tegra_car TEGRA124_CLK_PLL_P>;
+                       clock-names = "dc", "parent";
+                       resets = <&tegra_car 26>;
+                       reset-names = "dc";
+
+                       iommus = <&mc TEGRA_SWGROUP_DCB>;
+
+                       nvidia,head = <1>;
+               };
+
+               hdmi@0,54280000 {
+                       compatible = "nvidia,tegra124-hdmi";
+                       reg = <0x0 0x54280000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA124_CLK_HDMI>,
+                                <&tegra_car TEGRA124_CLK_PLL_D2_OUT0>;
+                       clock-names = "hdmi", "parent";
+                       resets = <&tegra_car 51>;
+                       reset-names = "hdmi";
+                       status = "disabled";
+               };
+
+               sor@0,54540000 {
+                       compatible = "nvidia,tegra124-sor";
+                       reg = <0x0 0x54540000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA124_CLK_SOR0>,
+                                <&tegra_car TEGRA124_CLK_PLL_D_OUT0>,
+                                <&tegra_car TEGRA124_CLK_PLL_DP>,
+                                <&tegra_car TEGRA124_CLK_CLK_M>;
+                       clock-names = "sor", "parent", "dp", "safe";
+                       resets = <&tegra_car 182>;
+                       reset-names = "sor";
+                       status = "disabled";
+               };
+
+               dpaux: dpaux@0,545c0000 {
+                       compatible = "nvidia,tegra124-dpaux";
+                       reg = <0x0 0x545c0000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 159 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA124_CLK_DPAUX>,
+                                <&tegra_car TEGRA124_CLK_PLL_DP>;
+                       clock-names = "dpaux", "parent";
+                       resets = <&tegra_car 181>;
+                       reset-names = "dpaux";
+                       status = "disabled";
+               };
+       };
+
+       gic: interrupt-controller@0,50041000 {
+               compatible = "arm,cortex-a15-gic";
+               #interrupt-cells = <3>;
+               interrupt-controller;
+               reg = <0x0 0x50041000 0x0 0x1000>,
+                     <0x0 0x50042000 0x0 0x2000>,
+                     <0x0 0x50044000 0x0 0x2000>,
+                     <0x0 0x50046000 0x0 0x2000>;
+               interrupts = <GIC_PPI 9
+                       (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+               interrupt-parent = <&gic>;
+       };
+
+       gpu@0,57000000 {
+               compatible = "nvidia,gk20a";
+               reg = <0x0 0x57000000 0x0 0x01000000>,
+                     <0x0 0x58000000 0x0 0x01000000>;
+               interrupts = <GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>;
+               interrupt-names = "stall", "nonstall";
+               clocks = <&tegra_car TEGRA124_CLK_GPU>,
+                        <&tegra_car TEGRA124_CLK_PLL_P_OUT5>;
+               clock-names = "gpu", "pwr";
+               resets = <&tegra_car 184>;
+               reset-names = "gpu";
+               status = "disabled";
+       };
+
+       lic: interrupt-controller@60004000 { /* NOTE(review): unit address lacks the "0," prefix used by the other nodes in this file - confirm whether intentional */
+               compatible = "nvidia,tegra124-ictlr", "nvidia,tegra30-ictlr";
+               reg = <0x0 0x60004000 0x0 0x100>,
+                     <0x0 0x60004100 0x0 0x100>,
+                     <0x0 0x60004200 0x0 0x100>,
+                     <0x0 0x60004300 0x0 0x100>,
+                     <0x0 0x60004400 0x0 0x100>;
+               interrupt-controller;
+               #interrupt-cells = <3>;
+               interrupt-parent = <&gic>; /* this controller's own interrupt parent is the GIC node */
+       };
+
+       timer@0,60005000 {
+               compatible = "nvidia,tegra124-timer", "nvidia,tegra20-timer";
+               reg = <0x0 0x60005000 0x0 0x400>;
+               interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_TIMER>;
+               clock-names = "timer";
+       };
+
+       tegra_car: clock@0,60006000 {
+               compatible = "nvidia,tegra132-car";
+               reg = <0x0 0x60006000 0x0 0x1000>;
+               #clock-cells = <1>;
+               #reset-cells = <1>;
+               nvidia,external-memory-controller = <&emc>;
+       };
+
+       flow-controller@0,60007000 {
+               compatible = "nvidia,tegra124-flowctrl";
+               reg = <0x0 0x60007000 0x0 0x1000>;
+       };
+
+       actmon@0,6000c800 {
+               compatible = "nvidia,tegra124-actmon";
+               reg = <0x0 0x6000c800 0x0 0x400>;
+               interrupts = <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_ACTMON>,
+                        <&tegra_car TEGRA124_CLK_EMC>;
+               clock-names = "actmon", "emc";
+               resets = <&tegra_car 119>;
+               reset-names = "actmon";
+       };
+
+       gpio: gpio@0,6000d000 {
+               compatible = "nvidia,tegra124-gpio", "nvidia,tegra30-gpio";
+               reg = <0x0 0x6000d000 0x0 0x1000>;
+               interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               #interrupt-cells = <2>;
+               interrupt-controller;
+       };
+
+       apbdma: dma@0,60020000 {
+               compatible = "nvidia,tegra124-apbdma", "nvidia,tegra148-apbdma";
+               reg = <0x0 0x60020000 0x0 0x1400>;
+               interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_APBDMA>;
+               clock-names = "dma";
+               resets = <&tegra_car 34>;
+               reset-names = "dma";
+               #dma-cells = <1>;
+       };
+
+       apbmisc@0,70000800 {
+               compatible = "nvidia,tegra124-apbmisc", "nvidia,tegra20-apbmisc";
+               reg = <0x0 0x70000800 0x0 0x64>,   /* Chip revision */
+                     <0x0 0x7000e864 0x0 0x04>;   /* Strapping options */
+       };
+
+       pinmux: pinmux@0,70000868 {
+               compatible = "nvidia,tegra124-pinmux";
+               reg = <0x0 0x70000868 0x0 0x164>, /* Pad control registers */
+                     <0x0 0x70003000 0x0 0x434>, /* Mux registers */
+                     <0x0 0x70000820 0x0 0x008>; /* MIPI pad control */
+       };
+
+       /*
+        * There are two serial drivers, i.e. the 8250-based simple serial
+        * driver and the APB DMA-based serial driver for higher baud rates
+        * and performance. To enable the 8250-based driver, the compatible
+        * is "nvidia,tegra124-uart", "nvidia,tegra20-uart" and to enable
+        * the APB DMA-based serial driver, the compatible is
+        * "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart".
+        */
+       uarta: serial@0,70006000 {
+               compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006000 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_UARTA>;
+               clock-names = "serial";
+               resets = <&tegra_car 6>;
+               reset-names = "serial";
+               dmas = <&apbdma 8>, <&apbdma 8>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       uartb: serial@0,70006040 {
+               compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006040 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_UARTB>;
+               clock-names = "serial";
+               resets = <&tegra_car 7>;
+               reset-names = "serial";
+               dmas = <&apbdma 9>, <&apbdma 9>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       uartc: serial@0,70006200 {
+               compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006200 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 46 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_UARTC>;
+               clock-names = "serial";
+               resets = <&tegra_car 55>;
+               reset-names = "serial";
+               dmas = <&apbdma 10>, <&apbdma 10>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       uartd: serial@0,70006300 {
+               compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006300 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_UARTD>;
+               clock-names = "serial";
+               resets = <&tegra_car 65>;
+               reset-names = "serial";
+               dmas = <&apbdma 19>, <&apbdma 19>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       pwm: pwm@0,7000a000 {
+               compatible = "nvidia,tegra124-pwm", "nvidia,tegra20-pwm";
+               reg = <0x0 0x7000a000 0x0 0x100>;
+               #pwm-cells = <2>;
+               clocks = <&tegra_car TEGRA124_CLK_PWM>;
+               clock-names = "pwm";
+               resets = <&tegra_car 17>;
+               reset-names = "pwm";
+               status = "disabled";
+       };
+
+       i2c@0,7000c000 {
+               compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c000 0x0 0x100>;
+               interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_I2C1>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 12>;
+               reset-names = "i2c";
+               dmas = <&apbdma 21>, <&apbdma 21>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000c400 {
+               compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c400 0x0 0x100>;
+               interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_I2C2>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 54>;
+               reset-names = "i2c";
+               dmas = <&apbdma 22>, <&apbdma 22>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000c500 {
+               compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c500 0x0 0x100>;
+               interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_I2C3>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 67>;
+               reset-names = "i2c";
+               dmas = <&apbdma 23>, <&apbdma 23>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000c700 {
+               compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c700 0x0 0x100>;
+               interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_I2C4>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 103>;
+               reset-names = "i2c";
+               dmas = <&apbdma 26>, <&apbdma 26>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000d000 {
+               compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000d000 0x0 0x100>;
+               interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_I2C5>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 47>;
+               reset-names = "i2c";
+               dmas = <&apbdma 24>, <&apbdma 24>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000d100 {
+               compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000d100 0x0 0x100>;
+               interrupts = <GIC_SPI 63 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_I2C6>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 166>;
+               reset-names = "i2c";
+               dmas = <&apbdma 30>, <&apbdma 30>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000d400 {
+               compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000d400 0x0 0x200>;
+               interrupts = <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_SBC1>;
+               clock-names = "spi";
+               resets = <&tegra_car 41>;
+               reset-names = "spi";
+               dmas = <&apbdma 15>, <&apbdma 15>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000d600 {
+               compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000d600 0x0 0x200>;
+               interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_SBC2>;
+               clock-names = "spi";
+               resets = <&tegra_car 44>;
+               reset-names = "spi";
+               dmas = <&apbdma 16>, <&apbdma 16>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000d800 {
+               compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000d800 0x0 0x200>;
+               interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_SBC3>;
+               clock-names = "spi";
+               resets = <&tegra_car 46>;
+               reset-names = "spi";
+               dmas = <&apbdma 17>, <&apbdma 17>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000da00 { /* SPI controller; reg below gives base 0x7000da00, length 0x200 */
+               compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000da00 0x0 0x200>;
+               interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>; /* child nodes use one address cell and no size cell */
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_SBC4>;
+               clock-names = "spi";
+               resets = <&tegra_car 68>;
+               reset-names = "spi";
+               dmas = <&apbdma 18>, <&apbdma 18>; /* same apbdma specifier for "rx" and "tx" */
+               dma-names = "rx", "tx";
+               status = "disabled"; /* not enabled by this file */
+       };
+
+       spi@0,7000dc00 { /* SPI controller; reg below gives base 0x7000dc00, length 0x200 */
+               compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000dc00 0x0 0x200>;
+               interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>; /* child nodes use one address cell and no size cell */
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_SBC5>;
+               clock-names = "spi";
+               resets = <&tegra_car 104>;
+               reset-names = "spi";
+               dmas = <&apbdma 27>, <&apbdma 27>; /* same apbdma specifier for "rx" and "tx" */
+               dma-names = "rx", "tx";
+               status = "disabled"; /* not enabled by this file */
+       };
+
+       spi@0,7000de00 { /* SPI controller; reg below gives base 0x7000de00, length 0x200 */
+               compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000de00 0x0 0x200>;
+               interrupts = <GIC_SPI 79 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>; /* child nodes use one address cell and no size cell */
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_SBC6>;
+               clock-names = "spi";
+               resets = <&tegra_car 105>;
+               reset-names = "spi";
+               dmas = <&apbdma 28>, <&apbdma 28>; /* same apbdma specifier for "rx" and "tx" */
+               dma-names = "rx", "tx";
+               status = "disabled"; /* not enabled by this file */
+       };
+
+       rtc@0,7000e000 { /* RTC block; no status property, so it is not marked disabled here */
+               compatible = "nvidia,tegra124-rtc", "nvidia,tegra20-rtc"; /* tegra20-rtc listed second, as the less specific match */
+               reg = <0x0 0x7000e000 0x0 0x100>;
+               interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_RTC>;
+               clock-names = "rtc";
+       };
+
+       pmc@0,7000e400 { /* PMC block; takes two clocks, named "pclk" and "clk32k_in" */
+               compatible = "nvidia,tegra124-pmc";
+               reg = <0x0 0x7000e400 0x0 0x400>;
+               clocks = <&tegra_car TEGRA124_CLK_PCLK>, <&clk32k_in>; /* NOTE(review): label clk32k_in is not defined in the visible part of this file; presumably provided by the including board file - confirm */
+               clock-names = "pclk", "clk32k_in";
+       };
+
+       fuse@0,7000f800 { /* eFuse block (per compatible); one clock and one reset, both named "fuse" */
+               compatible = "nvidia,tegra124-efuse";
+               reg = <0x0 0x7000f800 0x0 0x400>;
+               clocks = <&tegra_car TEGRA124_CLK_FUSE>;
+               clock-names = "fuse";
+               resets = <&tegra_car 39>;
+               reset-names = "fuse";
+       };
+
+       mc: memory-controller@0,70019000 { /* memory controller; label "mc" is referenced by the emc node's nvidia,memory-controller */
+               compatible = "nvidia,tegra132-mc";
+               reg = <0x0 0x70019000 0x0 0x1000>;
+               clocks = <&tegra_car TEGRA124_CLK_MC>;
+               clock-names = "mc";
+
+               interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
+
+               #iommu-cells = <1>; /* iommu specifiers pointing at this node carry one cell */
+       };
+
+       emc: emc@0,7001b000 { /* EMC block; declares no clocks or interrupts in this node */
+               compatible = "nvidia,tegra132-emc", "nvidia,tegra124-emc";
+               reg = <0x0 0x7001b000 0x0 0x1000>;
+
+               nvidia,memory-controller = <&mc>; /* phandle to the memory-controller node labelled "mc" */
+       };
+
+       sata@0,70020000 { /* AHCI SATA controller; NOTE(review): unit address matches the second reg entry (SATA), not the first (AHCI) - confirm intended */
+               compatible = "nvidia,tegra124-ahci";
+               reg = <0x0 0x70027000 0x0 0x2000>, /* AHCI */
+                     <0x0 0x70020000 0x0 0x7000>; /* SATA */
+               interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_SATA>,
+                        <&tegra_car TEGRA124_CLK_SATA_OOB>,
+                        <&tegra_car TEGRA124_CLK_CML1>,
+                        <&tegra_car TEGRA124_CLK_PLL_E>;
+               clock-names = "sata", "sata-oob", "cml1", "pll_e"; /* four names, one per clocks entry above */
+               resets = <&tegra_car 124>,
+                        <&tegra_car 123>,
+                        <&tegra_car 129>;
+               reset-names = "sata", "sata-oob", "sata-cold"; /* three names, one per resets entry above */
+               phys = <&padctl TEGRA_XUSB_PADCTL_SATA>; /* single-cell specifier into the padctl node (#phy-cells = <1> there) */
+               phy-names = "sata-phy";
+               status = "disabled"; /* not enabled by this file */
+       };
+
+       hda@0,70030000 { /* HDA controller; three clocks and three resets sharing the same three names */
+               compatible = "nvidia,tegra132-hda", "nvidia,tegra124-hda",
+                            "nvidia,tegra30-hda";
+               reg = <0x0 0x70030000 0x0 0x10000>;
+               interrupts = <GIC_SPI 81 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_HDA>,
+                        <&tegra_car TEGRA124_CLK_HDA2HDMI>,
+                        <&tegra_car TEGRA124_CLK_HDA2CODEC_2X>;
+               clock-names = "hda", "hda2hdmi", "hda2codec_2x";
+               resets = <&tegra_car 125>, /* hda */
+                        <&tegra_car 128>, /* hda2hdmi */
+                        <&tegra_car 111>; /* hda2codec_2x */
+               reset-names = "hda", "hda2hdmi", "hda2codec_2x";
+               status = "disabled"; /* not enabled by this file */
+       };
+
+       padctl: padctl@0,7009f000 { /* XUSB pad controller; label "padctl" is referenced by the sata node's phys property */
+               compatible = "nvidia,tegra132-xusb-padctl",
+                            "nvidia,tegra124-xusb-padctl";
+               reg = <0x0 0x7009f000 0x0 0x1000>;
+               resets = <&tegra_car 142>;
+               reset-names = "padctl";
+
+               #phy-cells = <1>; /* consumers select a PHY with one specifier cell */
+
+               phys { /* seven PHY sub-nodes follow, every one marked disabled in this file */
+                       pcie-0 {
+                               status = "disabled";
+                       };
+
+                       sata-0 {
+                               status = "disabled";
+                       };
+
+                       usb3-0 {
+                               status = "disabled";
+                       };
+
+                       usb3-1 {
+                               status = "disabled";
+                       };
+
+                       utmi-0 {
+                               status = "disabled";
+                       };
+
+                       utmi-1 {
+                               status = "disabled";
+                       };
+
+                       utmi-2 {
+                               status = "disabled";
+                       };
+               };
+       };
+
+       sdhci@0,700b0000 { /* SDHCI controller clocked from TEGRA124_CLK_SDMMC1 */
+               compatible = "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0000 0x0 0x200>;
+               interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_SDMMC1>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 14>;
+               reset-names = "sdhci";
+               status = "disabled"; /* not enabled by this file */
+       };
+
+       sdhci@0,700b0200 { /* SDHCI controller clocked from TEGRA124_CLK_SDMMC2 */
+               compatible = "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0200 0x0 0x200>;
+               interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_SDMMC2>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 9>;
+               reset-names = "sdhci";
+               status = "disabled"; /* not enabled by this file */
+       };
+
+       sdhci@0,700b0400 { /* SDHCI controller clocked from TEGRA124_CLK_SDMMC3 */
+               compatible = "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0400 0x0 0x200>;
+               interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_SDMMC3>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 69>;
+               reset-names = "sdhci";
+               status = "disabled"; /* not enabled by this file */
+       };
+
+       sdhci@0,700b0600 { /* SDHCI controller clocked from TEGRA124_CLK_SDMMC4 */
+               compatible = "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0600 0x0 0x200>;
+               interrupts = <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_SDMMC4>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 15>;
+               reset-names = "sdhci";
+               status = "disabled"; /* not enabled by this file */
+       };
+
+       soctherm: thermal-sensor@0,700e2000 { /* thermal sensor block; has no status property, so it is not marked disabled here */
+               compatible = "nvidia,tegra124-soctherm";
+               reg = <0x0 0x700e2000 0x0 0x1000>;
+               interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_TSENSOR>,
+                       <&tegra_car TEGRA124_CLK_SOC_THERM>;
+               clock-names = "tsensor", "soctherm";
+               resets = <&tegra_car 78>;
+               reset-names = "soctherm";
+               #thermal-sensor-cells = <1>; /* consumers pick a sensor with one specifier cell */
+       };
+
+       ahub@0,70300000 { /* audio hub: parent of the five i2s child nodes below; has no status property of its own */
+               compatible = "nvidia,tegra124-ahub";
+               reg = <0x0 0x70300000 0x0 0x200>, /* three register regions; note they are not listed in ascending address order */
+                     <0x0 0x70300800 0x0 0x800>,
+                     <0x0 0x70300200 0x0 0x600>;
+               interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_D_AUDIO>,
+                        <&tegra_car TEGRA124_CLK_APBIF>;
+               clock-names = "d_audio", "apbif";
+               resets = <&tegra_car 106>, /* d_audio */
+                        <&tegra_car 107>, /* apbif */
+                        <&tegra_car 30>,  /* i2s0 */
+                        <&tegra_car 11>,  /* i2s1 */
+                        <&tegra_car 18>,  /* i2s2 */
+                        <&tegra_car 101>, /* i2s3 */
+                        <&tegra_car 102>, /* i2s4 */
+                        <&tegra_car 108>, /* dam0 */
+                        <&tegra_car 109>, /* dam1 */
+                        <&tegra_car 110>, /* dam2 */
+                        <&tegra_car 10>,  /* spdif */
+                        <&tegra_car 153>, /* amx */
+                        <&tegra_car 185>, /* amx1 */
+                        <&tegra_car 154>, /* adx */
+                        <&tegra_car 180>, /* adx1 */
+                        <&tegra_car 186>, /* afc0 */
+                        <&tegra_car 187>, /* afc1 */
+                        <&tegra_car 188>, /* afc2 */
+                        <&tegra_car 189>, /* afc3 */
+                        <&tegra_car 190>, /* afc4 */
+                        <&tegra_car 191>; /* afc5 */
+               reset-names = "d_audio", "apbif", "i2s0", "i2s1", "i2s2", /* 21 names, in the same order as the 21 resets entries above */
+                             "i2s3", "i2s4", "dam0", "dam1", "dam2",
+                             "spdif", "amx", "amx1", "adx", "adx1",
+                             "afc0", "afc1", "afc2", "afc3", "afc4", "afc5";
+               dmas = <&apbdma 1>, <&apbdma 1>, /* ten pairs; each pair repeats one apbdma specifier for rxN and txN */
+                      <&apbdma 2>, <&apbdma 2>,
+                      <&apbdma 3>, <&apbdma 3>,
+                      <&apbdma 4>, <&apbdma 4>,
+                      <&apbdma 6>, <&apbdma 6>,
+                      <&apbdma 7>, <&apbdma 7>,
+                      <&apbdma 12>, <&apbdma 12>,
+                      <&apbdma 13>, <&apbdma 13>,
+                      <&apbdma 14>, <&apbdma 14>,
+                      <&apbdma 29>, <&apbdma 29>;
+               dma-names = "rx0", "tx0", "rx1", "tx1", "rx2", "tx2",
+                           "rx3", "tx3", "rx4", "tx4", "rx5", "tx5",
+                           "rx6", "tx6", "rx7", "tx7", "rx8", "tx8",
+                           "rx9", "tx9";
+               ranges; /* empty ranges: child addresses map 1:1 onto the parent address space */
+               #address-cells = <2>; /* children use two address cells and two size cells, as their reg properties show */
+               #size-cells = <2>;
+
+               tegra_i2s0: i2s@0,70301000 { /* I2S instance 0; reset 30 is also the ahub "i2s0" reset */
+                       compatible = "nvidia,tegra124-i2s";
+                       reg = <0x0 0x70301000 0x0 0x100>;
+                       nvidia,ahub-cif-ids = <4 4>;
+                       clocks = <&tegra_car TEGRA124_CLK_I2S0>;
+                       clock-names = "i2s";
+                       resets = <&tegra_car 30>;
+                       reset-names = "i2s";
+                       status = "disabled";
+               };
+
+               tegra_i2s1: i2s@0,70301100 { /* I2S instance 1; reset 11 is also the ahub "i2s1" reset */
+                       compatible = "nvidia,tegra124-i2s";
+                       reg = <0x0 0x70301100 0x0 0x100>;
+                       nvidia,ahub-cif-ids = <5 5>;
+                       clocks = <&tegra_car TEGRA124_CLK_I2S1>;
+                       clock-names = "i2s";
+                       resets = <&tegra_car 11>;
+                       reset-names = "i2s";
+                       status = "disabled";
+               };
+
+               tegra_i2s2: i2s@0,70301200 { /* I2S instance 2; reset 18 is also the ahub "i2s2" reset */
+                       compatible = "nvidia,tegra124-i2s";
+                       reg = <0x0 0x70301200 0x0 0x100>;
+                       nvidia,ahub-cif-ids = <6 6>;
+                       clocks = <&tegra_car TEGRA124_CLK_I2S2>;
+                       clock-names = "i2s";
+                       resets = <&tegra_car 18>;
+                       reset-names = "i2s";
+                       status = "disabled";
+               };
+
+               tegra_i2s3: i2s@0,70301300 { /* I2S instance 3; reset 101 is also the ahub "i2s3" reset */
+                       compatible = "nvidia,tegra124-i2s";
+                       reg = <0x0 0x70301300 0x0 0x100>;
+                       nvidia,ahub-cif-ids = <7 7>;
+                       clocks = <&tegra_car TEGRA124_CLK_I2S3>;
+                       clock-names = "i2s";
+                       resets = <&tegra_car 101>;
+                       reset-names = "i2s";
+                       status = "disabled";
+               };
+
+               tegra_i2s4: i2s@0,70301400 { /* I2S instance 4; reset 102 is also the ahub "i2s4" reset */
+                       compatible = "nvidia,tegra124-i2s";
+                       reg = <0x0 0x70301400 0x0 0x100>;
+                       nvidia,ahub-cif-ids = <8 8>;
+                       clocks = <&tegra_car TEGRA124_CLK_I2S4>;
+                       clock-names = "i2s";
+                       resets = <&tegra_car 102>;
+                       reset-names = "i2s";
+                       status = "disabled";
+               };
+       };
+
+       usb@0,7d000000 { /* EHCI USB controller paired with the PHY node labelled phy1 */
+               compatible = "nvidia,tegra124-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+               reg = <0x0 0x7d000000 0x0 0x4000>; /* same region as the first reg entry of phy1 */
+               interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA124_CLK_USBD>;
+               clock-names = "usb";
+               resets = <&tegra_car 22>;
+               reset-names = "usb";
+               nvidia,phy = <&phy1>;
+               status = "disabled"; /* not enabled by this file */
+       };
+
+       phy1: usb-phy@0,7d000000 { /* USB PHY for usb@0,7d000000; shares that node's unit address and register region */
+               compatible = "nvidia,tegra124-usb-phy", "nvidia,tegra30-usb-phy";
+               reg = <0x0 0x7d000000 0x0 0x4000>, /* both reg entries are the same region for this PHY */
+                     <0x0 0x7d000000 0x0 0x4000>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA124_CLK_USBD>,
+                        <&tegra_car TEGRA124_CLK_PLL_U>,
+                        <&tegra_car TEGRA124_CLK_USBD>;
+               clock-names = "reg", "pll_u", "utmi-pads"; /* "reg" and "utmi-pads" both resolve to TEGRA124_CLK_USBD here */
+               resets = <&tegra_car 22>, <&tegra_car 22>; /* "usb" and "utmi-pads" use the same reset here */
+               reset-names = "usb", "utmi-pads";
+               nvidia,hssync-start-delay = <0>;
+               nvidia,idle-wait-delay = <17>;
+               nvidia,elastic-limit = <16>;
+               nvidia,term-range-adj = <6>;
+               nvidia,xcvr-setup = <9>;
+               nvidia,xcvr-lsfslew = <0>;
+               nvidia,xcvr-lsrslew = <3>;
+               nvidia,hssquelch-level = <2>;
+               nvidia,hsdiscon-level = <5>;
+               nvidia,xcvr-hsslew = <12>;
+               nvidia,has-utmi-pad-registers; /* only phy1 carries this flag; phy2 and phy3 do not */
+               status = "disabled"; /* not enabled by this file */
+       };
+
+       usb@0,7d004000 { /* EHCI USB controller paired with the PHY node labelled phy2 */
+               compatible = "nvidia,tegra124-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+               reg = <0x0 0x7d004000 0x0 0x4000>; /* same region as the first reg entry of phy2 */
+               interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA124_CLK_USB2>;
+               clock-names = "usb";
+               resets = <&tegra_car 58>;
+               reset-names = "usb";
+               nvidia,phy = <&phy2>;
+               status = "disabled"; /* not enabled by this file */
+       };
+
+       phy2: usb-phy@0,7d004000 { /* USB PHY for usb@0,7d004000 */
+               compatible = "nvidia,tegra124-usb-phy", "nvidia,tegra30-usb-phy";
+               reg = <0x0 0x7d004000 0x0 0x4000>, /* first entry: this PHY's own region */
+                     <0x0 0x7d000000 0x0 0x4000>; /* second entry: the 0x7d000000 region also used by phy1 */
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA124_CLK_USB2>,
+                        <&tegra_car TEGRA124_CLK_PLL_U>,
+                        <&tegra_car TEGRA124_CLK_USBD>;
+               clock-names = "reg", "pll_u", "utmi-pads"; /* "utmi-pads" is TEGRA124_CLK_USBD, the clock of usb@0,7d000000 */
+               resets = <&tegra_car 58>, <&tegra_car 22>; /* "utmi-pads" uses reset 22, the same one phy1 uses */
+               reset-names = "usb", "utmi-pads";
+               nvidia,hssync-start-delay = <0>;
+               nvidia,idle-wait-delay = <17>;
+               nvidia,elastic-limit = <16>;
+               nvidia,term-range-adj = <6>;
+               nvidia,xcvr-setup = <9>;
+               nvidia,xcvr-lsfslew = <0>;
+               nvidia,xcvr-lsrslew = <3>;
+               nvidia,hssquelch-level = <2>;
+               nvidia,hsdiscon-level = <5>;
+               nvidia,xcvr-hsslew = <12>;
+               status = "disabled"; /* not enabled by this file */
+       };
+
+       usb@0,7d008000 { /* EHCI USB controller paired with the PHY node labelled phy3 */
+               compatible = "nvidia,tegra124-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+               reg = <0x0 0x7d008000 0x0 0x4000>; /* same region as the first reg entry of phy3 */
+               interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA124_CLK_USB3>;
+               clock-names = "usb";
+               resets = <&tegra_car 59>;
+               reset-names = "usb";
+               nvidia,phy = <&phy3>;
+               status = "disabled"; /* not enabled by this file */
+       };
+
+       phy3: usb-phy@0,7d008000 { /* USB PHY for usb@0,7d008000 */
+               compatible = "nvidia,tegra124-usb-phy", "nvidia,tegra30-usb-phy";
+               reg = <0x0 0x7d008000 0x0 0x4000>, /* first entry: this PHY's own region */
+                     <0x0 0x7d000000 0x0 0x4000>; /* second entry: the 0x7d000000 region also used by phy1 */
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA124_CLK_USB3>,
+                        <&tegra_car TEGRA124_CLK_PLL_U>,
+                        <&tegra_car TEGRA124_CLK_USBD>;
+               clock-names = "reg", "pll_u", "utmi-pads"; /* "utmi-pads" is TEGRA124_CLK_USBD, the clock of usb@0,7d000000 */
+               resets = <&tegra_car 59>, <&tegra_car 22>; /* "utmi-pads" uses reset 22, the same one phy1 uses */
+               reset-names = "usb", "utmi-pads";
+               nvidia,hssync-start-delay = <0>;
+               nvidia,idle-wait-delay = <17>;
+               nvidia,elastic-limit = <16>;
+               nvidia,term-range-adj = <6>;
+               nvidia,xcvr-setup = <9>;
+               nvidia,xcvr-lsfslew = <0>;
+               nvidia,xcvr-lsrslew = <3>;
+               nvidia,hssquelch-level = <2>;
+               nvidia,hsdiscon-level = <5>;
+               nvidia,xcvr-hsslew = <12>;
+               status = "disabled"; /* not enabled by this file */
+       };
+
+       cpus { /* CPU topology: two cpu nodes, numbered 0 and 1 */
+               #address-cells = <1>; /* each cpu is identified by a single reg cell, with no size */
+               #size-cells = <0>;
+
+               cpu@0 {
+                       device_type = "cpu";
+                       compatible = "nvidia,denver", "arm,armv8";
+                       reg = <0>;
+               };
+
+               cpu@1 {
+                       device_type = "cpu";
+                       compatible = "nvidia,denver", "arm,armv8";
+                       reg = <1>;
+               };
+       };
+
+       timer { /* timer node with four per-CPU interrupts (PPIs 13, 14, 11, 10), all level-low */
+               compatible = "arm,armv7-timer"; /* NOTE(review): armv7 timer string although the cpu nodes in this file are "arm,armv8" - confirm intended */
+               interrupts = <GIC_PPI 13
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>, /* NOTE(review): CPU mask argument is 4 while the cpus node lists two CPUs - confirm */
+                            <GIC_PPI 14
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 11
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 10
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+               interrupt-parent = <&gic>; /* set explicitly on this node */
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi
new file mode 100644 (file)
index 0000000..2b7f889
--- /dev/null
@@ -0,0 +1,45 @@
+#include "tegra210.dtsi"
+
+/ { /* board-level additions for the P2180 module, layered on the included tegra210.dtsi */
+       model = "NVIDIA Jetson TX1";
+       compatible = "nvidia,p2180", "nvidia,tegra210";
+
+       aliases {
+               rtc1 = "/rtc@0,7000e000"; /* alias given as a full node path string */
+               serial0 = &uarta; /* alias given via a label reference; uarta is not defined in this file */
+       };
+
+       memory {
+               device_type = "memory";
+               reg = <0x0 0x80000000 0x1 0x0>; /* presumably base 0x80000000, size 0x1_0000_0000 (4 GiB), assuming 2 address + 2 size cells - confirm */
+       };
+
+       /* debug port */
+       serial@0,70006000 {
+               status = "okay"; /* enables this node for the board */
+       };
+
+       pmc@0,7000e400 {
+               nvidia,invert-interrupt; /* boolean flag; the only property this file adds to the pmc node */
+       };
+
+       /* eMMC */
+       sdhci@0,700b0600 {
+               status = "okay";
+               bus-width = <8>;
+               non-removable;
+       };
+
+       clocks { /* container for the board's fixed clock(s) */
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               clk32k_in: clock@0 { /* fixed 32768 Hz clock, exported under the label clk32k_in */
+                       compatible = "fixed-clock";
+                       reg = <0>;
+                       #clock-cells = <0>; /* consumers reference it with no specifier cells */
+                       clock-frequency = <32768>;
+               };
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2371-0000.dts b/arch/arm64/boot/dts/nvidia/tegra210-p2371-0000.dts
new file mode 100644 (file)
index 0000000..1ddd851
--- /dev/null
@@ -0,0 +1,9 @@
+/dts-v1/;
+
+#include "tegra210-p2530.dtsi"
+#include "tegra210-p2595.dtsi"
+
+/ { /* top-level board file: sets only model and compatible; everything else comes from the two includes */
+       model = "NVIDIA Tegra210 P2371 (P2530/P2595) reference design";
+       compatible = "nvidia,p2371-0000", "nvidia,tegra210"; /* board-specific string first, SoC string second */
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2371-2180.dts b/arch/arm64/boot/dts/nvidia/tegra210-p2371-2180.dts
new file mode 100644 (file)
index 0000000..683b339
--- /dev/null
@@ -0,0 +1,9 @@
+/dts-v1/;
+
+#include "tegra210-p2180.dtsi"
+#include "tegra210-p2597.dtsi"
+
+/ { /* top-level board file: sets only model and compatible; everything else comes from the two includes */
+       model = "NVIDIA Jetson TX1 Developer Kit";
+       compatible = "nvidia,p2371-2180", "nvidia,tegra210"; /* board-specific string first, SoC string second */
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2530.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2530.dtsi
new file mode 100644 (file)
index 0000000..ece0dec
--- /dev/null
@@ -0,0 +1,50 @@
+#include "tegra210.dtsi"
+
+/ { /* board-level additions for the P2530 main board, layered on the included tegra210.dtsi */
+       model = "NVIDIA Tegra210 P2530 main board";
+       compatible = "nvidia,p2530", "nvidia,tegra210";
+
+       aliases {
+               rtc1 = "/rtc@0,7000e000"; /* alias given as a full node path string */
+               serial0 = &uarta; /* alias given via a label reference; uarta is not defined in this file */
+       };
+
+       memory {
+               device_type = "memory";
+               reg = <0x0 0x80000000 0x0 0xc0000000>; /* presumably base 0x80000000, size 0xc0000000 (3 GiB), assuming 2 address + 2 size cells - confirm */
+       };
+
+       /* debug port */
+       serial@0,70006000 {
+               status = "okay"; /* enables this node for the board */
+       };
+
+       i2c@0,7000d000 {
+               status = "okay";
+               clock-frequency = <400000>; /* 400000 Hz bus clock */
+       };
+
+       pmc@0,7000e400 {
+               nvidia,invert-interrupt; /* boolean flag; the only property this file adds to the pmc node */
+       };
+
+       /* eMMC */
+       sdhci@0,700b0600 {
+               status = "okay";
+               bus-width = <8>;
+               non-removable;
+       };
+
+       clocks { /* container for the board's fixed clock(s) */
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               clk32k_in: clock@0 { /* fixed 32768 Hz clock, exported under the label clk32k_in */
+                       compatible = "fixed-clock";
+                       reg = <0>;
+                       #clock-cells = <0>; /* consumers reference it with no specifier cells */
+                       clock-frequency = <32768>;
+               };
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2571.dts b/arch/arm64/boot/dts/nvidia/tegra210-p2571.dts
new file mode 100644 (file)
index 0000000..58d27dd
--- /dev/null
@@ -0,0 +1,1302 @@
+/dts-v1/;
+
+#include <dt-bindings/input/input.h>
+#include "tegra210-p2530.dtsi"
+
+/ {
+       model = "NVIDIA Tegra210 P2571 reference design";
+       compatible = "nvidia,p2571", "nvidia,tegra210";
+
+       pinmux: pinmux@0,700008d4 {
+               pinctrl-names = "boot";
+               pinctrl-0 = <&state_boot>;
+
+               state_boot: pinmux {
+                       pex_l0_rst_n_pa0 {
+                               nvidia,pins = "pex_l0_rst_n_pa0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pex_l0_clkreq_n_pa1 {
+                               nvidia,pins = "pex_l0_clkreq_n_pa1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pex_wake_n_pa2 {
+                               nvidia,pins = "pex_wake_n_pa2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pex_l1_rst_n_pa3 {
+                               nvidia,pins = "pex_l1_rst_n_pa3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pex_l1_clkreq_n_pa4 {
+                               nvidia,pins = "pex_l1_clkreq_n_pa4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       sata_led_active_pa5 {
+                               nvidia,pins = "sata_led_active_pa5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pa6 {
+                               nvidia,pins = "pa6";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_fs_pb0 {
+                               nvidia,pins = "dap1_fs_pb0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_din_pb1 {
+                               nvidia,pins = "dap1_din_pb1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_dout_pb2 {
+                               nvidia,pins = "dap1_dout_pb2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_sclk_pb3 {
+                               nvidia,pins = "dap1_sclk_pb3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_mosi_pb4 {
+                               nvidia,pins = "spi2_mosi_pb4";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_miso_pb5 {
+                               nvidia,pins = "spi2_miso_pb5";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_sck_pb6 {
+                               nvidia,pins = "spi2_sck_pb6";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_cs0_pb7 {
+                               nvidia,pins = "spi2_cs0_pb7";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_mosi_pc0 {
+                               nvidia,pins = "spi1_mosi_pc0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_miso_pc1 {
+                               nvidia,pins = "spi1_miso_pc1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_sck_pc2 {
+                               nvidia,pins = "spi1_sck_pc2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_cs0_pc3 {
+                               nvidia,pins = "spi1_cs0_pc3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_cs1_pc4 {
+                               nvidia,pins = "spi1_cs1_pc4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_sck_pc5 {
+                               nvidia,pins = "spi4_sck_pc5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_cs0_pc6 {
+                               nvidia,pins = "spi4_cs0_pc6";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_mosi_pc7 {
+                               nvidia,pins = "spi4_mosi_pc7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_miso_pd0 {
+                               nvidia,pins = "spi4_miso_pd0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_tx_pd1 {
+                               nvidia,pins = "uart3_tx_pd1";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_rx_pd2 {
+                               nvidia,pins = "uart3_rx_pd2";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_rts_pd3 {
+                               nvidia,pins = "uart3_rts_pd3";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_cts_pd4 {
+                               nvidia,pins = "uart3_cts_pd4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic1_clk_pe0 {
+                               nvidia,pins = "dmic1_clk_pe0";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic1_dat_pe1 {
+                               nvidia,pins = "dmic1_dat_pe1";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic2_clk_pe2 {
+                               nvidia,pins = "dmic2_clk_pe2";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic2_dat_pe3 {
+                               nvidia,pins = "dmic2_dat_pe3";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic3_clk_pe4 {
+                               nvidia,pins = "dmic3_clk_pe4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic3_dat_pe5 {
+                               nvidia,pins = "dmic3_dat_pe5";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pe6 {
+                               nvidia,pins = "pe6";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pe7 {
+                               nvidia,pins = "pe7";
+                               nvidia,function = "pwm3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen3_i2c_scl_pf0 {
+                               nvidia,pins = "gen3_i2c_scl_pf0";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen3_i2c_sda_pf1 {
+                               nvidia,pins = "gen3_i2c_sda_pf1";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_tx_pg0 {
+                               nvidia,pins = "uart2_tx_pg0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_rx_pg1 {
+                               nvidia,pins = "uart2_rx_pg1";
+                               nvidia,function = "uartb";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_rts_pg2 {
+                               nvidia,pins = "uart2_rts_pg2";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_cts_pg3 {
+                               nvidia,pins = "uart2_cts_pg3";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_en_ph0 {
+                               nvidia,pins = "wifi_en_ph0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_rst_ph1 {
+                               nvidia,pins = "wifi_rst_ph1";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_wake_ap_ph2 {
+                               nvidia,pins = "wifi_wake_ap_ph2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_wake_bt_ph3 {
+                               nvidia,pins = "ap_wake_bt_ph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       bt_rst_ph4 {
+                               nvidia,pins = "bt_rst_ph4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       bt_wake_ap_ph5 {
+                               nvidia,pins = "bt_wake_ap_ph5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ph6 {
+                               nvidia,pins = "ph6";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_wake_nfc_ph7 {
+                               nvidia,pins = "ap_wake_nfc_ph7";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       nfc_en_pi0 {
+                               nvidia,pins = "nfc_en_pi0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       nfc_int_pi1 {
+                               nvidia,pins = "nfc_int_pi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gps_en_pi2 {
+                               nvidia,pins = "gps_en_pi2";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gps_rst_pi3 {
+                               nvidia,pins = "gps_rst_pi3";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_tx_pi4 {
+                               nvidia,pins = "uart4_tx_pi4";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_rx_pi5 {
+                               nvidia,pins = "uart4_rx_pi5";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_rts_pi6 {
+                               nvidia,pins = "uart4_rts_pi6";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_cts_pi7 {
+                               nvidia,pins = "uart4_cts_pi7";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_sda_pj0 {
+                               nvidia,pins = "gen1_i2c_sda_pj0";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_scl_pj1 {
+                               nvidia,pins = "gen1_i2c_scl_pj1";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen2_i2c_scl_pj2 {
+                               nvidia,pins = "gen2_i2c_scl_pj2";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       gen2_i2c_sda_pj3 {
+                               nvidia,pins = "gen2_i2c_sda_pj3";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       dap4_fs_pj4 {
+                               nvidia,pins = "dap4_fs_pj4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_din_pj5 {
+                               nvidia,pins = "dap4_din_pj5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_dout_pj6 {
+                               nvidia,pins = "dap4_dout_pj6";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_sclk_pj7 {
+                               nvidia,pins = "dap4_sclk_pj7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk0 {
+                               nvidia,pins = "pk0";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk1 {
+                               nvidia,pins = "pk1";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk2 {
+                               nvidia,pins = "pk2";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk3 {
+                               nvidia,pins = "pk3";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk4 {
+                               nvidia,pins = "pk4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk5 {
+                               nvidia,pins = "pk5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk6 {
+                               nvidia,pins = "pk6";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk7 {
+                               nvidia,pins = "pk7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pl0 {
+                               nvidia,pins = "pl0";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pl1 {
+                               nvidia,pins = "pl1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_clk_pm0 {
+                               nvidia,pins = "sdmmc1_clk_pm0";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_cmd_pm1 {
+                               nvidia,pins = "sdmmc1_cmd_pm1";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat3_pm2 {
+                               nvidia,pins = "sdmmc1_dat3_pm2";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat2_pm3 {
+                               nvidia,pins = "sdmmc1_dat2_pm3";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat1_pm4 {
+                               nvidia,pins = "sdmmc1_dat1_pm4";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat0_pm5 {
+                               nvidia,pins = "sdmmc1_dat0_pm5";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_clk_pp0 {
+                               nvidia,pins = "sdmmc3_clk_pp0";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_cmd_pp1 {
+                               nvidia,pins = "sdmmc3_cmd_pp1";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat3_pp2 {
+                               nvidia,pins = "sdmmc3_dat3_pp2";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat2_pp3 {
+                               nvidia,pins = "sdmmc3_dat2_pp3";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat1_pp4 {
+                               nvidia,pins = "sdmmc3_dat1_pp4";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat0_pp5 {
+                               nvidia,pins = "sdmmc3_dat0_pp5";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_mclk_ps0 {
+                               nvidia,pins = "cam1_mclk_ps0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam2_mclk_ps1 {
+                               nvidia,pins = "cam2_mclk_ps1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_i2c_scl_ps2 {
+                               nvidia,pins = "cam_i2c_scl_ps2";
+                               nvidia,function = "i2cvi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_i2c_sda_ps3 {
+                               nvidia,pins = "cam_i2c_sda_ps3";
+                               nvidia,function = "i2cvi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_rst_ps4 {
+                               nvidia,pins = "cam_rst_ps4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_af_en_ps5 {
+                               nvidia,pins = "cam_af_en_ps5";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_flash_en_ps6 {
+                               nvidia,pins = "cam_flash_en_ps6";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_pwdn_ps7 {
+                               nvidia,pins = "cam1_pwdn_ps7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam2_pwdn_pt0 {
+                               nvidia,pins = "cam2_pwdn_pt0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_strobe_pt1 {
+                               nvidia,pins = "cam1_strobe_pt1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_tx_pu0 {
+                               nvidia,pins = "uart1_tx_pu0";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_rx_pu1 {
+                               nvidia,pins = "uart1_rx_pu1";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_rts_pu2 {
+                               nvidia,pins = "uart1_rts_pu2";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_cts_pu3 {
+                               nvidia,pins = "uart1_cts_pu3";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_bl_pwm_pv0 {
+                               nvidia,pins = "lcd_bl_pwm_pv0";
+                               nvidia,function = "pwm0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_bl_en_pv1 {
+                               nvidia,pins = "lcd_bl_en_pv1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_rst_pv2 {
+                               nvidia,pins = "lcd_rst_pv2";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_gpio1_pv3 {
+                               nvidia,pins = "lcd_gpio1_pv3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_gpio2_pv4 {
+                               nvidia,pins = "lcd_gpio2_pv4";
+                               nvidia,function = "pwm1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_ready_pv5 {
+                               nvidia,pins = "ap_ready_pv5";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_rst_pv6 {
+                               nvidia,pins = "touch_rst_pv6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_clk_pv7 {
+                               nvidia,pins = "touch_clk_pv7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       modem_wake_ap_px0 {
+                               nvidia,pins = "modem_wake_ap_px0";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_int_px1 {
+                               nvidia,pins = "touch_int_px1";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       motion_int_px2 {
+                               nvidia,pins = "motion_int_px2";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       als_prox_int_px3 {
+                               nvidia,pins = "als_prox_int_px3";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       temp_alert_px4 {
+                               nvidia,pins = "temp_alert_px4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_power_on_px5 {
+                               nvidia,pins = "button_power_on_px5";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_vol_up_px6 {
+                               nvidia,pins = "button_vol_up_px6";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_vol_down_px7 {
+                               nvidia,pins = "button_vol_down_px7";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_slide_sw_py0 {
+                               nvidia,pins = "button_slide_sw_py0";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_home_py1 {
+                               nvidia,pins = "button_home_py1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_te_py2 {
+                               nvidia,pins = "lcd_te_py2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_scl_py3 {
+                               nvidia,pins = "pwr_i2c_scl_py3";
+                               nvidia,function = "i2cpmu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_sda_py4 {
+                               nvidia,pins = "pwr_i2c_sda_py4";
+                               nvidia,function = "i2cpmu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_out_py5 {
+                               nvidia,pins = "clk_32k_out_py5";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz0 {
+                               nvidia,pins = "pz0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz1 {
+                               nvidia,pins = "pz1";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz2 {
+                               nvidia,pins = "pz2";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz3 {
+                               nvidia,pins = "pz3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz4 {
+                               nvidia,pins = "pz4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz5 {
+                               nvidia,pins = "pz5";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_fs_paa0 {
+                               nvidia,pins = "dap2_fs_paa0";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_sclk_paa1 {
+                               nvidia,pins = "dap2_sclk_paa1";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_din_paa2 {
+                               nvidia,pins = "dap2_din_paa2";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_dout_paa3 {
+                               nvidia,pins = "dap2_dout_paa3";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       aud_mclk_pbb0 {
+                               nvidia,pins = "aud_mclk_pbb0";
+                               nvidia,function = "aud";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_pwm_pbb1 {
+                               nvidia,pins = "dvfs_pwm_pbb1";
+                               nvidia,function = "cldvfs";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_clk_pbb2 {
+                               nvidia,pins = "dvfs_clk_pbb2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gpio_x1_aud_pbb3 {
+                               nvidia,pins = "gpio_x1_aud_pbb3";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gpio_x3_aud_pbb4 {
+                               nvidia,pins = "gpio_x3_aud_pbb4";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       hdmi_cec_pcc0 {
+                               nvidia,pins = "hdmi_cec_pcc0";
+                               nvidia,function = "cec";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       hdmi_int_dp_hpd_pcc1 {
+                               nvidia,pins = "hdmi_int_dp_hpd_pcc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_out_pcc2 {
+                               nvidia,pins = "spdif_out_pcc2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_in_pcc3 {
+                               nvidia,pins = "spdif_in_pcc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       usb_vbus_en0_pcc4 {
+                               nvidia,pins = "usb_vbus_en0_pcc4";
+                               nvidia,function = "usb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       usb_vbus_en1_pcc5 {
+                               nvidia,pins = "usb_vbus_en1_pcc5";
+                               nvidia,function = "usb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       dp_hpd0_pcc6 {
+                               nvidia,pins = "dp_hpd0_pcc6";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pcc7 {
+                               nvidia,pins = "pcc7";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_cs1_pdd0 {
+                               nvidia,pins = "spi2_cs1_pdd0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_sck_pee0 {
+                               nvidia,pins = "qspi_sck_pee0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_cs_n_pee1 {
+                               nvidia,pins = "qspi_cs_n_pee1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io0_pee2 {
+                               nvidia,pins = "qspi_io0_pee2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io1_pee3 {
+                               nvidia,pins = "qspi_io1_pee3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io2_pee4 {
+                               nvidia,pins = "qspi_io2_pee4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io3_pee5 {
+                               nvidia,pins = "qspi_io3_pee5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       core_pwr_req {
+                               nvidia,pins = "core_pwr_req";
+                               nvidia,function = "core";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cpu_pwr_req {
+                               nvidia,pins = "cpu_pwr_req";
+                               nvidia,function = "cpu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_int_n {
+                               nvidia,pins = "pwr_int_n";
+                               nvidia,function = "pmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_in {
+                               nvidia,pins = "clk_32k_in";
+                               nvidia,function = "clk";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       jtag_rtck {
+                               nvidia,pins = "jtag_rtck";
+                               nvidia,function = "jtag";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_req {
+                               nvidia,pins = "clk_req";
+                               nvidia,function = "sys";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       shutdown {
+                               nvidia,pins = "shutdown";
+                               nvidia,function = "shutdown";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+               };
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2595.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2595.dtsi
new file mode 100644 (file)
index 0000000..f3f9139
--- /dev/null
@@ -0,0 +1,1272 @@
+/ {
+       model = "NVIDIA Tegra210 P2595 I/O board";
+       compatible = "nvidia,p2595", "nvidia,tegra210";
+
+       pinmux: pinmux@0,700008d4 {
+               pinctrl-names = "boot";
+               pinctrl-0 = <&state_boot>;
+
+               state_boot: pinmux {
+                       pex_l0_rst_n_pa0 {
+                               nvidia,pins = "pex_l0_rst_n_pa0";
+                               nvidia,function = "pe0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_l0_clkreq_n_pa1 {
+                               nvidia,pins = "pex_l0_clkreq_n_pa1";
+                               nvidia,function = "pe0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_wake_n_pa2 {
+                               nvidia,pins = "pex_wake_n_pa2";
+                               nvidia,function = "pe";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_l1_rst_n_pa3 {
+                               nvidia,pins = "pex_l1_rst_n_pa3";
+                               nvidia,function = "pe1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_l1_clkreq_n_pa4 {
+                               nvidia,pins = "pex_l1_clkreq_n_pa4";
+                               nvidia,function = "pe1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       sata_led_active_pa5 {
+                               nvidia,pins = "sata_led_active_pa5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pa6 {
+                               nvidia,pins = "pa6";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_fs_pb0 {
+                               nvidia,pins = "dap1_fs_pb0";
+                               nvidia,function = "i2s1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_din_pb1 {
+                               nvidia,pins = "dap1_din_pb1";
+                               nvidia,function = "i2s1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_dout_pb2 {
+                               nvidia,pins = "dap1_dout_pb2";
+                               nvidia,function = "i2s1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_sclk_pb3 {
+                               nvidia,pins = "dap1_sclk_pb3";
+                               nvidia,function = "i2s1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_mosi_pb4 {
+                               nvidia,pins = "spi2_mosi_pb4";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_miso_pb5 {
+                               nvidia,pins = "spi2_miso_pb5";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_sck_pb6 {
+                               nvidia,pins = "spi2_sck_pb6";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_cs0_pb7 {
+                               nvidia,pins = "spi2_cs0_pb7";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_mosi_pc0 {
+                               nvidia,pins = "spi1_mosi_pc0";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_miso_pc1 {
+                               nvidia,pins = "spi1_miso_pc1";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_sck_pc2 {
+                               nvidia,pins = "spi1_sck_pc2";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_cs0_pc3 {
+                               nvidia,pins = "spi1_cs0_pc3";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_cs1_pc4 {
+                               nvidia,pins = "spi1_cs1_pc4";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_sck_pc5 {
+                               nvidia,pins = "spi4_sck_pc5";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_cs0_pc6 {
+                               nvidia,pins = "spi4_cs0_pc6";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_mosi_pc7 {
+                               nvidia,pins = "spi4_mosi_pc7";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_miso_pd0 {
+                               nvidia,pins = "spi4_miso_pd0";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_tx_pd1 {
+                               nvidia,pins = "uart3_tx_pd1";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_rx_pd2 {
+                               nvidia,pins = "uart3_rx_pd2";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_rts_pd3 {
+                               nvidia,pins = "uart3_rts_pd3";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_cts_pd4 {
+                               nvidia,pins = "uart3_cts_pd4";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic1_clk_pe0 {
+                               nvidia,pins = "dmic1_clk_pe0";
+                               nvidia,function = "dmic1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic1_dat_pe1 {
+                               nvidia,pins = "dmic1_dat_pe1";
+                               nvidia,function = "dmic1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic2_clk_pe2 {
+                               nvidia,pins = "dmic2_clk_pe2";
+                               nvidia,function = "dmic2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic2_dat_pe3 {
+                               nvidia,pins = "dmic2_dat_pe3";
+                               nvidia,function = "dmic2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic3_clk_pe4 {
+                               nvidia,pins = "dmic3_clk_pe4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic3_dat_pe5 {
+                               nvidia,pins = "dmic3_dat_pe5";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pe6 {
+                               nvidia,pins = "pe6";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pe7 {
+                               nvidia,pins = "pe7";
+                               nvidia,function = "pwm3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen3_i2c_scl_pf0 {
+                               nvidia,pins = "gen3_i2c_scl_pf0";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen3_i2c_sda_pf1 {
+                               nvidia,pins = "gen3_i2c_sda_pf1";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_tx_pg0 {
+                               nvidia,pins = "uart2_tx_pg0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_rx_pg1 {
+                               nvidia,pins = "uart2_rx_pg1";
+                               nvidia,function = "uartb";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_rts_pg2 {
+                               nvidia,pins = "uart2_rts_pg2";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_cts_pg3 {
+                               nvidia,pins = "uart2_cts_pg3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_en_ph0 {
+                               nvidia,pins = "wifi_en_ph0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_rst_ph1 {
+                               nvidia,pins = "wifi_rst_ph1";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_wake_ap_ph2 {
+                               nvidia,pins = "wifi_wake_ap_ph2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_wake_bt_ph3 {
+                               nvidia,pins = "ap_wake_bt_ph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       bt_rst_ph4 {
+                               nvidia,pins = "bt_rst_ph4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       bt_wake_ap_ph5 {
+                               nvidia,pins = "bt_wake_ap_ph5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ph6 {
+                               nvidia,pins = "ph6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_wake_nfc_ph7 {
+                               nvidia,pins = "ap_wake_nfc_ph7";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       nfc_en_pi0 {
+                               nvidia,pins = "nfc_en_pi0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       nfc_int_pi1 {
+                               nvidia,pins = "nfc_int_pi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gps_en_pi2 {
+                               nvidia,pins = "gps_en_pi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gps_rst_pi3 {
+                               nvidia,pins = "gps_rst_pi3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_tx_pi4 {
+                               nvidia,pins = "uart4_tx_pi4";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_rx_pi5 {
+                               nvidia,pins = "uart4_rx_pi5";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_rts_pi6 {
+                               nvidia,pins = "uart4_rts_pi6";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_cts_pi7 {
+                               nvidia,pins = "uart4_cts_pi7";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_sda_pj0 {
+                               nvidia,pins = "gen1_i2c_sda_pj0";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_scl_pj1 {
+                               nvidia,pins = "gen1_i2c_scl_pj1";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen2_i2c_scl_pj2 {
+                               nvidia,pins = "gen2_i2c_scl_pj2";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       gen2_i2c_sda_pj3 {
+                               nvidia,pins = "gen2_i2c_sda_pj3";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       dap4_fs_pj4 {
+                               nvidia,pins = "dap4_fs_pj4";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_din_pj5 {
+                               nvidia,pins = "dap4_din_pj5";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_dout_pj6 {
+                               nvidia,pins = "dap4_dout_pj6";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_sclk_pj7 {
+                               nvidia,pins = "dap4_sclk_pj7";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk0 {
+                               nvidia,pins = "pk0";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk1 {
+                               nvidia,pins = "pk1";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk2 {
+                               nvidia,pins = "pk2";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk3 {
+                               nvidia,pins = "pk3";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk4 {
+                               nvidia,pins = "pk4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk5 {
+                               nvidia,pins = "pk5";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk6 {
+                               nvidia,pins = "pk6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk7 {
+                               nvidia,pins = "pk7";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pl0 {
+                               nvidia,pins = "pl0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pl1 {
+                               nvidia,pins = "pl1";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_clk_pm0 {
+                               nvidia,pins = "sdmmc1_clk_pm0";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_cmd_pm1 {
+                               nvidia,pins = "sdmmc1_cmd_pm1";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat3_pm2 {
+                               nvidia,pins = "sdmmc1_dat3_pm2";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat2_pm3 {
+                               nvidia,pins = "sdmmc1_dat2_pm3";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat1_pm4 {
+                               nvidia,pins = "sdmmc1_dat1_pm4";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat0_pm5 {
+                               nvidia,pins = "sdmmc1_dat0_pm5";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_clk_pp0 {
+                               nvidia,pins = "sdmmc3_clk_pp0";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_cmd_pp1 {
+                               nvidia,pins = "sdmmc3_cmd_pp1";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat3_pp2 {
+                               nvidia,pins = "sdmmc3_dat3_pp2";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat2_pp3 {
+                               nvidia,pins = "sdmmc3_dat2_pp3";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat1_pp4 {
+                               nvidia,pins = "sdmmc3_dat1_pp4";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat0_pp5 {
+                               nvidia,pins = "sdmmc3_dat0_pp5";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_mclk_ps0 {
+                               nvidia,pins = "cam1_mclk_ps0";
+                               nvidia,function = "extperiph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam2_mclk_ps1 {
+                               nvidia,pins = "cam2_mclk_ps1";
+                               nvidia,function = "extperiph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_i2c_scl_ps2 {
+                               nvidia,pins = "cam_i2c_scl_ps2";
+                               nvidia,function = "i2cvi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_i2c_sda_ps3 {
+                               nvidia,pins = "cam_i2c_sda_ps3";
+                               nvidia,function = "i2cvi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_rst_ps4 {
+                               nvidia,pins = "cam_rst_ps4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_af_en_ps5 {
+                               nvidia,pins = "cam_af_en_ps5";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_flash_en_ps6 {
+                               nvidia,pins = "cam_flash_en_ps6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_pwdn_ps7 {
+                               nvidia,pins = "cam1_pwdn_ps7";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam2_pwdn_pt0 {
+                               nvidia,pins = "cam2_pwdn_pt0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_strobe_pt1 {
+                               nvidia,pins = "cam1_strobe_pt1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_tx_pu0 {
+                               nvidia,pins = "uart1_tx_pu0";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_rx_pu1 {
+                               nvidia,pins = "uart1_rx_pu1";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_rts_pu2 {
+                               nvidia,pins = "uart1_rts_pu2";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_cts_pu3 {
+                               nvidia,pins = "uart1_cts_pu3";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_bl_pwm_pv0 {
+                               nvidia,pins = "lcd_bl_pwm_pv0";
+                               nvidia,function = "pwm0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_bl_en_pv1 {
+                               nvidia,pins = "lcd_bl_en_pv1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_rst_pv2 {
+                               nvidia,pins = "lcd_rst_pv2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_gpio1_pv3 {
+                               nvidia,pins = "lcd_gpio1_pv3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_gpio2_pv4 {
+                               nvidia,pins = "lcd_gpio2_pv4";
+                               nvidia,function = "pwm1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_ready_pv5 {
+                               nvidia,pins = "ap_ready_pv5";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_rst_pv6 {
+                               nvidia,pins = "touch_rst_pv6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_clk_pv7 {
+                               nvidia,pins = "touch_clk_pv7";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       modem_wake_ap_px0 {
+                               nvidia,pins = "modem_wake_ap_px0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_int_px1 {
+                               nvidia,pins = "touch_int_px1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       motion_int_px2 {
+                               nvidia,pins = "motion_int_px2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       als_prox_int_px3 {
+                               nvidia,pins = "als_prox_int_px3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       temp_alert_px4 {
+                               nvidia,pins = "temp_alert_px4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_power_on_px5 {
+                               nvidia,pins = "button_power_on_px5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_vol_up_px6 {
+                               nvidia,pins = "button_vol_up_px6";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_vol_down_px7 {
+                               nvidia,pins = "button_vol_down_px7";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_slide_sw_py0 {
+                               nvidia,pins = "button_slide_sw_py0";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_home_py1 {
+                               nvidia,pins = "button_home_py1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_te_py2 {
+                               nvidia,pins = "lcd_te_py2";
+                               nvidia,function = "displaya";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_scl_py3 {
+                               nvidia,pins = "pwr_i2c_scl_py3";
+                               nvidia,function = "i2cpmu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_sda_py4 {
+                               nvidia,pins = "pwr_i2c_sda_py4";
+                               nvidia,function = "i2cpmu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_out_py5 {
+                               nvidia,pins = "clk_32k_out_py5";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz0 {
+                               nvidia,pins = "pz0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz1 {
+                               nvidia,pins = "pz1";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz2 {
+                               nvidia,pins = "pz2";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz3 {
+                               nvidia,pins = "pz3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz4 {
+                               nvidia,pins = "pz4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz5 {
+                               nvidia,pins = "pz5";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_fs_paa0 {
+                               nvidia,pins = "dap2_fs_paa0";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_sclk_paa1 {
+                               nvidia,pins = "dap2_sclk_paa1";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_din_paa2 {
+                               nvidia,pins = "dap2_din_paa2";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_dout_paa3 {
+                               nvidia,pins = "dap2_dout_paa3";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       aud_mclk_pbb0 {
+                               nvidia,pins = "aud_mclk_pbb0";
+                               nvidia,function = "aud";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_pwm_pbb1 {
+                               nvidia,pins = "dvfs_pwm_pbb1";
+                               nvidia,function = "cldvfs";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_clk_pbb2 {
+                               nvidia,pins = "dvfs_clk_pbb2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gpio_x1_aud_pbb3 {
+                               nvidia,pins = "gpio_x1_aud_pbb3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gpio_x3_aud_pbb4 {
+                               nvidia,pins = "gpio_x3_aud_pbb4";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       hdmi_cec_pcc0 {
+                               nvidia,pins = "hdmi_cec_pcc0";
+                               nvidia,function = "cec";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       hdmi_int_dp_hpd_pcc1 {
+                               nvidia,pins = "hdmi_int_dp_hpd_pcc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_out_pcc2 {
+                               nvidia,pins = "spdif_out_pcc2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_in_pcc3 {
+                               nvidia,pins = "spdif_in_pcc3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       usb_vbus_en0_pcc4 {
+                               nvidia,pins = "usb_vbus_en0_pcc4";
+                               nvidia,function = "usb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       usb_vbus_en1_pcc5 {
+                               nvidia,pins = "usb_vbus_en1_pcc5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       dp_hpd0_pcc6 {
+                               nvidia,pins = "dp_hpd0_pcc6";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pcc7 {
+                               nvidia,pins = "pcc7";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_cs1_pdd0 {
+                               nvidia,pins = "spi2_cs1_pdd0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_sck_pee0 {
+                               nvidia,pins = "qspi_sck_pee0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_cs_n_pee1 {
+                               nvidia,pins = "qspi_cs_n_pee1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io0_pee2 {
+                               nvidia,pins = "qspi_io0_pee2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io1_pee3 {
+                               nvidia,pins = "qspi_io1_pee3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io2_pee4 {
+                               nvidia,pins = "qspi_io2_pee4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io3_pee5 {
+                               nvidia,pins = "qspi_io3_pee5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       core_pwr_req {
+                               nvidia,pins = "core_pwr_req";
+                               nvidia,function = "core";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cpu_pwr_req {
+                               nvidia,pins = "cpu_pwr_req";
+                               nvidia,function = "cpu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_int_n {
+                               nvidia,pins = "pwr_int_n";
+                               nvidia,function = "pmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_in {
+                               nvidia,pins = "clk_32k_in";
+                               nvidia,function = "clk";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       jtag_rtck {
+                               nvidia,pins = "jtag_rtck";
+                               nvidia,function = "jtag";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_req {
+                               nvidia,pins = "clk_req";
+                               nvidia,function = "sys";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       shutdown {
+                               nvidia,pins = "shutdown";
+                               nvidia,function = "shutdown";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+               };
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi
new file mode 100644 (file)
index 0000000..be3eccb
--- /dev/null
@@ -0,0 +1,1270 @@
+/ {
+       model = "NVIDIA Tegra210 P2597 I/O board";
+       compatible = "nvidia,p2597", "nvidia,tegra210";
+
+       pinmux: pinmux@0,700008d4 {
+               pinctrl-names = "boot";
+               pinctrl-0 = <&state_boot>;
+
+               state_boot: pinmux {
+                       pex_l0_rst_n_pa0 {
+                               nvidia,pins = "pex_l0_rst_n_pa0";
+                               nvidia,function = "pe0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_l0_clkreq_n_pa1 {
+                               nvidia,pins = "pex_l0_clkreq_n_pa1";
+                               nvidia,function = "pe0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_wake_n_pa2 {
+                               nvidia,pins = "pex_wake_n_pa2";
+                               nvidia,function = "pe";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_l1_rst_n_pa3 {
+                               nvidia,pins = "pex_l1_rst_n_pa3";
+                               nvidia,function = "pe1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_l1_clkreq_n_pa4 {
+                               nvidia,pins = "pex_l1_clkreq_n_pa4";
+                               nvidia,function = "pe1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       sata_led_active_pa5 {
+                               nvidia,pins = "sata_led_active_pa5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pa6 {
+                               nvidia,pins = "pa6";
+                               nvidia,function = "sata";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_fs_pb0 {
+                               nvidia,pins = "dap1_fs_pb0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_din_pb1 {
+                               nvidia,pins = "dap1_din_pb1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_dout_pb2 {
+                               nvidia,pins = "dap1_dout_pb2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_sclk_pb3 {
+                               nvidia,pins = "dap1_sclk_pb3";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_mosi_pb4 {
+                               nvidia,pins = "spi2_mosi_pb4";
+                               nvidia,function = "spi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_miso_pb5 {
+                               nvidia,pins = "spi2_miso_pb5";
+                               nvidia,function = "spi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_sck_pb6 {
+                               nvidia,pins = "spi2_sck_pb6";
+                               nvidia,function = "spi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_cs0_pb7 {
+                               nvidia,pins = "spi2_cs0_pb7";
+                               nvidia,function = "spi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_mosi_pc0 {
+                               nvidia,pins = "spi1_mosi_pc0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_miso_pc1 {
+                               nvidia,pins = "spi1_miso_pc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_sck_pc2 {
+                               nvidia,pins = "spi1_sck_pc2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_cs0_pc3 {
+                               nvidia,pins = "spi1_cs0_pc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_cs1_pc4 {
+                               nvidia,pins = "spi1_cs1_pc4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_sck_pc5 {
+                               nvidia,pins = "spi4_sck_pc5";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_cs0_pc6 {
+                               nvidia,pins = "spi4_cs0_pc6";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_mosi_pc7 {
+                               nvidia,pins = "spi4_mosi_pc7";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_miso_pd0 {
+                               nvidia,pins = "spi4_miso_pd0";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_tx_pd1 {
+                               nvidia,pins = "uart3_tx_pd1";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_rx_pd2 {
+                               nvidia,pins = "uart3_rx_pd2";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_rts_pd3 {
+                               nvidia,pins = "uart3_rts_pd3";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_cts_pd4 {
+                               nvidia,pins = "uart3_cts_pd4";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic1_clk_pe0 {
+                               nvidia,pins = "dmic1_clk_pe0";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic1_dat_pe1 {
+                               nvidia,pins = "dmic1_dat_pe1";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic2_clk_pe2 {
+                               nvidia,pins = "dmic2_clk_pe2";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic2_dat_pe3 {
+                               nvidia,pins = "dmic2_dat_pe3";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic3_clk_pe4 {
+                               nvidia,pins = "dmic3_clk_pe4";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic3_dat_pe5 {
+                               nvidia,pins = "dmic3_dat_pe5";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pe6 {
+                               nvidia,pins = "pe6";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pe7 {
+                               nvidia,pins = "pe7";
+                               nvidia,function = "pwm3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen3_i2c_scl_pf0 {
+                               nvidia,pins = "gen3_i2c_scl_pf0";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen3_i2c_sda_pf1 {
+                               nvidia,pins = "gen3_i2c_sda_pf1";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_tx_pg0 {
+                               nvidia,pins = "uart2_tx_pg0";
+                               nvidia,function = "uartb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_rx_pg1 {
+                               nvidia,pins = "uart2_rx_pg1";
+                               nvidia,function = "uartb";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_rts_pg2 {
+                               nvidia,pins = "uart2_rts_pg2";
+                               nvidia,function = "uartb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_cts_pg3 {
+                               nvidia,pins = "uart2_cts_pg3";
+                               nvidia,function = "uartb";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_en_ph0 {
+                               nvidia,pins = "wifi_en_ph0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_rst_ph1 {
+                               nvidia,pins = "wifi_rst_ph1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_wake_ap_ph2 {
+                               nvidia,pins = "wifi_wake_ap_ph2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_wake_bt_ph3 {
+                               nvidia,pins = "ap_wake_bt_ph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       bt_rst_ph4 {
+                               nvidia,pins = "bt_rst_ph4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       bt_wake_ap_ph5 {
+                               nvidia,pins = "bt_wake_ap_ph5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ph6 {
+                               nvidia,pins = "ph6";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_wake_nfc_ph7 {
+                               nvidia,pins = "ap_wake_nfc_ph7";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       nfc_en_pi0 {
+                               nvidia,pins = "nfc_en_pi0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       nfc_int_pi1 {
+                               nvidia,pins = "nfc_int_pi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gps_en_pi2 {
+                               nvidia,pins = "gps_en_pi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gps_rst_pi3 {
+                               nvidia,pins = "gps_rst_pi3";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_tx_pi4 {
+                               nvidia,pins = "uart4_tx_pi4";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_rx_pi5 {
+                               nvidia,pins = "uart4_rx_pi5";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_rts_pi6 {
+                               nvidia,pins = "uart4_rts_pi6";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_cts_pi7 {
+                               nvidia,pins = "uart4_cts_pi7";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_sda_pj0 {
+                               nvidia,pins = "gen1_i2c_sda_pj0";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_scl_pj1 {
+                               nvidia,pins = "gen1_i2c_scl_pj1";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen2_i2c_scl_pj2 {
+                               nvidia,pins = "gen2_i2c_scl_pj2";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       gen2_i2c_sda_pj3 {
+                               nvidia,pins = "gen2_i2c_sda_pj3";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       dap4_fs_pj4 {
+                               nvidia,pins = "dap4_fs_pj4";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_din_pj5 {
+                               nvidia,pins = "dap4_din_pj5";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_dout_pj6 {
+                               nvidia,pins = "dap4_dout_pj6";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_sclk_pj7 {
+                               nvidia,pins = "dap4_sclk_pj7";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk0 {
+                               nvidia,pins = "pk0";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk1 {
+                               nvidia,pins = "pk1";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk2 {
+                               nvidia,pins = "pk2";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk3 {
+                               nvidia,pins = "pk3";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk4 {
+                               nvidia,pins = "pk4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk5 {
+                               nvidia,pins = "pk5";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk6 {
+                               nvidia,pins = "pk6";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk7 {
+                               nvidia,pins = "pk7";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pl0 {
+                               nvidia,pins = "pl0";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pl1 {
+                               nvidia,pins = "pl1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_clk_pm0 {
+                               nvidia,pins = "sdmmc1_clk_pm0";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_cmd_pm1 {
+                               nvidia,pins = "sdmmc1_cmd_pm1";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat3_pm2 {
+                               nvidia,pins = "sdmmc1_dat3_pm2";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat2_pm3 {
+                               nvidia,pins = "sdmmc1_dat2_pm3";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat1_pm4 {
+                               nvidia,pins = "sdmmc1_dat1_pm4";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat0_pm5 {
+                               nvidia,pins = "sdmmc1_dat0_pm5";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_clk_pp0 {
+                               nvidia,pins = "sdmmc3_clk_pp0";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_cmd_pp1 {
+                               nvidia,pins = "sdmmc3_cmd_pp1";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat3_pp2 {
+                               nvidia,pins = "sdmmc3_dat3_pp2";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat2_pp3 {
+                               nvidia,pins = "sdmmc3_dat2_pp3";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat1_pp4 {
+                               nvidia,pins = "sdmmc3_dat1_pp4";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat0_pp5 {
+                               nvidia,pins = "sdmmc3_dat0_pp5";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_mclk_ps0 {
+                               nvidia,pins = "cam1_mclk_ps0";
+                               nvidia,function = "extperiph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam2_mclk_ps1 {
+                               nvidia,pins = "cam2_mclk_ps1";
+                               nvidia,function = "extperiph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_i2c_scl_ps2 {
+                               nvidia,pins = "cam_i2c_scl_ps2";
+                               nvidia,function = "i2cvi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_i2c_sda_ps3 {
+                               nvidia,pins = "cam_i2c_sda_ps3";
+                               nvidia,function = "i2cvi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_rst_ps4 {
+                               nvidia,pins = "cam_rst_ps4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_af_en_ps5 {
+                               nvidia,pins = "cam_af_en_ps5";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_flash_en_ps6 {
+                               nvidia,pins = "cam_flash_en_ps6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_pwdn_ps7 {
+                               nvidia,pins = "cam1_pwdn_ps7";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam2_pwdn_pt0 {
+                               nvidia,pins = "cam2_pwdn_pt0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_strobe_pt1 {
+                               nvidia,pins = "cam1_strobe_pt1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_tx_pu0 {
+                               nvidia,pins = "uart1_tx_pu0";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_rx_pu1 {
+                               nvidia,pins = "uart1_rx_pu1";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_rts_pu2 {
+                               nvidia,pins = "uart1_rts_pu2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_cts_pu3 {
+                               nvidia,pins = "uart1_cts_pu3";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_bl_pwm_pv0 {
+                               nvidia,pins = "lcd_bl_pwm_pv0";
+                               nvidia,function = "pwm0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_bl_en_pv1 {
+                               nvidia,pins = "lcd_bl_en_pv1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_rst_pv2 {
+                               nvidia,pins = "lcd_rst_pv2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_gpio1_pv3 {
+                               nvidia,pins = "lcd_gpio1_pv3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_gpio2_pv4 {
+                               nvidia,pins = "lcd_gpio2_pv4";
+                               nvidia,function = "pwm1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_ready_pv5 {
+                               nvidia,pins = "ap_ready_pv5";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_rst_pv6 {
+                               nvidia,pins = "touch_rst_pv6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_clk_pv7 {
+                               nvidia,pins = "touch_clk_pv7";
+                               nvidia,function = "touch";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       modem_wake_ap_px0 {
+                               nvidia,pins = "modem_wake_ap_px0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_int_px1 {
+                               nvidia,pins = "touch_int_px1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       motion_int_px2 {
+                               nvidia,pins = "motion_int_px2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       als_prox_int_px3 {
+                               nvidia,pins = "als_prox_int_px3";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       temp_alert_px4 {
+                               nvidia,pins = "temp_alert_px4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_power_on_px5 {
+                               nvidia,pins = "button_power_on_px5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_vol_up_px6 {
+                               nvidia,pins = "button_vol_up_px6";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_vol_down_px7 {
+                               nvidia,pins = "button_vol_down_px7";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_slide_sw_py0 {
+                               nvidia,pins = "button_slide_sw_py0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_home_py1 {
+                               nvidia,pins = "button_home_py1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_te_py2 {
+                               nvidia,pins = "lcd_te_py2";
+                               nvidia,function = "displaya";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_scl_py3 {
+                               nvidia,pins = "pwr_i2c_scl_py3";
+                               nvidia,function = "i2cpmu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_sda_py4 {
+                               nvidia,pins = "pwr_i2c_sda_py4";
+                               nvidia,function = "i2cpmu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_out_py5 {
+                               nvidia,pins = "clk_32k_out_py5";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz0 {
+                               nvidia,pins = "pz0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz1 {
+                               nvidia,pins = "pz1";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz2 {
+                               nvidia,pins = "pz2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz3 {
+                               nvidia,pins = "pz3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz4 {
+                               nvidia,pins = "pz4";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz5 {
+                               nvidia,pins = "pz5";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_fs_paa0 {
+                               nvidia,pins = "dap2_fs_paa0";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_sclk_paa1 {
+                               nvidia,pins = "dap2_sclk_paa1";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_din_paa2 {
+                               nvidia,pins = "dap2_din_paa2";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_dout_paa3 {
+                               nvidia,pins = "dap2_dout_paa3";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       aud_mclk_pbb0 {
+                               nvidia,pins = "aud_mclk_pbb0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_pwm_pbb1 {
+                               nvidia,pins = "dvfs_pwm_pbb1";
+                               nvidia,function = "cldvfs";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_clk_pbb2 {
+                               nvidia,pins = "dvfs_clk_pbb2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gpio_x1_aud_pbb3 {
+                               nvidia,pins = "gpio_x1_aud_pbb3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gpio_x3_aud_pbb4 {
+                               nvidia,pins = "gpio_x3_aud_pbb4";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       hdmi_cec_pcc0 {
+                               nvidia,pins = "hdmi_cec_pcc0";
+                               nvidia,function = "cec";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       hdmi_int_dp_hpd_pcc1 {
+                               nvidia,pins = "hdmi_int_dp_hpd_pcc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_out_pcc2 {
+                               nvidia,pins = "spdif_out_pcc2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_in_pcc3 {
+                               nvidia,pins = "spdif_in_pcc3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       usb_vbus_en0_pcc4 {
+                               nvidia,pins = "usb_vbus_en0_pcc4";
+                               nvidia,function = "usb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       usb_vbus_en1_pcc5 {
+                               nvidia,pins = "usb_vbus_en1_pcc5";
+                               nvidia,function = "usb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       dp_hpd0_pcc6 {
+                               nvidia,pins = "dp_hpd0_pcc6";
+                               nvidia,function = "dp";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pcc7 {
+                               nvidia,pins = "pcc7";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_cs1_pdd0 {
+                               nvidia,pins = "spi2_cs1_pdd0";
+                               nvidia,function = "spi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_sck_pee0 {
+                               nvidia,pins = "qspi_sck_pee0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_cs_n_pee1 {
+                               nvidia,pins = "qspi_cs_n_pee1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io0_pee2 {
+                               nvidia,pins = "qspi_io0_pee2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io1_pee3 {
+                               nvidia,pins = "qspi_io1_pee3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io2_pee4 {
+                               nvidia,pins = "qspi_io2_pee4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io3_pee5 {
+                               nvidia,pins = "qspi_io3_pee5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       core_pwr_req {
+                               nvidia,pins = "core_pwr_req";
+                               nvidia,function = "core";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cpu_pwr_req {
+                               nvidia,pins = "cpu_pwr_req";
+                               nvidia,function = "cpu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_int_n {
+                               nvidia,pins = "pwr_int_n";
+                               nvidia,function = "pmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_in {
+                               nvidia,pins = "clk_32k_in";
+                               nvidia,function = "clk";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       jtag_rtck {
+                               nvidia,pins = "jtag_rtck";
+                               nvidia,function = "jtag";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_req {
+                               nvidia,pins = "clk_req";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       shutdown {
+                               nvidia,pins = "shutdown";
+                               nvidia,function = "shutdown";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+               };
+       };
+
+       /* MMC/SD */
+       sdhci@0,700b0000 {
+               status = "okay";
+               bus-width = <4>;
+               no-1-8-v;
+
+               cd-gpios = <&gpio TEGRA_GPIO(Z, 1) GPIO_ACTIVE_LOW>;
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210.dtsi b/arch/arm64/boot/dts/nvidia/tegra210.dtsi
new file mode 100644 (file)
index 0000000..bc23f4d
--- /dev/null
@@ -0,0 +1,805 @@
+#include <dt-bindings/clock/tegra210-car.h>
+#include <dt-bindings/gpio/tegra-gpio.h>
+#include <dt-bindings/memory/tegra210-mc.h>
+#include <dt-bindings/pinctrl/pinctrl-tegra.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+       compatible = "nvidia,tegra210";
+       interrupt-parent = <&lic>;
+       #address-cells = <2>;
+       #size-cells = <2>;
+
+       host1x@0,50000000 {
+               compatible = "nvidia,tegra210-host1x", "simple-bus";
+               reg = <0x0 0x50000000 0x0 0x00034000>;
+               interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>, /* syncpt */
+                            <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>; /* general */
+               clocks = <&tegra_car TEGRA210_CLK_HOST1X>;
+               clock-names = "host1x";
+               resets = <&tegra_car 28>;
+               reset-names = "host1x";
+
+               #address-cells = <2>;
+               #size-cells = <2>;
+
+               ranges = <0x0 0x54000000 0x0 0x54000000 0x0 0x01000000>;
+
+               dpaux1: dpaux@0,54040000 {
+                       compatible = "nvidia,tegra210-dpaux";
+                       reg = <0x0 0x54040000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA210_CLK_DPAUX1>,
+                                <&tegra_car TEGRA210_CLK_PLL_DP>;
+                       clock-names = "dpaux", "parent";
+                       resets = <&tegra_car 207>;
+                       reset-names = "dpaux";
+                       status = "disabled";
+               };
+
+               vi@0,54080000 {
+                       compatible = "nvidia,tegra210-vi";
+                       reg = <0x0 0x54080000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>;
+                       status = "disabled";
+               };
+
+               tsec@0,54100000 {
+                       compatible = "nvidia,tegra210-tsec";
+                       reg = <0x0 0x54100000 0x0 0x00040000>;
+               };
+
+               dc@0,54200000 {
+                       compatible = "nvidia,tegra210-dc";
+                       reg = <0x0 0x54200000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA210_CLK_DISP1>,
+                                <&tegra_car TEGRA210_CLK_PLL_P>;
+                       clock-names = "dc", "parent";
+                       resets = <&tegra_car 27>;
+                       reset-names = "dc";
+
+                       iommus = <&mc TEGRA_SWGROUP_DC>;
+
+                       nvidia,head = <0>;
+               };
+
+               dc@0,54240000 {
+                       compatible = "nvidia,tegra210-dc";
+                       reg = <0x0 0x54240000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA210_CLK_DISP2>,
+                                <&tegra_car TEGRA210_CLK_PLL_P>;
+                       clock-names = "dc", "parent";
+                       resets = <&tegra_car 26>;
+                       reset-names = "dc";
+
+                       iommus = <&mc TEGRA_SWGROUP_DCB>;
+
+                       nvidia,head = <1>;
+               };
+
+               dsi@0,54300000 {
+                       compatible = "nvidia,tegra210-dsi";
+                       reg = <0x0 0x54300000 0x0 0x00040000>;
+                       clocks = <&tegra_car TEGRA210_CLK_DSIA>,
+                                <&tegra_car TEGRA210_CLK_DSIALP>,
+                                <&tegra_car TEGRA210_CLK_PLL_D_OUT0>;
+                       clock-names = "dsi", "lp", "parent";
+                       resets = <&tegra_car 48>;
+                       reset-names = "dsi";
+                       nvidia,mipi-calibrate = <&mipi 0x0c0>; /* DSIA & DSIB pads */
+
+                       status = "disabled";
+
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+               };
+
+               vic@0,54340000 {
+                       compatible = "nvidia,tegra210-vic";
+                       reg = <0x0 0x54340000 0x0 0x00040000>;
+                       status = "disabled";
+               };
+
+               nvjpg@0,54380000 {
+                       compatible = "nvidia,tegra210-nvjpg";
+                       reg = <0x0 0x54380000 0x0 0x00040000>;
+                       status = "disabled";
+               };
+
+               dsi@0,54400000 {
+                       compatible = "nvidia,tegra210-dsi";
+                       reg = <0x0 0x54400000 0x0 0x00040000>;
+                       clocks = <&tegra_car TEGRA210_CLK_DSIB>,
+                                <&tegra_car TEGRA210_CLK_DSIBLP>,
+                                <&tegra_car TEGRA210_CLK_PLL_D_OUT0>;
+                       clock-names = "dsi", "lp", "parent";
+                       resets = <&tegra_car 82>;
+                       reset-names = "dsi";
+                       nvidia,mipi-calibrate = <&mipi 0x300>; /* DSIC & DSID pads */
+
+                       status = "disabled";
+
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+               };
+
+               nvdec@0,54480000 {
+                       compatible = "nvidia,tegra210-nvdec";
+                       reg = <0x0 0x54480000 0x0 0x00040000>;
+                       status = "disabled";
+               };
+
+               nvenc@0,544c0000 {
+                       compatible = "nvidia,tegra210-nvenc";
+                       reg = <0x0 0x544c0000 0x0 0x00040000>;
+                       status = "disabled";
+               };
+
+               tsec@0,54500000 {
+                       compatible = "nvidia,tegra210-tsec";
+                       reg = <0x0 0x54500000 0x0 0x00040000>;
+                       status = "disabled";
+               };
+
+               sor@0,54540000 {
+                       compatible = "nvidia,tegra210-sor";
+                       reg = <0x0 0x54540000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA210_CLK_SOR0>,
+                                <&tegra_car TEGRA210_CLK_PLL_D_OUT0>,
+                                <&tegra_car TEGRA210_CLK_PLL_DP>,
+                                <&tegra_car TEGRA210_CLK_SOR_SAFE>;
+                       clock-names = "sor", "parent", "dp", "safe";
+                       resets = <&tegra_car 182>;
+                       reset-names = "sor";
+                       status = "disabled";
+               };
+
+               sor@0,54580000 {
+                       compatible = "nvidia,tegra210-sor1";
+                       reg = <0x0 0x54580000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA210_CLK_SOR1>,
+                                <&tegra_car TEGRA210_CLK_PLL_D2_OUT0>,
+                                <&tegra_car TEGRA210_CLK_PLL_DP>,
+                                <&tegra_car TEGRA210_CLK_SOR_SAFE>;
+                       clock-names = "sor", "parent", "dp", "safe";
+                       resets = <&tegra_car 183>;
+                       reset-names = "sor";
+                       status = "disabled";
+               };
+
+               dpaux: dpaux@0,545c0000 {
+                       compatible = "nvidia,tegra124-dpaux";
+                       reg = <0x0 0x545c0000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 159 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA210_CLK_DPAUX>,
+                                <&tegra_car TEGRA210_CLK_PLL_DP>;
+                       clock-names = "dpaux", "parent";
+                       resets = <&tegra_car 181>;
+                       reset-names = "dpaux";
+                       status = "disabled";
+               };
+
+               isp@0,54600000 {
+                       compatible = "nvidia,tegra210-isp";
+                       reg = <0x0 0x54600000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 71 IRQ_TYPE_LEVEL_HIGH>;
+                       status = "disabled";
+               };
+
+               isp@0,54680000 {
+                       compatible = "nvidia,tegra210-isp";
+                       reg = <0x0 0x54680000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>;
+                       status = "disabled";
+               };
+
+               i2c@0,546c0000 {
+                       compatible = "nvidia,tegra210-i2c-vi";
+                       reg = <0x0 0x546c0000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
+                       status = "disabled";
+               };
+       };
+
+       gic: interrupt-controller@0,50041000 {
+               compatible = "arm,gic-400";
+               #interrupt-cells = <3>;
+               interrupt-controller;
+               reg = <0x0 0x50041000 0x0 0x1000>,
+                     <0x0 0x50042000 0x0 0x2000>,
+                     <0x0 0x50044000 0x0 0x2000>,
+                     <0x0 0x50046000 0x0 0x2000>;
+               interrupts = <GIC_PPI 9
+                       (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+               interrupt-parent = <&gic>;
+       };
+
+       gpu@0,57000000 {
+               compatible = "nvidia,gm20b";
+               reg = <0x0 0x57000000 0x0 0x01000000>,
+                     <0x0 0x58000000 0x0 0x01000000>;
+               interrupts = <GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>;
+               interrupt-names = "stall", "nonstall";
+               clocks = <&tegra_car TEGRA210_CLK_GPU>,
+                        <&tegra_car TEGRA210_CLK_PLL_P_OUT5>;
+               clock-names = "gpu", "pwr";
+               resets = <&tegra_car 184>;
+               reset-names = "gpu";
+               status = "disabled";
+       };
+
+       lic: interrupt-controller@0,60004000 {
+               compatible = "nvidia,tegra210-ictlr";
+               reg = <0x0 0x60004000 0x0 0x40>, /* primary controller */
+                     <0x0 0x60004100 0x0 0x40>, /* secondary controller */
+                     <0x0 0x60004200 0x0 0x40>, /* tertiary controller */
+                     <0x0 0x60004300 0x0 0x40>, /* quaternary controller */
+                     <0x0 0x60004400 0x0 0x40>, /* quinary controller */
+                     <0x0 0x60004500 0x0 0x40>; /* senary controller */
+               interrupt-controller;
+               #interrupt-cells = <3>;
+               interrupt-parent = <&gic>;
+       };
+
+       timer@0,60005000 {
+               compatible = "nvidia,tegra210-timer", "nvidia,tegra20-timer";
+               reg = <0x0 0x60005000 0x0 0x400>;
+               interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_TIMER>;
+               clock-names = "timer";
+       };
+
+       tegra_car: clock@0,60006000 {
+               compatible = "nvidia,tegra210-car";
+               reg = <0x0 0x60006000 0x0 0x1000>;
+               #clock-cells = <1>;
+               #reset-cells = <1>;
+       };
+
+       flow-controller@0,60007000 {
+               compatible = "nvidia,tegra210-flowctrl";
+               reg = <0x0 0x60007000 0x0 0x1000>;
+       };
+
+       gpio: gpio@0,6000d000 {
+               compatible = "nvidia,tegra210-gpio", "nvidia,tegra124-gpio", "nvidia,tegra30-gpio";
+               reg = <0x0 0x6000d000 0x0 0x1000>;
+               interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               #interrupt-cells = <2>;
+               interrupt-controller;
+       };
+
+       apbdma: dma@0,60020000 {
+               compatible = "nvidia,tegra210-apbdma", "nvidia,tegra148-apbdma";
+               reg = <0x0 0x60020000 0x0 0x1400>;
+               interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_APBDMA>;
+               clock-names = "dma";
+               resets = <&tegra_car 34>;
+               reset-names = "dma";
+               #dma-cells = <1>;
+       };
+
+       apbmisc@0,70000800 {
+               compatible = "nvidia,tegra210-apbmisc", "nvidia,tegra20-apbmisc";
+               reg = <0x0 0x70000800 0x0 0x64>,   /* Chip revision */
+                     <0x0 0x7000e864 0x0 0x04>;   /* Strapping options */
+       };
+
+       pinmux: pinmux@0,700008d4 {
+               compatible = "nvidia,tegra210-pinmux";
+               reg = <0x0 0x700008d4 0x0 0x29c>, /* Pad control registers */
+                     <0x0 0x70003000 0x0 0x294>; /* Mux registers */
+       };
+
+       /*
+        * There are two serial drivers, i.e. the 8250 based simple serial
+        * driver and the APB DMA based serial driver for higher baudrate
+        * and performance. To enable the 8250 based driver, the compatible
+        * is "nvidia,tegra124-uart", "nvidia,tegra20-uart" and to enable
+        * the APB DMA based serial driver, the compatible is
+        * "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart".
+        */
+       uarta: serial@0,70006000 {
+               compatible = "nvidia,tegra210-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006000 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_UARTA>;
+               clock-names = "serial";
+               resets = <&tegra_car 6>;
+               reset-names = "serial";
+               dmas = <&apbdma 8>, <&apbdma 8>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       uartb: serial@0,70006040 {
+               compatible = "nvidia,tegra210-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006040 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_UARTB>;
+               clock-names = "serial";
+               resets = <&tegra_car 7>;
+               reset-names = "serial";
+               dmas = <&apbdma 9>, <&apbdma 9>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       uartc: serial@0,70006200 {
+               compatible = "nvidia,tegra210-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006200 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 46 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_UARTC>;
+               clock-names = "serial";
+               resets = <&tegra_car 55>;
+               reset-names = "serial";
+               dmas = <&apbdma 10>, <&apbdma 10>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       uartd: serial@0,70006300 {
+               compatible = "nvidia,tegra210-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006300 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_UARTD>;
+               clock-names = "serial";
+               resets = <&tegra_car 65>;
+               reset-names = "serial";
+               dmas = <&apbdma 19>, <&apbdma 19>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       pwm: pwm@0,7000a000 {
+               compatible = "nvidia,tegra210-pwm", "nvidia,tegra20-pwm";
+               reg = <0x0 0x7000a000 0x0 0x100>;
+               #pwm-cells = <2>;
+               clocks = <&tegra_car TEGRA210_CLK_PWM>;
+               clock-names = "pwm";
+               resets = <&tegra_car 17>;
+               reset-names = "pwm";
+               status = "disabled";
+       };
+
+       i2c@0,7000c000 {
+               compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c000 0x0 0x100>;
+               interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_I2C1>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 12>;
+               reset-names = "i2c";
+               dmas = <&apbdma 21>, <&apbdma 21>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000c400 {
+               compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c400 0x0 0x100>;
+               interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_I2C2>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 54>;
+               reset-names = "i2c";
+               dmas = <&apbdma 22>, <&apbdma 22>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000c500 {
+               compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c500 0x0 0x100>;
+               interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_I2C3>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 67>;
+               reset-names = "i2c";
+               dmas = <&apbdma 23>, <&apbdma 23>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000c700 {
+               compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c700 0x0 0x100>;
+               interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_I2C4>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 103>;
+               reset-names = "i2c";
+               dmas = <&apbdma 26>, <&apbdma 26>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000d000 {
+               compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000d000 0x0 0x100>;
+               interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_I2C5>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 47>;
+               reset-names = "i2c";
+               dmas = <&apbdma 24>, <&apbdma 24>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000d100 {
+               compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000d100 0x0 0x100>;
+               interrupts = <GIC_SPI 63 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_I2C6>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 166>;
+               reset-names = "i2c";
+               dmas = <&apbdma 30>, <&apbdma 30>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000d400 {
+               compatible = "nvidia,tegra210-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000d400 0x0 0x200>;
+               interrupts = <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_SBC1>;
+               clock-names = "spi";
+               resets = <&tegra_car 41>;
+               reset-names = "spi";
+               dmas = <&apbdma 15>, <&apbdma 15>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000d600 {
+               compatible = "nvidia,tegra210-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000d600 0x0 0x200>;
+               interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_SBC2>;
+               clock-names = "spi";
+               resets = <&tegra_car 44>;
+               reset-names = "spi";
+               dmas = <&apbdma 16>, <&apbdma 16>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000d800 {
+               compatible = "nvidia,tegra210-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000d800 0x0 0x200>;
+               interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_SBC3>;
+               clock-names = "spi";
+               resets = <&tegra_car 46>;
+               reset-names = "spi";
+               dmas = <&apbdma 17>, <&apbdma 17>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000da00 {
+               compatible = "nvidia,tegra210-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000da00 0x0 0x200>;
+               interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_SBC4>;
+               clock-names = "spi";
+               resets = <&tegra_car 68>;
+               reset-names = "spi";
+               dmas = <&apbdma 18>, <&apbdma 18>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       rtc@0,7000e000 {
+               compatible = "nvidia,tegra210-rtc", "nvidia,tegra20-rtc";
+               reg = <0x0 0x7000e000 0x0 0x100>;
+               interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_RTC>;
+               clock-names = "rtc";
+       };
+
+       pmc: pmc@0,7000e400 {
+               compatible = "nvidia,tegra210-pmc";
+               reg = <0x0 0x7000e400 0x0 0x400>;
+               clocks = <&tegra_car TEGRA210_CLK_PCLK>, <&clk32k_in>;
+               clock-names = "pclk", "clk32k_in";
+
+               #power-domain-cells = <1>;
+       };
+
+       fuse@0,7000f800 {
+               compatible = "nvidia,tegra210-efuse";
+               reg = <0x0 0x7000f800 0x0 0x400>;
+               clocks = <&tegra_car TEGRA210_CLK_FUSE>;
+               clock-names = "fuse";
+               resets = <&tegra_car 39>;
+               reset-names = "fuse";
+       };
+
+       mc: memory-controller@0,70019000 {
+               compatible = "nvidia,tegra210-mc";
+               reg = <0x0 0x70019000 0x0 0x1000>;
+               clocks = <&tegra_car TEGRA210_CLK_MC>;
+               clock-names = "mc";
+
+               interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
+
+               #iommu-cells = <1>;
+       };
+
+       hda@0,70030000 {
+               compatible = "nvidia,tegra210-hda", "nvidia,tegra30-hda";
+               reg = <0x0 0x70030000 0x0 0x10000>;
+               interrupts = <GIC_SPI 81 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_HDA>,
+                        <&tegra_car TEGRA210_CLK_HDA2HDMI>,
+                        <&tegra_car TEGRA210_CLK_HDA2CODEC_2X>;
+               clock-names = "hda", "hda2hdmi", "hda2codec_2x";
+               resets = <&tegra_car 125>, /* hda */
+                        <&tegra_car 128>, /* hda2hdmi */
+                        <&tegra_car 111>; /* hda2codec_2x */
+               reset-names = "hda", "hda2hdmi", "hda2codec_2x";
+               status = "disabled";
+       };
+
+       sdhci@0,700b0000 {
+               compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0000 0x0 0x200>;
+               interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_SDMMC1>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 14>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       sdhci@0,700b0200 {
+               compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0200 0x0 0x200>;
+               interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_SDMMC2>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 9>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       sdhci@0,700b0400 {
+               compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0400 0x0 0x200>;
+               interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_SDMMC3>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 69>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       sdhci@0,700b0600 {
+               compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0600 0x0 0x200>;
+               interrupts = <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_SDMMC4>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 15>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       mipi: mipi@0,700e3000 {
+               compatible = "nvidia,tegra210-mipi";
+               reg = <0x0 0x700e3000 0x0 0x100>;
+               clocks = <&tegra_car TEGRA210_CLK_MIPI_CAL>;
+               clock-names = "mipi-cal";
+               #nvidia,mipi-calibrate-cells = <1>;
+       };
+
+       spi@0,70410000 {
+               compatible = "nvidia,tegra210-qspi";
+               reg = <0x0 0x70410000 0x0 0x1000>;
+               interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_QSPI>;
+               clock-names = "qspi";
+               resets = <&tegra_car 211>;
+               reset-names = "qspi";
+               dmas = <&apbdma 5>, <&apbdma 5>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       usb@0,7d000000 {
+               compatible = "nvidia,tegra210-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+               reg = <0x0 0x7d000000 0x0 0x4000>;
+               interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA210_CLK_USBD>;
+               clock-names = "usb";
+               resets = <&tegra_car 22>;
+               reset-names = "usb";
+               nvidia,phy = <&phy1>;
+               status = "disabled";
+       };
+
+       phy1: usb-phy@0,7d000000 {
+               compatible = "nvidia,tegra210-usb-phy", "nvidia,tegra30-usb-phy";
+               reg = <0x0 0x7d000000 0x0 0x4000>,
+                     <0x0 0x7d000000 0x0 0x4000>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA210_CLK_USBD>,
+                        <&tegra_car TEGRA210_CLK_PLL_U>,
+                        <&tegra_car TEGRA210_CLK_USBD>;
+               clock-names = "reg", "pll_u", "utmi-pads";
+               resets = <&tegra_car 22>, <&tegra_car 22>;
+               reset-names = "usb", "utmi-pads";
+               nvidia,hssync-start-delay = <0>;
+               nvidia,idle-wait-delay = <17>;
+               nvidia,elastic-limit = <16>;
+               nvidia,term-range-adj = <6>;
+               nvidia,xcvr-setup = <9>;
+               nvidia,xcvr-lsfslew = <0>;
+               nvidia,xcvr-lsrslew = <3>;
+               nvidia,hssquelch-level = <2>;
+               nvidia,hsdiscon-level = <5>;
+               nvidia,xcvr-hsslew = <12>;
+               nvidia,has-utmi-pad-registers;
+               status = "disabled";
+       };
+
+       usb@0,7d004000 {
+               compatible = "nvidia,tegra210-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+               reg = <0x0 0x7d004000 0x0 0x4000>;
+               interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA210_CLK_USB2>;
+               clock-names = "usb";
+               resets = <&tegra_car 58>;
+               reset-names = "usb";
+               nvidia,phy = <&phy2>;
+               status = "disabled";
+       };
+
+       phy2: usb-phy@0,7d004000 {
+               compatible = "nvidia,tegra210-usb-phy", "nvidia,tegra30-usb-phy";
+               reg = <0x0 0x7d004000 0x0 0x4000>,
+                     <0x0 0x7d000000 0x0 0x4000>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA210_CLK_USB2>,
+                        <&tegra_car TEGRA210_CLK_PLL_U>,
+                        <&tegra_car TEGRA210_CLK_USBD>;
+               clock-names = "reg", "pll_u", "utmi-pads";
+               resets = <&tegra_car 58>, <&tegra_car 22>;
+               reset-names = "usb", "utmi-pads";
+               nvidia,hssync-start-delay = <0>;
+               nvidia,idle-wait-delay = <17>;
+               nvidia,elastic-limit = <16>;
+               nvidia,term-range-adj = <6>;
+               nvidia,xcvr-setup = <9>;
+               nvidia,xcvr-lsfslew = <0>;
+               nvidia,xcvr-lsrslew = <3>;
+               nvidia,hssquelch-level = <2>;
+               nvidia,hsdiscon-level = <5>;
+               nvidia,xcvr-hsslew = <12>;
+               status = "disabled";
+       };
+
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               cpu@0 {
+                       device_type = "cpu";
+                       compatible = "arm,cortex-a57";
+                       reg = <0>;
+               };
+
+               cpu@1 {
+                       device_type = "cpu";
+                       compatible = "arm,cortex-a57";
+                       reg = <1>;
+               };
+
+               cpu@2 {
+                       device_type = "cpu";
+                       compatible = "arm,cortex-a57";
+                       reg = <2>;
+               };
+
+               cpu@3 {
+                       device_type = "cpu";
+                       compatible = "arm,cortex-a57";
+                       reg = <3>;
+               };
+       };
+
+       timer {
+               compatible = "arm,armv8-timer";
+               interrupts = <GIC_PPI 13
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 14
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 11
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 10
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+               interrupt-parent = <&gic>;
+       };
+};
index 18ca9fb..86581f7 100644 (file)
@@ -16,7 +16,6 @@ CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_MEMCG=y
 CONFIG_MEMCG_SWAP=y
-CONFIG_MEMCG_KMEM=y
 CONFIG_CGROUP_HUGETLB=y
 # CONFIG_UTS_NS is not set
 # CONFIG_IPC_NS is not set
@@ -37,15 +36,13 @@ CONFIG_ARCH_EXYNOS7=y
 CONFIG_ARCH_LAYERSCAPE=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
+CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
 CONFIG_ARCH_SEATTLE=y
 CONFIG_ARCH_RENESAS=y
 CONFIG_ARCH_R8A7795=y
 CONFIG_ARCH_STRATIX10=y
 CONFIG_ARCH_TEGRA=y
-CONFIG_ARCH_TEGRA_132_SOC=y
-CONFIG_ARCH_TEGRA_210_SOC=y
-CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_SPRD=y
 CONFIG_ARCH_THUNDER=y
 CONFIG_ARCH_UNIPHIER=y
@@ -54,14 +51,19 @@ CONFIG_ARCH_XGENE=y
 CONFIG_ARCH_ZYNQMP=y
 CONFIG_PCI=y
 CONFIG_PCI_MSI=y
+CONFIG_PCI_IOV=y
+CONFIG_PCI_RCAR_GEN2_PCIE=y
 CONFIG_PCI_HOST_GENERIC=y
 CONFIG_PCI_XGENE=y
-CONFIG_SMP=y
+CONFIG_PCI_LAYERSCAPE=y
+CONFIG_PCI_HISI=y
+CONFIG_PCIE_QCOM=y
 CONFIG_SCHED_MC=y
 CONFIG_PREEMPT=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CMA=y
+CONFIG_XEN=y
 CONFIG_CMDLINE="console=ttyAMA0"
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
@@ -100,7 +102,11 @@ CONFIG_PATA_OF_PLATFORM=y
 CONFIG_NETDEVICES=y
 CONFIG_TUN=y
 CONFIG_VIRTIO_NET=y
+CONFIG_AMD_XGBE=y
 CONFIG_NET_XGENE=y
+CONFIG_E1000E=y
+CONFIG_IGB=y
+CONFIG_IGBVF=y
 CONFIG_SKY2=y
 CONFIG_RAVB=y
 CONFIG_SMC91X=y
@@ -117,25 +123,23 @@ CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_8250_MT6577=y
 CONFIG_SERIAL_8250_UNIPHIER=y
+CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
 CONFIG_SERIAL_SAMSUNG=y
-CONFIG_SERIAL_SAMSUNG_UARTS_4=y
-CONFIG_SERIAL_SAMSUNG_UARTS=4
 CONFIG_SERIAL_SAMSUNG_CONSOLE=y
+CONFIG_SERIAL_TEGRA=y
 CONFIG_SERIAL_SH_SCI=y
 CONFIG_SERIAL_SH_SCI_NR_UARTS=11
 CONFIG_SERIAL_SH_SCI_CONSOLE=y
-CONFIG_SERIAL_TEGRA=y
 CONFIG_SERIAL_MSM=y
 CONFIG_SERIAL_MSM_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_XILINX_PS_UART=y
 CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y
 CONFIG_VIRTIO_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
 CONFIG_I2C_QUP=y
+CONFIG_I2C_UNIPHIER_F=y
 CONFIG_I2C_RCAR=y
 CONFIG_SPI=y
 CONFIG_SPI_PL022=y
@@ -176,8 +180,6 @@ CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_SDHCI_TEGRA=y
 CONFIG_MMC_SPI=y
 CONFIG_MMC_DW=y
-CONFIG_MMC_DW_IDMAC=y
-CONFIG_MMC_DW_PLTFM=y
 CONFIG_MMC_DW_EXYNOS=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
@@ -187,28 +189,33 @@ CONFIG_LEDS_TRIGGER_HEARTBEAT=y
 CONFIG_LEDS_TRIGGER_CPU=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_EFI=y
+CONFIG_RTC_DRV_PL031=y
 CONFIG_RTC_DRV_XGENE=y
 CONFIG_DMADEVICES=y
-CONFIG_RCAR_DMAC=y
 CONFIG_QCOM_BAM_DMA=y
 CONFIG_TEGRA20_APB_DMA=y
+CONFIG_RCAR_DMAC=y
+CONFIG_VFIO=y
+CONFIG_VFIO_PCI=y
 CONFIG_VIRTIO_PCI=y
 CONFIG_VIRTIO_BALLOON=y
 CONFIG_VIRTIO_MMIO=y
+CONFIG_XEN_GNTDEV=y
+CONFIG_XEN_GRANT_DEV_ALLOC=y
 CONFIG_COMMON_CLK_CS2000_CP=y
 CONFIG_COMMON_CLK_QCOM=y
 CONFIG_MSM_GCC_8916=y
 CONFIG_HWSPINLOCK_QCOM=y
-# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_ARM_SMMU=y
 CONFIG_QCOM_SMEM=y
 CONFIG_QCOM_SMD=y
 CONFIG_QCOM_SMD_RPM=y
+CONFIG_ARCH_TEGRA_132_SOC=y
+CONFIG_ARCH_TEGRA_210_SOC=y
+CONFIG_HISILICON_IRQ_MBIGEN=y
 CONFIG_PHY_XGENE=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
-CONFIG_EXT4_FS=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA=y
@@ -239,6 +246,7 @@ CONFIG_LOCKUP_DETECTOR=y
 # CONFIG_FTRACE is not set
 CONFIG_MEMTEST=y
 CONFIG_SECURITY=y
+CONFIG_CRYPTO_ECHAINIV=y
 CONFIG_CRYPTO_ANSI_CPRNG=y
 CONFIG_ARM64_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM64_CE=y
index 007a69f..5f3ab8c 100644 (file)
@@ -121,6 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
                return -EFAULT;
 
        asm volatile("// futex_atomic_cmpxchg_inatomic\n"
+ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
 "      prfm    pstl1strm, %2\n"
 "1:    ldxr    %w1, %2\n"
 "      sub     %w3, %w1, %w4\n"
@@ -137,6 +138,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 "      .align  3\n"
 "      .quad   1b, 4b, 2b, 4b\n"
 "      .popsection\n"
+ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
        : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
        : "r" (oldval), "r" (newval), "Ir" (-EFAULT)
        : "memory");
index 738a95f..bef6e92 100644 (file)
 #define CPTR_EL2_TCPAC (1 << 31)
 #define CPTR_EL2_TTA   (1 << 20)
 #define CPTR_EL2_TFP   (1 << CPTR_EL2_TFP_SHIFT)
+#define CPTR_EL2_DEFAULT       0x000033ff
 
 /* Hyp Debug Configuration Register bits */
 #define MDCR_EL2_TDRA          (1 << 11)
index 3066328..779a587 100644 (file)
@@ -127,10 +127,14 @@ static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
 
 static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
 {
-       u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
+       u32 mode;
 
-       if (vcpu_mode_is_32bit(vcpu))
+       if (vcpu_mode_is_32bit(vcpu)) {
+               mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
                return mode > COMPAT_PSR_MODE_USR;
+       }
+
+       mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
 
        return mode != PSR_MODE_EL0t;
 }
index 9b2f5a9..ae615b9 100644 (file)
@@ -39,6 +39,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/personality.h> /* for READ_IMPLIES_EXEC */
 #include <asm/pgtable-types.h>
 
 extern void __cpu_clear_user_page(void *p, unsigned long user);
index 2d545d7..bf464de 100644 (file)
@@ -67,11 +67,11 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 #define PROT_DEFAULT           (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
 #define PROT_SECT_DEFAULT      (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
 
-#define PROT_DEVICE_nGnRnE     (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
-#define PROT_DEVICE_nGnRE      (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL_WT         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_WT))
-#define PROT_NORMAL            (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_DEVICE_nGnRnE     (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+#define PROT_DEVICE_nGnRE      (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL_WT         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
+#define PROT_NORMAL            (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
 
 #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
 #define PROT_SECT_NORMAL       (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
@@ -81,7 +81,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 
 #define PAGE_KERNEL            __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_RO         __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
-#define PAGE_KERNEL_ROX        __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_ROX                __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
 #define PAGE_KERNEL_EXEC       __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_EXEC_CONT  __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
 
@@ -153,6 +153,7 @@ extern struct page *empty_zero_page;
 #define pte_write(pte)         (!!(pte_val(pte) & PTE_WRITE))
 #define pte_exec(pte)          (!(pte_val(pte) & PTE_UXN))
 #define pte_cont(pte)          (!!(pte_val(pte) & PTE_CONT))
+#define pte_user(pte)          (!!(pte_val(pte) & PTE_USER))
 
 #ifdef CONFIG_ARM64_HW_AFDBM
 #define pte_hw_dirty(pte)      (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
@@ -163,8 +164,6 @@ extern struct page *empty_zero_page;
 #define pte_dirty(pte)         (pte_sw_dirty(pte) || pte_hw_dirty(pte))
 
 #define pte_valid(pte)         (!!(pte_val(pte) & PTE_VALID))
-#define pte_valid_user(pte) \
-       ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
 #define pte_valid_not_user(pte) \
        ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
 #define pte_valid_young(pte) \
@@ -278,13 +277,13 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep, pte_t pte)
 {
-       if (pte_valid_user(pte)) {
-               if (!pte_special(pte) && pte_exec(pte))
-                       __sync_icache_dcache(pte, addr);
+       if (pte_valid(pte)) {
                if (pte_sw_dirty(pte) && pte_write(pte))
                        pte_val(pte) &= ~PTE_RDONLY;
                else
                        pte_val(pte) |= PTE_RDONLY;
+               if (pte_user(pte) && pte_exec(pte) && !pte_special(pte))
+                       __sync_icache_dcache(pte, addr);
        }
 
        /*
index ffe9c2b..917d981 100644 (file)
@@ -514,9 +514,14 @@ CPU_LE(    movk    x0, #0x30d0, lsl #16    )       // Clear EE and E0E on LE systems
 #endif
 
        /* EL2 debug */
+       mrs     x0, id_aa64dfr0_el1             // Check ID_AA64DFR0_EL1 PMUVer
+       sbfx    x0, x0, #8, #4
+       cmp     x0, #1
+       b.lt    4f                              // Skip if no PMU present
        mrs     x0, pmcr_el0                    // Disable debug access traps
        ubfx    x0, x0, #11, #5                 // to EL2 and allow access to
        msr     mdcr_el2, x0                    // all PMU counters from EL1
+4:
 
        /* Stage-2 translation */
        msr     vttbr_el2, xzr
index bc2abb8..999633b 100644 (file)
 
 #ifdef CONFIG_EFI
 
+/*
+ * Prevent the symbol aliases below from being emitted into the kallsyms
+ * table, by forcing them to be absolute symbols (which are conveniently
+ * ignored by scripts/kallsyms) rather than section relative symbols.
+ * The distinction is only relevant for partial linking, and only for symbols
+ * that are defined within a section declaration (which is not the case for
+ * the definitions below) so the resulting values will be identical.
+ */
+#define KALLSYMS_HIDE(sym)     ABSOLUTE(sym)
+
 /*
  * The EFI stub has its own symbol namespace prefixed by __efistub_, to
  * isolate it from the kernel proper. The following symbols are legally
  * linked at. The routines below are all implemented in assembler in a
  * position independent manner
  */
-__efistub_memcmp               = __pi_memcmp;
-__efistub_memchr               = __pi_memchr;
-__efistub_memcpy               = __pi_memcpy;
-__efistub_memmove              = __pi_memmove;
-__efistub_memset               = __pi_memset;
-__efistub_strlen               = __pi_strlen;
-__efistub_strcmp               = __pi_strcmp;
-__efistub_strncmp              = __pi_strncmp;
-__efistub___flush_dcache_area  = __pi___flush_dcache_area;
+__efistub_memcmp               = KALLSYMS_HIDE(__pi_memcmp);
+__efistub_memchr               = KALLSYMS_HIDE(__pi_memchr);
+__efistub_memcpy               = KALLSYMS_HIDE(__pi_memcpy);
+__efistub_memmove              = KALLSYMS_HIDE(__pi_memmove);
+__efistub_memset               = KALLSYMS_HIDE(__pi_memset);
+__efistub_strlen               = KALLSYMS_HIDE(__pi_strlen);
+__efistub_strcmp               = KALLSYMS_HIDE(__pi_strcmp);
+__efistub_strncmp              = KALLSYMS_HIDE(__pi_strncmp);
+__efistub___flush_dcache_area  = KALLSYMS_HIDE(__pi___flush_dcache_area);
 
 #ifdef CONFIG_KASAN
-__efistub___memcpy             = __pi_memcpy;
-__efistub___memmove            = __pi_memmove;
-__efistub___memset             = __pi_memset;
+__efistub___memcpy             = KALLSYMS_HIDE(__pi_memcpy);
+__efistub___memmove            = KALLSYMS_HIDE(__pi_memmove);
+__efistub___memset             = KALLSYMS_HIDE(__pi_memset);
 #endif
 
-__efistub__text                        = _text;
-__efistub__end                 = _end;
-__efistub__edata               = _edata;
+__efistub__text                        = KALLSYMS_HIDE(_text);
+__efistub__end                 = KALLSYMS_HIDE(_end);
+__efistub__edata               = KALLSYMS_HIDE(_edata);
 
 #endif
 
index ca8f5a5..f0e7bdf 100644 (file)
@@ -36,7 +36,11 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
        write_sysreg(val, hcr_el2);
        /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
        write_sysreg(1 << 15, hstr_el2);
-       write_sysreg(CPTR_EL2_TTA | CPTR_EL2_TFP, cptr_el2);
+
+       val = CPTR_EL2_DEFAULT;
+       val |= CPTR_EL2_TTA | CPTR_EL2_TFP;
+       write_sysreg(val, cptr_el2);
+
        write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
 }
 
@@ -45,7 +49,7 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
        write_sysreg(HCR_RW, hcr_el2);
        write_sysreg(0, hstr_el2);
        write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2);
-       write_sysreg(0, cptr_el2);
+       write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
 }
 
 static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
index 648112e..4d1ac81 100644 (file)
 
 #define PSTATE_FAULT_BITS_64   (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \
                                 PSR_I_BIT | PSR_D_BIT)
-#define EL1_EXCEPT_SYNC_OFFSET 0x200
+
+#define CURRENT_EL_SP_EL0_VECTOR       0x0
+#define CURRENT_EL_SP_ELx_VECTOR       0x200
+#define LOWER_EL_AArch64_VECTOR                0x400
+#define LOWER_EL_AArch32_VECTOR                0x600
 
 static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
 {
@@ -97,6 +101,34 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt,
                *fsr = 0x14;
 }
 
+enum exception_type {
+       except_type_sync        = 0,
+       except_type_irq         = 0x80,
+       except_type_fiq         = 0x100,
+       except_type_serror      = 0x180,
+};
+
+static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
+{
+       u64 exc_offset;
+
+       switch (*vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT)) {
+       case PSR_MODE_EL1t:
+               exc_offset = CURRENT_EL_SP_EL0_VECTOR;
+               break;
+       case PSR_MODE_EL1h:
+               exc_offset = CURRENT_EL_SP_ELx_VECTOR;
+               break;
+       case PSR_MODE_EL0t:
+               exc_offset = LOWER_EL_AArch64_VECTOR;
+               break;
+       default:
+               exc_offset = LOWER_EL_AArch32_VECTOR;
+       }
+
+       return vcpu_sys_reg(vcpu, VBAR_EL1) + exc_offset + type;
+}
+
 static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
 {
        unsigned long cpsr = *vcpu_cpsr(vcpu);
@@ -108,8 +140,8 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
        *vcpu_spsr(vcpu) = cpsr;
        *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
 
+       *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
        *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
-       *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
 
        vcpu_sys_reg(vcpu, FAR_EL1) = addr;
 
@@ -143,8 +175,8 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
        *vcpu_spsr(vcpu) = cpsr;
        *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
 
+       *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
        *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
-       *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
 
        /*
         * Build an unknown exception, depending on the instruction
index eec3598..2e90371 100644 (file)
@@ -1007,10 +1007,9 @@ static int emulate_cp(struct kvm_vcpu *vcpu,
                if (likely(r->access(vcpu, params, r))) {
                        /* Skip instruction, since it was emulated */
                        kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+                       /* Handled */
+                       return 0;
                }
-
-               /* Handled */
-               return 0;
        }
 
        /* Not handled */
@@ -1043,7 +1042,7 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu,
 }
 
 /**
- * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP15 access
+ * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP14/CP15 access
  * @vcpu: The VCPU pointer
  * @run:  The kvm_run struct
  */
@@ -1095,7 +1094,7 @@ out:
 }
 
 /**
- * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access
+ * kvm_handle_cp_32 -- handles a mrc/mcr trap on a guest CP14/CP15 access
  * @vcpu: The VCPU pointer
  * @run:  The kvm_run struct
  */
index 5a22a11..0adbebb 100644 (file)
@@ -46,7 +46,7 @@ enum address_markers_idx {
        PCI_START_NR,
        PCI_END_NR,
        MODULES_START_NR,
-       MODUELS_END_NR,
+       MODULES_END_NR,
        KERNEL_SPACE_NR,
 };
 
index cf038c7..cab7a5b 100644 (file)
@@ -120,6 +120,7 @@ static void __init cpu_set_ttbr1(unsigned long ttbr1)
 void __init kasan_init(void)
 {
        struct memblock_region *reg;
+       int i;
 
        /*
         * We are going to perform proper setup of shadow memory.
@@ -155,6 +156,14 @@ void __init kasan_init(void)
                                pfn_to_nid(virt_to_pfn(start)));
        }
 
+       /*
+        * KAsan may reuse the contents of kasan_zero_pte directly, so we
+        * should make sure that it maps the zero page read-only.
+        */
+       for (i = 0; i < PTRS_PER_PTE; i++)
+               set_pte(&kasan_zero_pte[i],
+                       pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
+
        memset(kasan_zero_page, 0, PAGE_SIZE);
        cpu_set_ttbr1(__pa(swapper_pg_dir));
        flush_tlb_all();
index 3571c73..0795c3a 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/vmalloc.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -44,6 +45,7 @@ static int change_memory_common(unsigned long addr, int numpages,
        unsigned long end = start + size;
        int ret;
        struct page_change_data data;
+       struct vm_struct *area;
 
        if (!PAGE_ALIGNED(addr)) {
                start &= PAGE_MASK;
@@ -51,11 +53,27 @@ static int change_memory_common(unsigned long addr, int numpages,
                WARN_ON_ONCE(1);
        }
 
-       if (start < MODULES_VADDR || start >= MODULES_END)
+       /*
+        * Kernel VA mappings are always live, and splitting live section
+        * mappings into page mappings may cause TLB conflicts. This means
+        * we have to ensure that changing the permission bits of the range
+        * we are operating on does not result in such splitting.
+        *
+        * Let's restrict ourselves to mappings created by vmalloc (or vmap).
+        * Those are guaranteed to consist entirely of page mappings, and
+        * splitting is never needed.
+        *
+        * So check whether the [addr, addr + size) interval is entirely
+        * covered by precisely one VM area that has the VM_ALLOC flag set.
+        */
+       area = find_vm_area((void *)addr);
+       if (!area ||
+           end > (unsigned long)area->addr + area->size ||
+           !(area->flags & VM_ALLOC))
                return -EINVAL;
 
-       if (end < MODULES_VADDR || end >= MODULES_END)
-               return -EINVAL;
+       if (!numpages)
+               return 0;
 
        data.set_mask = set_mask;
        data.clear_mask = clear_mask;
index 146bd99..e6a30e1 100644 (file)
        b.lo    9998b
        dsb     \domain
        .endm
+
+/*
+ * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
+ */
+       .macro  reset_pmuserenr_el0, tmpreg
+       mrs     \tmpreg, id_aa64dfr0_el1        // Check ID_AA64DFR0_EL1 PMUVer
+       sbfx    \tmpreg, \tmpreg, #8, #4
+       cmp     \tmpreg, #1                     // Skip if no PMU present
+       b.lt    9000f
+       msr     pmuserenr_el0, xzr              // Disable PMU access from EL0
+9000:
+       .endm
index a3d867e..c164d2c 100644 (file)
@@ -117,7 +117,7 @@ ENTRY(cpu_do_resume)
         */
        ubfx    x11, x11, #1, #1
        msr     oslar_el1, x11
-       msr     pmuserenr_el0, xzr              // Disable PMU access from EL0
+       reset_pmuserenr_el0 x0                  // Disable PMU access from EL0
        mov     x0, x12
        dsb     nsh             // Make sure local tlb invalidation completed
        isb
@@ -154,7 +154,7 @@ ENTRY(__cpu_setup)
        msr     cpacr_el1, x0                   // Enable FP/ASIMD
        mov     x0, #1 << 12                    // Reset mdscr_el1 and disable
        msr     mdscr_el1, x0                   // access to the DCC from EL0
-       msr     pmuserenr_el0, xzr              // Disable PMU access from EL0
+       reset_pmuserenr_el0 x0                  // Disable PMU access from EL0
        /*
         * Memory region attributes for LPAE:
         *
index 74c132d..6a86850 100644 (file)
@@ -11,7 +11,7 @@
 
 
 
-#define NR_syscalls                    323 /* length of syscall table */
+#define NR_syscalls                    324 /* length of syscall table */
 
 /*
  * The following defines stop scripts/checksyscalls.sh from complaining about
index 762edce..41369a1 100644 (file)
 #define __NR_membarrier                        1344
 #define __NR_kcmp                      1345
 #define __NR_mlock2                    1346
+#define __NR_copy_file_range           1347
 
 #endif /* _UAPI_ASM_IA64_UNISTD_H */
index 534a74a..477c55e 100644 (file)
@@ -1772,5 +1772,6 @@ sys_call_table:
        data8 sys_membarrier
        data8 sys_kcmp                          // 1345
        data8 sys_mlock2
+       data8 sys_copy_file_range
 
        .org sys_call_table + 8*NR_syscalls     // guard against failures to increase NR_syscalls
index 836ac5a..2841c0a 100644 (file)
@@ -276,6 +276,7 @@ source "kernel/Kconfig.preempt"
 
 config SMP
        bool "Symmetric multi-processing support"
+       depends on MMU
        ---help---
          This enables support for systems with more than one CPU. If you have
          a system with only one CPU, say N. If you have a system with more
index a96c81d..c5cd63a 100644 (file)
@@ -21,6 +21,7 @@ platforms += mti-malta
 platforms += mti-sead3
 platforms += netlogic
 platforms += paravirt
+platforms += pic32
 platforms += pistachio
 platforms += pmcs-msp71xx
 platforms += pnx833x
index fbf3f66..57a945e 100644 (file)
@@ -169,6 +169,7 @@ config BMIPS_GENERIC
        select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
        select USB_OHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
        select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
+       select ARCH_WANT_OPTIONAL_GPIOLIB
        help
          Build a generic DT-based kernel image that boots on select
          BCM33xx cable modem chips, BCM63xx DSL chips, and BCM7xxx set-top
@@ -480,6 +481,14 @@ config MIPS_MALTA
          This enables support for the MIPS Technologies Malta evaluation
          board.
 
+config MACH_PIC32
+       bool "Microchip PIC32 Family"
+       help
+         This enables support for the Microchip PIC32 family of platforms.
+
+         Microchip PIC32 is a family of general-purpose 32-bit MIPS core
+         microcontrollers.
+
 config MIPS_SEAD3
        bool "MIPS SEAD3 board"
        select BOOT_ELF32
@@ -979,6 +988,7 @@ source "arch/mips/jazz/Kconfig"
 source "arch/mips/jz4740/Kconfig"
 source "arch/mips/lantiq/Kconfig"
 source "arch/mips/lasat/Kconfig"
+source "arch/mips/pic32/Kconfig"
 source "arch/mips/pistachio/Kconfig"
 source "arch/mips/pmcs-msp71xx/Kconfig"
 source "arch/mips/ralink/Kconfig"
@@ -1755,6 +1765,10 @@ config SYS_SUPPORTS_ZBOOT_UART16550
        bool
        select SYS_SUPPORTS_ZBOOT
 
+config SYS_SUPPORTS_ZBOOT_UART_PROM
+       bool
+       select SYS_SUPPORTS_ZBOOT
+
 config CPU_LOONGSON2
        bool
        select CPU_SUPPORTS_32BIT_KERNEL
@@ -2017,7 +2031,8 @@ config KVM_GUEST
        bool "KVM Guest Kernel"
        depends on BROKEN_ON_SMP
        help
-         Select this option if building a guest kernel for KVM (Trap & Emulate) mode
+         Select this option if building a guest kernel for KVM (Trap & Emulate)
+         mode.
 
 config KVM_GUEST_TIMER_FREQ
        int "Count/Compare Timer Frequency (MHz)"
index 3f70ba5..e78d60d 100644 (file)
@@ -166,16 +166,6 @@ cflags-$(CONFIG_CPU_CAVIUM_OCTEON) += -Wa,-march=octeon
 endif
 cflags-$(CONFIG_CAVIUM_CN63XXP1) += -Wa,-mfix-cn63xxp1
 cflags-$(CONFIG_CPU_BMIPS)     += -march=mips32 -Wa,-mips32 -Wa,--trap
-#
-# binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
-# as MIPS64 R1; older versions as just R1.  This leaves the possibility open
-# that GCC might generate R2 code for -march=loongson3a which then is rejected
-# by GAS.  The cc-option can't probe for this behaviour so -march=loongson3a
-# can't easily be used safely within the kbuild framework.
-#
-cflags-$(CONFIG_CPU_LOONGSON3)  +=                                     \
-       $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) \
-       -Wa,-mips64r2 -Wa,--trap
 
 cflags-$(CONFIG_CPU_R4000_WORKAROUNDS) += $(call cc-option,-mfix-r4000,)
 cflags-$(CONFIG_CPU_R4400_WORKAROUNDS) += $(call cc-option,-mfix-r4400,)
index f9bc4f5..84548f7 100644 (file)
@@ -40,7 +40,7 @@
 
 static int gpio2_get(struct gpio_chip *chip, unsigned offset)
 {
-       return alchemy_gpio2_get_value(offset + ALCHEMY_GPIO2_BASE);
+       return !!alchemy_gpio2_get_value(offset + ALCHEMY_GPIO2_BASE);
 }
 
 static void gpio2_set(struct gpio_chip *chip, unsigned offset, int value)
@@ -68,7 +68,7 @@ static int gpio2_to_irq(struct gpio_chip *chip, unsigned offset)
 
 static int gpio1_get(struct gpio_chip *chip, unsigned offset)
 {
-       return alchemy_gpio1_get_value(offset + ALCHEMY_GPIO1_BASE);
+       return !!alchemy_gpio1_get_value(offset + ALCHEMY_GPIO1_BASE);
 }
 
 static void gpio1_set(struct gpio_chip *chip,
@@ -119,7 +119,7 @@ struct gpio_chip alchemy_gpio_chip[] = {
 
 static int alchemy_gpic_get(struct gpio_chip *chip, unsigned int off)
 {
-       return au1300_gpio_get_value(off + AU1300_GPIO_BASE);
+       return !!au1300_gpio_get_value(off + AU1300_GPIO_BASE);
 }
 
 static void alchemy_gpic_set(struct gpio_chip *chip, unsigned int off, int v)
index f493045..f969f58 100644 (file)
@@ -37,7 +37,7 @@ static int ar7_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
                                container_of(chip, struct ar7_gpio_chip, chip);
        void __iomem *gpio_in = gpch->regs + AR7_GPIO_INPUT;
 
-       return readl(gpio_in) & (1 << gpio);
+       return !!(readl(gpio_in) & (1 << gpio));
 }
 
 static int titan_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
index ca7cc19..870c6b2 100644 (file)
@@ -23,7 +23,6 @@ void ath79_clocks_init(void);
 unsigned long ath79_get_sys_clk_rate(const char *id);
 
 void ath79_ddr_ctrl_init(void);
-void ath79_ddr_wb_flush(unsigned int reg);
 
 void ath79_gpio_init(void);
 
index eeb3953..511c065 100644 (file)
 #include "common.h"
 #include "machtypes.h"
 
+static void __init ath79_misc_intc_domain_init(
+       struct device_node *node, int irq);
+
 static void ath79_misc_irq_handler(struct irq_desc *desc)
 {
-       void __iomem *base = ath79_reset_base;
+       struct irq_domain *domain = irq_desc_get_handler_data(desc);
+       void __iomem *base = domain->host_data;
        u32 pending;
 
        pending = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS) &
@@ -42,15 +46,15 @@ static void ath79_misc_irq_handler(struct irq_desc *desc)
        while (pending) {
                int bit = __ffs(pending);
 
-               generic_handle_irq(ATH79_MISC_IRQ(bit));
+               generic_handle_irq(irq_linear_revmap(domain, bit));
                pending &= ~BIT(bit);
        }
 }
 
 static void ar71xx_misc_irq_unmask(struct irq_data *d)
 {
-       unsigned int irq = d->irq - ATH79_MISC_IRQ_BASE;
-       void __iomem *base = ath79_reset_base;
+       void __iomem *base = irq_data_get_irq_chip_data(d);
+       unsigned int irq = d->hwirq;
        u32 t;
 
        t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
@@ -62,8 +66,8 @@ static void ar71xx_misc_irq_unmask(struct irq_data *d)
 
 static void ar71xx_misc_irq_mask(struct irq_data *d)
 {
-       unsigned int irq = d->irq - ATH79_MISC_IRQ_BASE;
-       void __iomem *base = ath79_reset_base;
+       void __iomem *base = irq_data_get_irq_chip_data(d);
+       unsigned int irq = d->hwirq;
        u32 t;
 
        t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
@@ -75,8 +79,8 @@ static void ar71xx_misc_irq_mask(struct irq_data *d)
 
 static void ar724x_misc_irq_ack(struct irq_data *d)
 {
-       unsigned int irq = d->irq - ATH79_MISC_IRQ_BASE;
-       void __iomem *base = ath79_reset_base;
+       void __iomem *base = irq_data_get_irq_chip_data(d);
+       unsigned int irq = d->hwirq;
        u32 t;
 
        t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS);
@@ -94,12 +98,6 @@ static struct irq_chip ath79_misc_irq_chip = {
 
 static void __init ath79_misc_irq_init(void)
 {
-       void __iomem *base = ath79_reset_base;
-       int i;
-
-       __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-       __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
-
        if (soc_is_ar71xx() || soc_is_ar913x())
                ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
        else if (soc_is_ar724x() ||
@@ -110,13 +108,7 @@ static void __init ath79_misc_irq_init(void)
        else
                BUG();
 
-       for (i = ATH79_MISC_IRQ_BASE;
-            i < ATH79_MISC_IRQ_BASE + ATH79_MISC_IRQ_COUNT; i++) {
-               irq_set_chip_and_handler(i, &ath79_misc_irq_chip,
-                                        handle_level_irq);
-       }
-
-       irq_set_chained_handler(ATH79_CPU_IRQ(6), ath79_misc_irq_handler);
+       ath79_misc_intc_domain_init(NULL, ATH79_CPU_IRQ(6));
 }
 
 static void ar934x_ip2_irq_dispatch(struct irq_desc *desc)
@@ -256,10 +248,10 @@ asmlinkage void plat_irq_dispatch(void)
        }
 }
 
-#ifdef CONFIG_IRQCHIP
 static int misc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
 {
        irq_set_chip_and_handler(irq, &ath79_misc_irq_chip, handle_level_irq);
+       irq_set_chip_data(irq, d->host_data);
        return 0;
 }
 
@@ -268,19 +260,14 @@ static const struct irq_domain_ops misc_irq_domain_ops = {
        .map = misc_map,
 };
 
-static int __init ath79_misc_intc_of_init(
-       struct device_node *node, struct device_node *parent)
+static void __init ath79_misc_intc_domain_init(
+       struct device_node *node, int irq)
 {
        void __iomem *base = ath79_reset_base;
        struct irq_domain *domain;
-       int irq;
-
-       irq = irq_of_parse_and_map(node, 0);
-       if (!irq)
-               panic("Failed to get MISC IRQ");
 
        domain = irq_domain_add_legacy(node, ATH79_MISC_IRQ_COUNT,
-                       ATH79_MISC_IRQ_BASE, 0, &misc_irq_domain_ops, NULL);
+                       ATH79_MISC_IRQ_BASE, 0, &misc_irq_domain_ops, base);
        if (!domain)
                panic("Failed to add MISC irqdomain");
 
@@ -288,9 +275,19 @@ static int __init ath79_misc_intc_of_init(
        __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
        __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
 
+       irq_set_chained_handler_and_data(irq, ath79_misc_irq_handler, domain);
+}
 
-       irq_set_chained_handler(irq, ath79_misc_irq_handler);
+static int __init ath79_misc_intc_of_init(
+       struct device_node *node, struct device_node *parent)
+{
+       int irq;
 
+       irq = irq_of_parse_and_map(node, 0);
+       if (!irq)
+               panic("Failed to get MISC IRQ");
+
+       ath79_misc_intc_domain_init(node, irq);
        return 0;
 }
 
@@ -349,8 +346,6 @@ static int __init ar79_cpu_intc_of_init(
 IRQCHIP_DECLARE(ar79_cpu_intc, "qca,ar7100-cpu-intc",
                ar79_cpu_intc_of_init);
 
-#endif
-
 void __init arch_init_irq(void)
 {
        if (mips_machtype == ATH79_MACH_GENERIC_OF) {
index 8755d61..be451ee 100644 (file)
 
 #define ATH79_SYS_TYPE_LEN     64
 
-#define AR71XX_BASE_FREQ       40000000
-#define AR724X_BASE_FREQ       5000000
-#define AR913X_BASE_FREQ       5000000
-
 static char ath79_sys_type[ATH79_SYS_TYPE_LEN];
 
 static void ath79_restart(char *command)
@@ -272,15 +268,10 @@ void __init device_tree_init(void)
        unflatten_and_copy_device_tree();
 }
 
-static void __init ath79_generic_init(void)
-{
-       /* Nothing to do */
-}
-
 MIPS_MACHINE(ATH79_MACH_GENERIC,
             "Generic",
             "Generic AR71XX/AR724X/AR913X based board",
-            ath79_generic_init);
+            NULL);
 
 MIPS_MACHINE(ATH79_MACH_GENERIC_OF,
             "DTB",
index a7e569c..959c145 100644 (file)
@@ -666,9 +666,15 @@ static int bcm47xx_get_sprom_bcma(struct bcma_bus *bus, struct ssb_sprom *out)
        switch (bus->hosttype) {
        case BCMA_HOSTTYPE_PCI:
                memset(out, 0, sizeof(struct ssb_sprom));
-               snprintf(buf, sizeof(buf), "pci/%u/%u/",
-                        bus->host_pci->bus->number + 1,
-                        PCI_SLOT(bus->host_pci->devfn));
+               /* On BCM47XX all PCI buses share the same domain */
+               if (config_enabled(CONFIG_BCM47XX))
+                       snprintf(buf, sizeof(buf), "pci/%u/%u/",
+                                bus->host_pci->bus->number + 1,
+                                PCI_SLOT(bus->host_pci->devfn));
+               else
+                       snprintf(buf, sizeof(buf), "pci/%u/%u/",
+                                pci_domain_nr(bus->host_pci->bus) + 1,
+                                bus->host_pci->bus->number);
                bcm47xx_sprom_apply_prefix_alias(buf, sizeof(buf));
                prefix = buf;
                break;
index 4b50d40..05757ae 100644 (file)
@@ -10,6 +10,7 @@
 
 #define pr_fmt(fmt) "bcm63xx_nvram: " fmt
 
+#include <linux/bcm963xx_nvram.h>
 #include <linux/init.h>
 #include <linux/crc32.h>
 #include <linux/export.h>
 
 #include <bcm63xx_nvram.h>
 
-/*
- * nvram structure
- */
-struct bcm963xx_nvram {
-       u32     version;
-       u8      reserved1[256];
-       u8      name[16];
-       u32     main_tp_number;
-       u32     psi_size;
-       u32     mac_addr_count;
-       u8      mac_addr_base[ETH_ALEN];
-       u8      reserved2[2];
-       u32     checksum_old;
-       u8      reserved3[720];
-       u32     checksum_high;
-};
-
 #define BCM63XX_DEFAULT_PSI_SIZE       64
 
 static struct bcm963xx_nvram nvram;
@@ -42,27 +26,14 @@ static int mac_addr_used;
 
 void __init bcm63xx_nvram_init(void *addr)
 {
-       unsigned int check_len;
        u32 crc, expected_crc;
        u8 hcs_mac_addr[ETH_ALEN] = { 0x00, 0x10, 0x18, 0xff, 0xff, 0xff };
 
        /* extract nvram data */
-       memcpy(&nvram, addr, sizeof(nvram));
+       memcpy(&nvram, addr, BCM963XX_NVRAM_V5_SIZE);
 
        /* check checksum before using data */
-       if (nvram.version <= 4) {
-               check_len = offsetof(struct bcm963xx_nvram, reserved3);
-               expected_crc = nvram.checksum_old;
-               nvram.checksum_old = 0;
-       } else {
-               check_len = sizeof(nvram);
-               expected_crc = nvram.checksum_high;
-               nvram.checksum_high = 0;
-       }
-
-       crc = crc32_le(~0, (u8 *)&nvram, check_len);
-
-       if (crc != expected_crc)
+       if (bcm963xx_nvram_checksum(&nvram, &expected_crc, &crc))
                pr_warn("nvram checksum failed, contents may be invalid (expected %08x, got %08x)\n",
                        expected_crc, crc);
 
index 5b16d29..3553528 100644 (file)
@@ -105,6 +105,7 @@ static const struct bmips_quirk bmips_quirk_list[] = {
        { "brcm,bcm33843-viper",        &bcm3384_viper_quirks           },
        { "brcm,bcm6328",               &bcm6328_quirks                 },
        { "brcm,bcm6368",               &bcm6368_quirks                 },
+       { "brcm,bcm63168",              &bcm6368_quirks                 },
        { },
 };
 
index d5bdee1..4eff1ef 100644 (file)
@@ -29,20 +29,23 @@ KBUILD_AFLAGS := $(LINUXINCLUDE) $(KBUILD_AFLAGS) -D__ASSEMBLY__ \
        -DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \
        -DKERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS)
 
-targets := head.o decompress.o string.o dbg.o uart-16550.o uart-alchemy.o
-
 # decompressor objects (linked with vmlinuz)
 vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o
 
 ifdef CONFIG_DEBUG_ZBOOT
 vmlinuzobjs-$(CONFIG_DEBUG_ZBOOT)                 += $(obj)/dbg.o
 vmlinuzobjs-$(CONFIG_SYS_SUPPORTS_ZBOOT_UART16550) += $(obj)/uart-16550.o
+vmlinuzobjs-$(CONFIG_SYS_SUPPORTS_ZBOOT_UART_PROM) += $(obj)/uart-prom.o
 vmlinuzobjs-$(CONFIG_MIPS_ALCHEMY)                += $(obj)/uart-alchemy.o
 endif
 
-ifdef CONFIG_KERNEL_XZ
-vmlinuzobjs-y += $(obj)/../../lib/ashldi3.o
-endif
+vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o
+
+$(obj)/ashldi3.o: KBUILD_CFLAGS += -I$(srctree)/arch/mips/lib
+$(obj)/ashldi3.c: $(srctree)/arch/mips/lib/ashldi3.c
+       $(call cmd,shipped)
+
+targets := $(notdir $(vmlinuzobjs-y))
 
 targets += vmlinux.bin
 OBJCOPYFLAGS_vmlinux.bin := $(OBJCOPYFLAGS) -O binary -R .comment -S
@@ -60,7 +63,7 @@ targets += vmlinux.bin.z
 $(obj)/vmlinux.bin.z: $(obj)/vmlinux.bin FORCE
        $(call if_changed,$(tool_y))
 
-targets += piggy.o
+targets += piggy.o dummy.o
 OBJCOPYFLAGS_piggy.o := --add-section=.image=$(obj)/vmlinux.bin.z \
                        --set-section-flags=.image=contents,alloc,load,readonly,data
 $(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE
diff --git a/arch/mips/boot/compressed/uart-prom.c b/arch/mips/boot/compressed/uart-prom.c
new file mode 100644 (file)
index 0000000..1c3d51b
--- /dev/null
@@ -0,0 +1,7 @@
+/* Decompressor debug output: putc() forwards each character to prom_putchar(), which is defined outside this file. */
+extern void prom_putchar(unsigned char ch);
+
+void putc(char c)
+{
+       prom_putchar(c);
+}
index a0bf516..fc7a0a9 100644 (file)
@@ -4,6 +4,7 @@ dts-dirs        += ingenic
 dts-dirs       += lantiq
 dts-dirs       += mti
 dts-dirs       += netlogic
+dts-dirs       += pic32
 dts-dirs       += qca
 dts-dirs       += ralink
 dts-dirs       += xilfpga
index d52ce3d..d61b161 100644 (file)
@@ -31,6 +31,7 @@
        };
 
        aliases {
+               leds0 = &leds0;
                uart0 = &uart0;
        };
 
@@ -73,6 +74,7 @@
                timer: timer@10000040 {
                        compatible = "syscon";
                        reg = <0x10000040 0x2c>;
+                       little-endian;
                };
 
                reboot {
                        offset = <0x28>;
                        mask = <0x1>;
                };
+
+               leds0: led-controller@10000800 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "brcm,bcm6328-leds";
+                       reg = <0x10000800 0x24>;
+                       status = "disabled";
+               };
        };
 };
index 45152bc..9c8d3fe 100644 (file)
@@ -32,6 +32,7 @@
        };
 
        aliases {
+               leds0 = &leds0;
                uart0 = &uart0;
        };
 
                compatible = "simple-bus";
                ranges;
 
+               periph_cntl: syscon@10000000 {
+                       compatible = "syscon";
+                       reg = <0x10000000 0x14>;
+                       little-endian;
+               };
+
+               reboot: syscon-reboot@10000008 {
+                       compatible = "syscon-reboot";
+                       regmap = <&periph_cntl>;
+                       offset = <0x8>;
+                       mask = <0x1>;
+               };
+
                periph_intc: periph_intc@10000020 {
                        compatible = "brcm,bcm3380-l2-intc";
                        reg = <0x10000024 0x4 0x1000002c 0x4>,
                        interrupts = <2>;
                };
 
+               leds0: led-controller@100000d0 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "brcm,bcm6358-leds";
+                       reg = <0x100000d0 0x8>;
+                       status = "disabled";
+               };
+
                uart0: serial@10000100 {
                        compatible = "brcm,bcm6345-uart";
                        reg = <0x10000100 0x18>;
index 4fc7ece..1a7efa8 100644 (file)
@@ -98,6 +98,7 @@
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7125-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x60c>;
+                       little-endian;
                };
 
                reboot {
index a3039bb..d4bf52c 100644 (file)
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7346-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x51c>;
+                       little-endian;
                };
 
                reboot {
index 4274ff4..8e25016 100644 (file)
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7358-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x51c>;
+                       little-endian;
                };
 
                reboot {
index 0dcc916..7e5f760 100644 (file)
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7360-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x51c>;
+                       little-endian;
                };
 
                reboot {
index 2f3f9fc..c739ea7 100644 (file)
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7362-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x51c>;
+                       little-endian;
                };
 
                reboot {
index bee221b..5f55d0a 100644 (file)
@@ -99,6 +99,7 @@
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7420-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x60c>;
+                       little-endian;
                };
 
                reboot {
index 571f30f..e24d41a 100644 (file)
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7425-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x51c>;
+                       little-endian;
                };
 
                reboot {
index 614ee21..8b9432c 100644 (file)
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7425-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x51c>;
+                       little-endian;
                };
 
                reboot {
index 9fcb9e7..1652d8d 100644 (file)
 &uart4 {
        status = "okay";
 };
+
+&nemc {
+       status = "okay";
+
+       nandc: nand-controller@1 {
+               compatible = "ingenic,jz4780-nand";
+               reg = <1 0 0x1000000>;
+
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               ingenic,bch-controller = <&bch>;
+
+               ingenic,nemc-tAS = <10>;
+               ingenic,nemc-tAH = <5>;
+               ingenic,nemc-tBP = <10>;
+               ingenic,nemc-tAW = <15>;
+               ingenic,nemc-tSTRV = <100>;
+
+               nand@1 {
+                       reg = <1>;
+
+                       nand-ecc-step-size = <1024>;
+                       nand-ecc-strength = <24>;
+                       nand-ecc-mode = "hw";
+                       nand-on-flash-bbt;
+
+                       partitions {
+                               compatible = "fixed-partitions";
+                               #address-cells = <2>;
+                               #size-cells = <2>;
+
+                               partition@0 {
+                                       label = "u-boot-spl";
+                                       reg = <0x0 0x0 0x0 0x800000>;
+                               };
+
+                               partition@0x800000 {
+                                       label = "u-boot";
+                                       reg = <0x0 0x800000 0x0 0x200000>;
+                               };
+
+                               partition@0xa00000 {
+                                       label = "u-boot-env";
+                                       reg = <0x0 0xa00000 0x0 0x200000>;
+                               };
+
+                               partition@0xc00000 {
+                                       label = "boot";
+                                       reg = <0x0 0xc00000 0x0 0x4000000>;
+                               };
+
+                               partition@0x4c00000 { /* unit address matches reg: "boot" ends at 0xc00000 + 0x4000000 */
+                                       label = "system";
+                                       reg = <0x0 0x4c00000 0x1 0xfb400000>;
+                               };
+                       };
+               };
+       };
+};
+
+&bch {
+       status = "okay";
+};
index 65389f6..b868b42 100644 (file)
 
                status = "disabled";
        };
+
+       nemc: nemc@13410000 {
+               compatible = "ingenic,jz4780-nemc";
+               reg = <0x13410000 0x10000>;
+               #address-cells = <2>;
+               #size-cells = <1>;
+               ranges = <1 0 0x1b000000 0x1000000
+                         2 0 0x1a000000 0x1000000
+                         3 0 0x19000000 0x1000000
+                         4 0 0x18000000 0x1000000
+                         5 0 0x17000000 0x1000000
+                         6 0 0x16000000 0x1000000>;
+
+               clocks = <&cgu JZ4780_CLK_NEMC>;
+
+               status = "disabled";
+       };
+
+       bch: bch@134d0000 {
+               compatible = "ingenic,jz4780-bch";
+               reg = <0x134d0000 0x10000>;
+
+               clocks = <&cgu JZ4780_CLK_BCH>;
+
+               status = "disabled";
+       };
 };
diff --git a/arch/mips/boot/dts/pic32/Makefile b/arch/mips/boot/dts/pic32/Makefile
new file mode 100644 (file)
index 0000000..7ac7905
--- /dev/null
@@ -0,0 +1,12 @@
+dtb-$(CONFIG_DTB_PIC32_MZDA_SK)                += pic32mzda_sk.dtb
+
+dtb-$(CONFIG_DTB_PIC32_NONE)           += \
+                                       pic32mzda_sk.dtb
+
+obj-y                          += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+
+# Force kbuild to make empty built-in.o if necessary
+obj-                           += dummy.o
+
+always                         := $(dtb-y)
+clean-files                    := *.dtb *.dtb.S
diff --git a/arch/mips/boot/dts/pic32/pic32mzda-clk.dtsi b/arch/mips/boot/dts/pic32/pic32mzda-clk.dtsi
new file mode 100644 (file)
index 0000000..ef13350
--- /dev/null
@@ -0,0 +1,236 @@
+/*
+ * Device Tree Source for PIC32MZDA clock data
+ *
+ * Purna Chandra Mandal <purna.mandal@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * Licensed under GPLv2 or later.
+ */
+
+/* all fixed rate clocks */
+
+/ {
+       POSC:posc_clk { /* On-chip primary oscillator */
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <24000000>;
+       };
+
+       FRC:frc_clk { /* internal FRC oscillator */
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <8000000>;
+       };
+
+       BFRC:bfrc_clk { /* internal backup FRC oscillator */
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <8000000>;
+       };
+
+       LPRC:lprc_clk { /* internal low-power FRC oscillator */
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <32000>;
+       };
+
+       /* UPLL provides clock to USBCORE */
+       UPLL:usb_phy_clk {
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <24000000>;
+               clock-output-names = "usbphy_clk";
+       };
+
+       TxCKI:txcki_clk { /* external clock input on TxCLKI pin */
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <4000000>;
+               status = "disabled";
+       };
+
+       /* external clock input on REFCLKIx pin */
+       REFIx:refix_clk {
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <24000000>;
+               status = "disabled";
+       };
+
+       /* PIC32 specific clks */
+       pic32_clktree {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               reg = <0x1f801200 0x200>;
+               compatible = "microchip,pic32mzda-clk";
+               ranges = <0 0x1f801200 0x200>;
+
+               /* secondary oscillator; external input on SOSCI pin */
+               SOSC:sosc_clk@0 {
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-sosc";
+                       clock-frequency = <32768>;
+                       reg = <0x000 0x10>,   /* enable reg */
+                             <0x1d0 0x10>; /* status reg */
+                       microchip,bit-mask = <0x02>; /* enable mask */
+                       microchip,status-bit-mask = <0x10>; /* status mask */
+               };
+
+               FRCDIV:frcdiv_clk {
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-frcdivclk";
+                       clocks = <&FRC>;
+                       clock-output-names = "frcdiv_clk";
+               };
+
+               /* System PLL clock */
+               SYSPLL:spll_clk@20 {
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-syspll";
+                       reg = <0x020 0x10>, /* SPLL register */
+                             <0x1d0 0x10>; /* CLKSTAT register */
+                       clocks = <&POSC>, <&FRC>;
+                       clock-output-names = "sys_pll";
+                       microchip,status-bit-mask = <0x80>; /* SPLLRDY */
+               };
+
+               /* system clock; mux with postdiv & slew */
+               SYSCLK:sys_clk@1c0 {
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-sysclk-v2";
+                       reg = <0x1c0 0x04>; /* SLEWCON */
+                       clocks = <&FRCDIV>, <&SYSPLL>, <&POSC>, <&SOSC>,
+                                <&LPRC>, <&FRCDIV>;
+                       microchip,clock-indices = <0>, <1>, <2>, <4>,
+                                                 <5>, <7>;
+                       clock-output-names = "sys_clk";
+               };
+
+               /* Peripheral bus1 clock */
+               PBCLK1:pb1_clk@140 {
+                       reg = <0x140 0x10>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       clocks = <&SYSCLK>;
+                       clock-output-names = "pb1_clk";
+                       /* used by system modules, not gateable */
+                       microchip,ignore-unused;
+               };
+
+               /* Peripheral bus2 clock */
+               PBCLK2:pb2_clk@150 {
+                       reg = <0x150 0x10>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       clocks = <&SYSCLK>;
+                       clock-output-names = "pb2_clk";
+                       /* avoid gating even if unused */
+                       microchip,ignore-unused;
+               };
+
+               /* Peripheral bus3 clock */
+               PBCLK3:pb3_clk@160 {
+                       reg = <0x160 0x10>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       clocks = <&SYSCLK>;
+                       clock-output-names = "pb3_clk";
+               };
+
+               /* Peripheral bus4 clock (I/O ports, GPIO) */
+               PBCLK4:pb4_clk@170 {
+                       reg = <0x170 0x10>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       clocks = <&SYSCLK>;
+                       clock-output-names = "pb4_clk";
+               };
+
+               /* Peripheral bus5 clock */
+               PBCLK5:pb5_clk@180 {
+                       reg = <0x180 0x10>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       clocks = <&SYSCLK>;
+                       clock-output-names = "pb5_clk";
+               };
+
+               /* Peripheral bus6 clock */
+               PBCLK6:pb6_clk@190 {
+                       reg = <0x190 0x10>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       clocks = <&SYSCLK>;
+                       #clock-cells = <0>;
+               };
+
+               /* Peripheral bus7 clock */
+               PBCLK7:pb7_clk@1a0 {
+                       reg = <0x1a0 0x10>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       /* CPU is driven by this clock; so named */
+                       clock-output-names = "cpu_clk";
+                       clocks = <&SYSCLK>;
+               };
+
+               /* Reference Oscillator clock for SPI/I2S */
+               REFCLKO1:refo1_clk@80 {
+                       reg = <0x080 0x20>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-refoclk";
+                       clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
+                                <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
+                       microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+                                                 <5>, <7>, <8>, <9>;
+                       clock-output-names = "refo1_clk";
+               };
+
+               /* Reference Oscillator clock for SQI */
+               REFCLKO2:refo2_clk@a0 {
+                       reg = <0x0a0 0x20>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-refoclk";
+                       clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
+                                <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
+                       microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+                                                 <5>, <7>, <8>, <9>;
+                       clock-output-names = "refo2_clk";
+               };
+
+               /* Reference Oscillator clock, ADC */
+               REFCLKO3:refo3_clk@c0 {
+                       reg = <0x0c0 0x20>;
+                       compatible = "microchip,pic32mzda-refoclk";
+                       clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
+                                <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
+                       microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+                                                 <5>, <7>, <8>, <9>;
+                       #clock-cells = <0>;
+                       clock-output-names = "refo3_clk";
+               };
+
+               /* Reference Oscillator clock */
+               REFCLKO4:refo4_clk@e0 {
+                       reg = <0x0e0 0x20>;
+                       compatible = "microchip,pic32mzda-refoclk";
+                       clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
+                                <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
+                       microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+                                                 <5>, <7>, <8>, <9>;
+                       #clock-cells = <0>;
+                       clock-output-names = "refo4_clk";
+               };
+
+               /* Reference Oscillator clock, LCD */
+               REFCLKO5:refo5_clk@100 {
+                       reg = <0x100 0x20>;
+                       compatible = "microchip,pic32mzda-refoclk";
+                       clocks = <&SYSCLK>,<&PBCLK1>,<&POSC>,<&FRC>,<&LPRC>,
+                                <&SOSC>,<&SYSPLL>,<&REFIx>,<&BFRC>;
+                       microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+                                                 <5>, <7>, <8>, <9>;
+                       #clock-cells = <0>;
+                       clock-output-names = "refo5_clk";
+               };
+       };
+};
diff --git a/arch/mips/boot/dts/pic32/pic32mzda.dtsi b/arch/mips/boot/dts/pic32/pic32mzda.dtsi
new file mode 100644 (file)
index 0000000..ad9e331
--- /dev/null
@@ -0,0 +1,281 @@
+/*
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <dt-bindings/interrupt-controller/irq.h>
+
+#include "pic32mzda-clk.dtsi"
+
+/ {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       interrupt-parent = <&evic>;
+
+       aliases {
+               gpio0 = &gpio0;
+               gpio1 = &gpio1;
+               gpio2 = &gpio2;
+               gpio3 = &gpio3;
+               gpio4 = &gpio4;
+               gpio5 = &gpio5;
+               gpio6 = &gpio6;
+               gpio7 = &gpio7;
+               gpio8 = &gpio8;
+               gpio9 = &gpio9;
+               serial0 = &uart1;
+               serial1 = &uart2;
+               serial2 = &uart3;
+               serial3 = &uart4;
+               serial4 = &uart5;
+               serial5 = &uart6;
+       };
+
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               cpu@0 {
+                       compatible = "mti,mips14KEc";
+                       device_type = "cpu";
+               };
+       };
+
+       soc {
+               compatible = "microchip,pic32mzda-infra";
+               interrupts = <0 IRQ_TYPE_EDGE_RISING>;
+       };
+
+       evic: interrupt-controller@1f810000 {
+               compatible = "microchip,pic32mzda-evic";
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               reg = <0x1f810000 0x1000>;
+               microchip,external-irqs = <3 8 13 18 23>;
+       };
+
+       pic32_pinctrl: pinctrl@1f801400 {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               compatible = "microchip,pic32mzda-pinctrl";
+               reg = <0x1f801400 0x400>;
+               clocks = <&PBCLK1>;
+       };
+
+       /* PORTA */
+       gpio0: gpio0@1f860000 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860000 0x100>;
+               interrupts = <118 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <0>;
+               gpio-ranges = <&pic32_pinctrl 0 0 16>;
+       };
+
+       /* PORTB */
+       gpio1: gpio1@1f860100 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860100 0x100>;
+               interrupts = <119 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <1>;
+               gpio-ranges = <&pic32_pinctrl 0 16 16>;
+       };
+
+       /* PORTC */
+       gpio2: gpio2@1f860200 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860200 0x100>;
+               interrupts = <120 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <2>;
+               gpio-ranges = <&pic32_pinctrl 0 32 16>;
+       };
+
+       /* PORTD */
+       gpio3: gpio3@1f860300 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860300 0x100>;
+               interrupts = <121 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <3>;
+               gpio-ranges = <&pic32_pinctrl 0 48 16>;
+       };
+
+       /* PORTE */
+       gpio4: gpio4@1f860400 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860400 0x100>;
+               interrupts = <122 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <4>;
+               gpio-ranges = <&pic32_pinctrl 0 64 16>;
+       };
+
+       /* PORTF */
+       gpio5: gpio5@1f860500 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860500 0x100>;
+               interrupts = <123 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <5>;
+               gpio-ranges = <&pic32_pinctrl 0 80 16>;
+       };
+
+       /* PORTG */
+       gpio6: gpio6@1f860600 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860600 0x100>;
+               interrupts = <124 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <6>;
+               gpio-ranges = <&pic32_pinctrl 0 96 16>;
+       };
+
+       /* PORTH */
+       gpio7: gpio7@1f860700 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860700 0x100>;
+               interrupts = <125 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <7>;
+               gpio-ranges = <&pic32_pinctrl 0 112 16>;
+       };
+
+       /* PORTI does not exist */
+
+       /* PORTJ */
+       gpio8: gpio8@1f860800 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860800 0x100>;
+               interrupts = <126 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <8>;
+               gpio-ranges = <&pic32_pinctrl 0 128 16>;
+       };
+
+       /* PORTK */
+       gpio9: gpio9@1f860900 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860900 0x100>;
+               interrupts = <127 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <9>;
+               gpio-ranges = <&pic32_pinctrl 0 144 16>;
+       };
+
+       sdhci: sdhci@1f8ec000 {
+               compatible = "microchip,pic32mzda-sdhci";
+               reg = <0x1f8ec000 0x100>;
+               interrupts = <191 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&REFCLKO4>, <&PBCLK5>;
+               clock-names = "base_clk", "sys_clk";
+               bus-width = <4>;
+               cap-sd-highspeed;
+               status = "disabled";
+       };
+
+       uart1: serial@1f822000 {
+               compatible = "microchip,pic32mzda-uart";
+               reg = <0x1f822000 0x50>;
+               interrupts = <112 IRQ_TYPE_LEVEL_HIGH>,
+                       <113 IRQ_TYPE_LEVEL_HIGH>,
+                       <114 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&PBCLK2>;
+               status = "disabled";
+       };
+
+       uart2: serial@1f822200 {
+               compatible = "microchip,pic32mzda-uart";
+               reg = <0x1f822200 0x50>;
+               interrupts = <145 IRQ_TYPE_LEVEL_HIGH>,
+                       <146 IRQ_TYPE_LEVEL_HIGH>,
+                       <147 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&PBCLK2>;
+               status = "disabled";
+       };
+
+       uart3: serial@1f822400 {
+               compatible = "microchip,pic32mzda-uart";
+               reg = <0x1f822400 0x50>;
+               interrupts = <157 IRQ_TYPE_LEVEL_HIGH>,
+                       <158 IRQ_TYPE_LEVEL_HIGH>,
+                       <159 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&PBCLK2>;
+               status = "disabled";
+       };
+
+       uart4: serial@1f822600 {
+               compatible = "microchip,pic32mzda-uart";
+               reg = <0x1f822600 0x50>;
+               interrupts = <170 IRQ_TYPE_LEVEL_HIGH>,
+                       <171 IRQ_TYPE_LEVEL_HIGH>,
+                       <172 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&PBCLK2>;
+               status = "disabled";
+       };
+
+       uart5: serial@1f822800 {
+               compatible = "microchip,pic32mzda-uart";
+               reg = <0x1f822800 0x50>;
+               interrupts = <179 IRQ_TYPE_LEVEL_HIGH>,
+                       <180 IRQ_TYPE_LEVEL_HIGH>,
+                       <181 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&PBCLK2>;
+               status = "disabled";
+       };
+
+       uart6: serial@1f822a00 {
+               compatible = "microchip,pic32mzda-uart";
+               reg = <0x1f822a00 0x50>;
+               interrupts = <188 IRQ_TYPE_LEVEL_HIGH>,
+                       <189 IRQ_TYPE_LEVEL_HIGH>,
+                       <190 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&PBCLK2>;
+               status = "disabled";
+       };
+};
diff --git a/arch/mips/boot/dts/pic32/pic32mzda_sk.dts b/arch/mips/boot/dts/pic32/pic32mzda_sk.dts
new file mode 100644 (file)
index 0000000..5d434a5
--- /dev/null
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+
+#include "pic32mzda.dtsi"
+
+/ {
+       compatible = "microchip,pic32mzda-sk", "microchip,pic32mzda";
+       model = "Microchip PIC32MZDA Starter Kit";
+
+       memory {
+               device_type = "memory";
+               reg = <0x08000000 0x08000000>;
+       };
+
+       chosen {
+               bootargs = "earlyprintk=ttyPIC1,115200n8r console=ttyPIC1,115200n8";
+       };
+
+       leds0 {
+               compatible = "gpio-leds";
+               pinctrl-names = "default";
+               pinctrl-0 = <&user_leds_s0>;
+
+               led@1 {
+                       label = "pic32mzda_sk:red:led1";
+                       gpios = <&gpio7 0 GPIO_ACTIVE_HIGH>;
+                       linux,default-trigger = "heartbeat";
+               };
+
+               led@2 {
+                       label = "pic32mzda_sk:yellow:led2";
+                       gpios = <&gpio7 1 GPIO_ACTIVE_HIGH>;
+                       linux,default-trigger = "mmc0";
+               };
+
+               led@3 {
+                       label = "pic32mzda_sk:green:led3";
+                       gpios = <&gpio7 2 GPIO_ACTIVE_HIGH>;
+                       default-state = "on";
+               };
+       };
+
+       keys0 {
+               compatible = "gpio-keys";
+               pinctrl-0 = <&user_buttons_s0>;
+               pinctrl-names = "default";
+
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               button@sw1 {
+                       label = "ESC";
+                       linux,code = <1>;
+                       gpios = <&gpio1 12 GPIO_ACTIVE_HIGH>;
+               };
+
+               button@sw2 {
+                       label = "Home";
+                       linux,code = <102>;
+                       gpios = <&gpio1 13 GPIO_ACTIVE_HIGH>;
+               };
+
+               button@sw3 {
+                       label = "Menu";
+                       linux,code = <139>;
+                       gpios = <&gpio1 14 GPIO_ACTIVE_HIGH>;
+               };
+       };
+};
+
+&uart2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart2>;
+       status = "okay";
+};
+
+&uart4 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart4>;
+       status = "okay";
+};
+
+&sdhci {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_sdhc1>;
+       status = "okay";
+       assigned-clocks = <&REFCLKO2>,<&REFCLKO4>,<&REFCLKO5>;
+       assigned-clock-rates = <50000000>,<25000000>,<40000000>;
+};
+
+&pic32_pinctrl {
+
+       pinctrl_sdhc1: sdhc1_pins0 {
+               pins = "A6", "D4", "G13", "G12", "G14", "A7", "A0";
+               microchip,digital;
+       };
+
+       user_leds_s0: user_leds_s0 {
+               pins = "H0", "H1", "H2";
+               output-low;
+               microchip,digital;
+       };
+
+       user_buttons_s0: user_buttons_s0 {
+               pins = "B12", "B13", "B14";
+               microchip,digital;
+               input-enable;
+               bias-pull-up;
+       };
+
+       pinctrl_uart2: pinctrl_uart2 {
+               uart2-tx {
+                       pins = "G9";
+                       function = "U2TX";
+                       microchip,digital;
+                       output-high;
+               };
+               uart2-rx {
+                       pins = "B0";
+                       function = "U2RX";
+                       microchip,digital;
+                       input-enable;
+               };
+       };
+
+       pinctrl_uart4: uart4-0 {
+               uart4-tx {
+                       pins = "C3";
+                       function = "U4TX";
+                       microchip,digital;
+                       output-high;
+               };
+               uart4-rx {
+                       pins = "E8";
+                       function = "U4RX";
+                       microchip,digital;
+                       input-enable;
+               };
+       };
+};
index 13d0439..3ad4ba9 100644 (file)
                        };
                };
 
+               usb@1b000100 {
+                       compatible = "qca,ar7100-ehci", "generic-ehci";
+                       reg = <0x1b000100 0x100>;
+
+                       interrupts = <3>;
+                       resets = <&rst 5>;
+
+                       has-transaction-translator;
+
+                       phy-names = "usb";
+                       phys = <&usb_phy>;
+
+                       status = "disabled";
+               };
+
                spi@1f000000 {
                        compatible = "qca,ar9132-spi", "qca,ar7100-spi";
                        reg = <0x1f000000 0x10>;
                        #size-cells = <0>;
                };
        };
+
+       usb_phy: usb-phy {
+               compatible = "qca,ar7100-usb-phy";
+
+               reset-names = "usb-phy", "usb-suspend-override";
+               resets = <&rst 4>, <&rst 3>;
+
+               #phy-cells = <0>;
+
+               status = "disabled";
+       };
 };
index 003015a..e535ee3 100644 (file)
                        };
                };
 
+               usb@1b000100 {
+                       status = "okay";
+               };
+
                spi@1f000000 {
                        status = "okay";
                        num-cs = <1>;
                };
        };
 
+       usb-phy {
+               status = "okay";
+       };
+
        gpio-keys {
                compatible = "gpio-keys-polled";
                #address-cells = <1>;
diff --git a/arch/mips/configs/pic32mzda_defconfig b/arch/mips/configs/pic32mzda_defconfig
new file mode 100644 (file)
index 0000000..52192c6
--- /dev/null
@@ -0,0 +1,89 @@
+CONFIG_MACH_PIC32=y
+CONFIG_DTB_PIC32_MZDA_SK=y
+CONFIG_HZ_100=y
+CONFIG_PREEMPT_VOLUNTARY=y
+# CONFIG_SECCOMP is not set
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_RELAY=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EMBEDDED=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_BSGLIB=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_SGI_PARTITION=y
+CONFIG_BINFMT_MISC=m
+# CONFIG_SUSPEND is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_ALLOW_DEV_COREDUMP is not set
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SCAN_ASYNC=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_INPUT_LEDS=m
+CONFIG_INPUT_POLLDEV=y
+CONFIG_INPUT_MOUSEDEV=m
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_EVBUG=m
+# CONFIG_KEYBOARD_ATKBD is not set
+CONFIG_KEYBOARD_GPIO=m
+CONFIG_KEYBOARD_GPIO_POLLED=m
+# CONFIG_MOUSE_PS2 is not set
+# CONFIG_SERIO is not set
+CONFIG_SERIAL_PIC32=y
+CONFIG_SERIAL_PIC32_CONSOLE=y
+CONFIG_HW_RANDOM=y
+CONFIG_RAW_DRIVER=m
+CONFIG_GPIO_SYSFS=y
+# CONFIG_HWMON is not set
+CONFIG_HIDRAW=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_MICROCHIP_PIC32=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=m
+CONFIG_LEDS_TRIGGER_ONESHOT=m
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_LEDS_TRIGGER_GPIO=m
+CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
+# CONFIG_MIPS_PLATFORM_DEVICES is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_FSCACHE=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZ4=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
index 06b9bc7..c3212ff 100644 (file)
 #ifndef __ASM_CACHEOPS_H
 #define __ASM_CACHEOPS_H
 
+/*
+ * Most cache ops are split into a 2 bit field identifying the cache, and a 3
+ * bit field identifying the cache operation.
+ */
+#define CacheOp_Cache                  0x03
+#define CacheOp_Op                     0x1c
+
+#define Cache_I                                0x00
+#define Cache_D                                0x01
+#define Cache_T                                0x02
+#define Cache_S                                0x03
+
+#define Index_Writeback_Inv            0x00
+#define Index_Load_Tag                 0x04
+#define Index_Store_Tag                        0x08
+#define Hit_Invalidate                 0x10
+#define Hit_Writeback_Inv              0x14    /* not with Cache_I though */
+#define Hit_Writeback                  0x18
+
 /*
  * Cache Operations available on all MIPS processors with R4000-style caches
  */
-#define Index_Invalidate_I             0x00
-#define Index_Writeback_Inv_D          0x01
-#define Index_Load_Tag_I               0x04
-#define Index_Load_Tag_D               0x05
-#define Index_Store_Tag_I              0x08
-#define Index_Store_Tag_D              0x09
-#define Hit_Invalidate_I               0x10
-#define Hit_Invalidate_D               0x11
-#define Hit_Writeback_Inv_D            0x15
+#define Index_Invalidate_I             (Cache_I | Index_Writeback_Inv)
+#define Index_Writeback_Inv_D          (Cache_D | Index_Writeback_Inv)
+#define Index_Load_Tag_I               (Cache_I | Index_Load_Tag)
+#define Index_Load_Tag_D               (Cache_D | Index_Load_Tag)
+#define Index_Store_Tag_I              (Cache_I | Index_Store_Tag)
+#define Index_Store_Tag_D              (Cache_D | Index_Store_Tag)
+#define Hit_Invalidate_I               (Cache_I | Hit_Invalidate)
+#define Hit_Invalidate_D               (Cache_D | Hit_Invalidate)
+#define Hit_Writeback_Inv_D            (Cache_D | Hit_Writeback_Inv)
 
 /*
  * R4000-specific cacheops
  */
-#define Create_Dirty_Excl_D            0x0d
-#define Fill                           0x14
-#define Hit_Writeback_I                        0x18
-#define Hit_Writeback_D                        0x19
+#define Create_Dirty_Excl_D            (Cache_D | 0x0c)
+#define Fill                           (Cache_I | 0x14)
+#define Hit_Writeback_I                        (Cache_I | Hit_Writeback)
+#define Hit_Writeback_D                        (Cache_D | Hit_Writeback)
 
 /*
  * R4000SC and R4400SC-specific cacheops
  */
-#define Index_Invalidate_SI            0x02
-#define Index_Writeback_Inv_SD         0x03
-#define Index_Load_Tag_SI              0x06
-#define Index_Load_Tag_SD              0x07
-#define Index_Store_Tag_SI             0x0A
-#define Index_Store_Tag_SD             0x0B
-#define Create_Dirty_Excl_SD           0x0f
-#define Hit_Invalidate_SI              0x12
-#define Hit_Invalidate_SD              0x13
-#define Hit_Writeback_Inv_SD           0x17
-#define Hit_Writeback_SD               0x1b
-#define Hit_Set_Virtual_SI             0x1e
-#define Hit_Set_Virtual_SD             0x1f
+#define Cache_SI                       0x02
+#define Cache_SD                       0x03
+
+#define Index_Invalidate_SI            (Cache_SI | Index_Writeback_Inv)
+#define Index_Writeback_Inv_SD         (Cache_SD | Index_Writeback_Inv)
+#define Index_Load_Tag_SI              (Cache_SI | Index_Load_Tag)
+#define Index_Load_Tag_SD              (Cache_SD | Index_Load_Tag)
+#define Index_Store_Tag_SI             (Cache_SI | Index_Store_Tag)
+#define Index_Store_Tag_SD             (Cache_SD | Index_Store_Tag)
+#define Create_Dirty_Excl_SD           (Cache_SD | 0x0c)
+#define Hit_Invalidate_SI              (Cache_SI | Hit_Invalidate)
+#define Hit_Invalidate_SD              (Cache_SD | Hit_Invalidate)
+#define Hit_Writeback_Inv_SD           (Cache_SD | Hit_Writeback_Inv)
+#define Hit_Writeback_SD               (Cache_SD | Hit_Writeback)
+#define Hit_Set_Virtual_SI             (Cache_SI | 0x1c)
+#define Hit_Set_Virtual_SD             (Cache_SD | 0x1c)
 
 /*
  * R5000-specific cacheops
  */
-#define R5K_Page_Invalidate_S          0x17
+#define R5K_Page_Invalidate_S          (Cache_S | 0x14)
 
 /*
  * RM7000-specific cacheops
  */
-#define Page_Invalidate_T              0x16
-#define Index_Store_Tag_T              0x0a
-#define Index_Load_Tag_T               0x06
+#define Page_Invalidate_T              (Cache_T | 0x14)
+#define Index_Store_Tag_T              (Cache_T | Index_Store_Tag)
+#define Index_Load_Tag_T               (Cache_T | Index_Load_Tag)
 
 /*
  * R10000-specific cacheops
  * Cacheops 0x02, 0x06, 0x0a, 0x0c-0x0e, 0x16, 0x1a and 0x1e are unused.
  * Most of the _S cacheops are identical to the R4000SC _SD cacheops.
  */
-#define Index_Writeback_Inv_S          0x03
-#define Index_Load_Tag_S               0x07
-#define Index_Store_Tag_S              0x0B
-#define Hit_Invalidate_S               0x13
+#define Index_Writeback_Inv_S          (Cache_S | Index_Writeback_Inv)
+#define Index_Load_Tag_S               (Cache_S | Index_Load_Tag)
+#define Index_Store_Tag_S              (Cache_S | Index_Store_Tag)
+#define Hit_Invalidate_S               (Cache_S | Hit_Invalidate)
 #define Cache_Barrier                  0x14
-#define Hit_Writeback_Inv_S            0x17
-#define Index_Load_Data_I              0x18
-#define Index_Load_Data_D              0x19
-#define Index_Load_Data_S              0x1b
-#define Index_Store_Data_I             0x1c
-#define Index_Store_Data_D             0x1d
-#define Index_Store_Data_S             0x1f
+#define Hit_Writeback_Inv_S            (Cache_S | Hit_Writeback_Inv)
+#define Index_Load_Data_I              (Cache_I | 0x18)
+#define Index_Load_Data_D              (Cache_D | 0x18)
+#define Index_Load_Data_S              (Cache_S | 0x18)
+#define Index_Store_Data_I             (Cache_I | 0x1c)
+#define Index_Store_Data_D             (Cache_D | 0x1c)
+#define Index_Store_Data_S             (Cache_S | 0x1c)
 
 /*
  * Loongson2-specific cacheops
  */
-#define Hit_Invalidate_I_Loongson2     0x00
+#define Hit_Invalidate_I_Loongson2     (Cache_I | 0x00)
 
 #endif /* __ASM_CACHEOPS_H */
index d1e04c9..eeec8c8 100644 (file)
 # define cpu_has_small_pages   (cpu_data[0].options & MIPS_CPU_SP)
 #endif
 
+#ifndef cpu_has_nan_legacy
+#define cpu_has_nan_legacy     (cpu_data[0].options & MIPS_CPU_NAN_LEGACY)
+#endif
+#ifndef cpu_has_nan_2008
+#define cpu_has_nan_2008       (cpu_data[0].options & MIPS_CPU_NAN_2008)
+#endif
+
 #endif /* __ASM_CPU_FEATURES_H */
index 82ad15f..a97ca97 100644 (file)
@@ -386,6 +386,8 @@ enum cpu_type_enum {
 #define MIPS_CPU_BP_GHIST      0x8000000000ull /* R12K+ Branch Prediction Global History */
 #define MIPS_CPU_SP            0x10000000000ull /* Small (1KB) page support */
 #define MIPS_CPU_FTLB          0x20000000000ull /* CPU has Fixed-page-size TLB */
+#define MIPS_CPU_NAN_LEGACY    0x40000000000ull /* Legacy NaN implemented */
+#define MIPS_CPU_NAN_2008      0x80000000000ull /* 2008 NaN implemented */
 
 /*
  * CPU ASE encodings
index b01a6ff..cefb7a5 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/fs.h>
 #include <uapi/linux/elf.h>
 
-#include <asm/cpu-info.h>
 #include <asm/current.h>
 
 /* ELF header e_flags defines. */
@@ -44,6 +43,7 @@
 #define EF_MIPS_OPTIONS_FIRST  0x00000080
 #define EF_MIPS_32BITMODE      0x00000100
 #define EF_MIPS_FP64           0x00000200
+#define EF_MIPS_NAN2008                0x00000400
 #define EF_MIPS_ABI            0x0000f000
 #define EF_MIPS_ARCH           0xf0000000
 
@@ -305,7 +305,7 @@ do {                                                                        \
                                                                        \
        current->thread.abi = &mips_abi;                                \
                                                                        \
-       current->thread.fpu.fcr31 = boot_cpu_data.fpu_csr31;            \
+       mips_set_personality_nan(state);                                \
 } while (0)
 
 #endif /* CONFIG_32BIT */
@@ -367,7 +367,7 @@ do {                                                                        \
        else                                                            \
                current->thread.abi = &mips_abi;                        \
                                                                        \
-       current->thread.fpu.fcr31 = boot_cpu_data.fpu_csr31;            \
+       mips_set_personality_nan(state);                                \
                                                                        \
        p = personality(current->personality);                          \
        if (p != PER_LINUX32 && p != PER_LINUX)                         \
@@ -432,6 +432,7 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
                                       int uses_interp);
 
 struct arch_elf_state {
+       int nan_2008;
        int fp_abi;
        int interp_fp_abi;
        int overall_fp_mode;
@@ -440,17 +441,23 @@ struct arch_elf_state {
 #define MIPS_ABI_FP_UNKNOWN    (-1)    /* Unknown FP ABI (kernel internal) */
 
 #define INIT_ARCH_ELF_STATE {                  \
+       .nan_2008 = -1,                         \
        .fp_abi = MIPS_ABI_FP_UNKNOWN,          \
        .interp_fp_abi = MIPS_ABI_FP_UNKNOWN,   \
        .overall_fp_mode = -1,                  \
 }
 
+/* Whether to accept legacy-NaN and 2008-NaN user binaries.  */
+extern bool mips_use_nan_legacy;
+extern bool mips_use_nan_2008;
+
 extern int arch_elf_pt_proc(void *ehdr, void *phdr, struct file *elf,
                            bool is_interp, struct arch_elf_state *state);
 
-extern int arch_check_elf(void *ehdr, bool has_interpreter,
+extern int arch_check_elf(void *ehdr, bool has_interpreter, void *interp_ehdr,
                          struct arch_elf_state *state);
 
+extern void mips_set_personality_nan(struct arch_elf_state *state);
 extern void mips_set_personality_fp(struct arch_elf_state *state);
 
 #endif /* _ASM_ELF_H */
index 2f021cd..3225c3c 100644 (file)
@@ -79,7 +79,7 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
 /*
  * Break instruction with special math emu break code set
  */
-#define BREAK_MATH (0x0000000d | (BRK_MEMU << 16))
+#define BREAK_MATH(micromips) (((micromips) ? 0x7 : 0xd) | (BRK_MEMU << 16))
 
 #define SIGNALLING_NAN 0x7ff800007ff80000LL
 
index d10fd80..2b4dc7a 100644 (file)
@@ -275,6 +275,7 @@ static inline void __iomem * __ioremap_mode(phys_addr_t offset, unsigned long si
  */
 #define ioremap_cachable(offset, size)                                 \
        __ioremap_mode((offset), (size), _page_cachable_default)
+#define ioremap_cache ioremap_cachable
 
 /*
  * These two are MIPS specific ioremap variant.         ioremap_cacheable_cow
index e7b138b..65c351e 100644 (file)
@@ -84,41 +84,11 @@ static inline void arch_local_irq_restore(unsigned long flags)
        : "memory");
 }
 
-static inline void __arch_local_irq_restore(unsigned long flags)
-{
-       __asm__ __volatile__(
-       "       .set    push                                            \n"
-       "       .set    noreorder                                       \n"
-       "       .set    noat                                            \n"
-#if defined(CONFIG_IRQ_MIPS_CPU)
-       /*
-        * Slow, but doesn't suffer from a relatively unlikely race
-        * condition we're having since days 1.
-        */
-       "       beqz    %[flags], 1f                                    \n"
-       "       di                                                      \n"
-       "       ei                                                      \n"
-       "1:                                                             \n"
-#else
-       /*
-        * Fast, dangerous.  Life is fun, life is good.
-        */
-       "       mfc0    $1, $12                                         \n"
-       "       ins     $1, %[flags], 0, 1                              \n"
-       "       mtc0    $1, $12                                         \n"
-#endif
-       "       " __stringify(__irq_disable_hazard) "                   \n"
-       "       .set    pop                                             \n"
-       : [flags] "=r" (flags)
-       : "0" (flags)
-       : "memory");
-}
 #else
 /* Functions that require preempt_{dis,en}able() are in mips-atomic.c */
 void arch_local_irq_disable(void);
 unsigned long arch_local_irq_save(void);
 void arch_local_irq_restore(unsigned long flags);
-void __arch_local_irq_restore(unsigned long flags);
 #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
 
 static inline void arch_local_irq_enable(void)
index 7c19144..f6b1279 100644 (file)
@@ -58,7 +58,7 @@
 #define KVM_MAX_VCPUS          1
 #define KVM_USER_MEM_SLOTS     8
 /* memory slots that does not exposed to userspace */
-#define KVM_PRIVATE_MEM_SLOTS  0
+#define KVM_PRIVATE_MEM_SLOTS  0
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_HALT_POLL_NS_DEFAULT 500000
 #define KVM_INVALID_INST               0xdeadbeef
 #define KVM_INVALID_ADDR               0xdeadbeef
 
-#define KVM_MALTA_GUEST_RTC_ADDR       0xb8000070UL
-
-#define GUEST_TICKS_PER_JIFFY          (40000000/HZ)
-#define MS_TO_NS(x)                    (x * 1E6L)
-
-#define CAUSEB_DC                      27
-#define CAUSEF_DC                      (_ULCAST_(1) << 27)
-
 extern atomic_t kvm_mips_instance;
 extern kvm_pfn_t (*kvm_mips_gfn_to_pfn)(struct kvm *kvm, gfn_t gfn);
 extern void (*kvm_mips_release_pfn_clean)(kvm_pfn_t pfn);
@@ -289,34 +281,6 @@ enum mips_mmu_types {
        MMU_TYPE_R8000
 };
 
-/*
- * Trap codes
- */
-#define T_INT                  0       /* Interrupt pending */
-#define T_TLB_MOD              1       /* TLB modified fault */
-#define T_TLB_LD_MISS          2       /* TLB miss on load or ifetch */
-#define T_TLB_ST_MISS          3       /* TLB miss on a store */
-#define T_ADDR_ERR_LD          4       /* Address error on a load or ifetch */
-#define T_ADDR_ERR_ST          5       /* Address error on a store */
-#define T_BUS_ERR_IFETCH       6       /* Bus error on an ifetch */
-#define T_BUS_ERR_LD_ST                7       /* Bus error on a load or store */
-#define T_SYSCALL              8       /* System call */
-#define T_BREAK                        9       /* Breakpoint */
-#define T_RES_INST             10      /* Reserved instruction exception */
-#define T_COP_UNUSABLE         11      /* Coprocessor unusable */
-#define T_OVFLOW               12      /* Arithmetic overflow */
-
-/*
- * Trap definitions added for r4000 port.
- */
-#define T_TRAP                 13      /* Trap instruction */
-#define T_VCEI                 14      /* Virtual coherency exception */
-#define T_MSAFPE               14      /* MSA floating point exception */
-#define T_FPE                  15      /* Floating point exception */
-#define T_MSADIS               21      /* MSA disabled exception */
-#define T_WATCH                        23      /* Watch address reference */
-#define T_VCED                 31      /* Virtual coherency data */
-
 /* Resume Flags */
 #define RESUME_FLAG_DR         (1<<0)  /* Reload guest nonvolatile state? */
 #define RESUME_FLAG_HOST       (1<<1)  /* Resume host? */
@@ -686,7 +650,6 @@ extern void kvm_mips_dump_host_tlbs(void);
 extern void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu);
 extern void kvm_mips_flush_host_tlb(int skip_kseg0);
 extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi);
-extern int kvm_mips_host_tlb_inv_index(struct kvm_vcpu *vcpu, int index);
 
 extern int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu,
                                     unsigned long entryhi);
index 4eee221..2b34872 100644 (file)
@@ -115,6 +115,7 @@ static inline int soc_is_qca955x(void)
        return soc_is_qca9556() || soc_is_qca9558();
 }
 
+void ath79_ddr_wb_flush(unsigned int reg);
 void ath79_ddr_set_pci_windows(void);
 
 extern void __iomem *ath79_pll_base;
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h b/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h
deleted file mode 100644 (file)
index 1e6b587..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-#ifndef __BCM963XX_TAG_H
-#define __BCM963XX_TAG_H
-
-#define TAGVER_LEN             4       /* Length of Tag Version */
-#define TAGLAYOUT_LEN          4       /* Length of FlashLayoutVer */
-#define SIG1_LEN               20      /* Company Signature 1 Length */
-#define SIG2_LEN               14      /* Company Signature 2 Length */
-#define BOARDID_LEN            16      /* Length of BoardId */
-#define ENDIANFLAG_LEN         2       /* Endian Flag Length */
-#define CHIPID_LEN             6       /* Chip Id Length */
-#define IMAGE_LEN              10      /* Length of Length Field */
-#define ADDRESS_LEN            12      /* Length of Address field */
-#define DUALFLAG_LEN           2       /* Dual Image flag Length */
-#define INACTIVEFLAG_LEN       2       /* Inactie Flag Length */
-#define RSASIG_LEN             20      /* Length of RSA Signature in tag */
-#define TAGINFO1_LEN           30      /* Length of vendor information field1 in tag */
-#define FLASHLAYOUTVER_LEN     4       /* Length of Flash Layout Version String tag */
-#define TAGINFO2_LEN           16      /* Length of vendor information field2 in tag */
-#define ALTTAGINFO_LEN         54      /* Alternate length for vendor information; Pirelli */
-
-#define NUM_PIRELLI            2
-#define IMAGETAG_CRC_START     0xFFFFFFFF
-
-#define PIRELLI_BOARDS { \
-       "AGPF-S0", \
-       "DWV-S0", \
-}
-
-/*
- * The broadcom firmware assumes the rootfs starts the image,
- * therefore uses the rootfs start (flash_image_address)
- * to determine where to flash the image.  Since we have the kernel first
- * we have to give it the kernel address, but the crc uses the length
- * associated with this address (root_length), which is added to the kernel
- * length (kernel_length) to determine the length of image to flash and thus
- * needs to be rootfs + deadcode (jffs2 EOF marker)
-*/
-
-struct bcm_tag {
-       /* 0-3: Version of the image tag */
-       char tag_version[TAGVER_LEN];
-       /* 4-23: Company Line 1 */
-       char sig_1[SIG1_LEN];
-       /*  24-37: Company Line 2 */
-       char sig_2[SIG2_LEN];
-       /* 38-43: Chip this image is for */
-       char chip_id[CHIPID_LEN];
-       /* 44-59: Board name */
-       char board_id[BOARDID_LEN];
-       /* 60-61: Map endianness -- 1 BE 0 LE */
-       char big_endian[ENDIANFLAG_LEN];
-       /* 62-71: Total length of image */
-       char total_length[IMAGE_LEN];
-       /* 72-83: Address in memory of CFE */
-       char cfe__address[ADDRESS_LEN];
-       /* 84-93: Size of CFE */
-       char cfe_length[IMAGE_LEN];
-       /* 94-105: Address in memory of image start
-        * (kernel for OpenWRT, rootfs for stock firmware)
-        */
-       char flash_image_start[ADDRESS_LEN];
-       /* 106-115: Size of rootfs */
-       char root_length[IMAGE_LEN];
-       /* 116-127: Address in memory of kernel */
-       char kernel_address[ADDRESS_LEN];
-       /* 128-137: Size of kernel */
-       char kernel_length[IMAGE_LEN];
-       /* 138-139: Unused at the moment */
-       char dual_image[DUALFLAG_LEN];
-       /* 140-141: Unused at the moment */
-       char inactive_flag[INACTIVEFLAG_LEN];
-       /* 142-161: RSA Signature (not used; some vendors may use this) */
-       char rsa_signature[RSASIG_LEN];
-       /* 162-191: Compilation and related information (not used in OpenWrt) */
-       char information1[TAGINFO1_LEN];
-       /* 192-195: Version flash layout */
-       char flash_layout_ver[FLASHLAYOUTVER_LEN];
-       /* 196-199: kernel+rootfs CRC32 */
-       __u32 fskernel_crc;
-       /* 200-215: Unused except on Alice Gate where is is information */
-       char information2[TAGINFO2_LEN];
-       /* 216-219: CRC32 of image less imagetag (kernel for Alice Gate) */
-       __u32 image_crc;
-       /* 220-223: CRC32 of rootfs partition */
-       __u32 rootfs_crc;
-       /* 224-227: CRC32 of kernel partition */
-       __u32 kernel_crc;
-       /* 228-235: Unused at present */
-       char reserved1[8];
-       /* 236-239: CRC32 of header excluding last 20 bytes */
-       __u32 header_crc;
-       /* 240-255: Unused at present */
-       char reserved2[16];
-};
-
-#endif /* __BCM63XX_TAG_H */
diff --git a/arch/mips/include/asm/mach-pic32/cpu-feature-overrides.h b/arch/mips/include/asm/mach-pic32/cpu-feature-overrides.h
new file mode 100644 (file)
index 0000000..4682308
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#ifndef __ASM_MACH_PIC32_CPU_FEATURE_OVERRIDES_H
+#define __ASM_MACH_PIC32_CPU_FEATURE_OVERRIDES_H
+
+/*
+ * CPU feature overrides for PIC32 boards
+ */
+#ifdef CONFIG_CPU_MIPS32
+#define cpu_has_vint           1
+#define cpu_has_veic           0
+#define cpu_has_tlb            1
+#define cpu_has_4kex           1
+#define cpu_has_4k_cache       1
+#define cpu_has_fpu            0
+#define cpu_has_counter                1
+#define cpu_has_llsc           1
+#define cpu_has_nofpuex                0
+#define cpu_icache_snoops_remote_store 1
+#endif /* CONFIG_CPU_MIPS32 */
+
+#ifdef CONFIG_CPU_MIPS64
+#error This platform does not support 64bit.
+#endif /* CONFIG_CPU_MIPS64 */
+
+#endif /* __ASM_MACH_PIC32_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mach-pic32/irq.h b/arch/mips/include/asm/mach-pic32/irq.h
new file mode 100644 (file)
index 0000000..864330c
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+#ifndef __ASM_MACH_PIC32_IRQ_H
+#define __ASM_MACH_PIC32_IRQ_H
+
+#define NR_IRQS        256
+#define MIPS_CPU_IRQ_BASE 0
+
+#include_next <irq.h>
+
+#endif /* __ASM_MACH_PIC32_IRQ_H */
diff --git a/arch/mips/include/asm/mach-pic32/pic32.h b/arch/mips/include/asm/mach-pic32/pic32.h
new file mode 100644 (file)
index 0000000..ce52e91
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+#ifndef _ASM_MACH_PIC32_H
+#define _ASM_MACH_PIC32_H
+
+#include <linux/io.h>
+
+/*
+ * PIC32 register offsets for SET/CLR/INV where supported.
+ */
+#define PIC32_CLR(_reg)                ((_reg) + 0x04)
+#define PIC32_SET(_reg)                ((_reg) + 0x08)
+#define PIC32_INV(_reg)                ((_reg) + 0x0C)
+
+/*
+ * PIC32 register block base addresses
+ */
+#define PIC32_BASE_CONFIG      0x1f800000
+#define PIC32_BASE_OSC         0x1f801200
+#define PIC32_BASE_RESET       0x1f801240
+#define PIC32_BASE_PPS         0x1f801400
+#define PIC32_BASE_UART                0x1f822000
+#define PIC32_BASE_PORT                0x1f860000
+#define PIC32_BASE_DEVCFG2     0x1fc4ff44
+
+/*
+ * Register unlock sequence required for some register access.
+ */
+void pic32_syskey_unlock_debug(const char *fn, const ulong ln);
+#define pic32_syskey_unlock()  \
+       pic32_syskey_unlock_debug(__func__, __LINE__)
+
+#endif /* _ASM_MACH_PIC32_H */
diff --git a/arch/mips/include/asm/mach-pic32/spaces.h b/arch/mips/include/asm/mach-pic32/spaces.h
new file mode 100644 (file)
index 0000000..046a0a9
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+#ifndef _ASM_MACH_PIC32_SPACES_H
+#define _ASM_MACH_PIC32_SPACES_H
+
+#ifdef CONFIG_PIC32MZDA
+#define PHYS_OFFSET    _AC(0x08000000, UL)
+#define UNCAC_BASE     _AC(0xa8000000, UL)
+#endif /* CONFIG_PIC32MZDA */
+
+#include <asm/mach-generic/spaces.h>
+
+#endif /* _ASM_MACH_PIC32_SPACES_H */
diff --git a/arch/mips/include/asm/mach-ralink/irq.h b/arch/mips/include/asm/mach-ralink/irq.h
new file mode 100644 (file)
index 0000000..4321865
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef __ASM_MACH_RALINK_IRQ_H
+#define __ASM_MACH_RALINK_IRQ_H
+
+#define GIC_NUM_INTRS  64
+#define NR_IRQS 256
+
+#include_next <irq.h> /* continue with the next irq.h found on the include path */
+
+#endif /* __ASM_MACH_RALINK_IRQ_H */
diff --git a/arch/mips/include/asm/mach-ralink/mt7621.h b/arch/mips/include/asm/mach-ralink/mt7621.h
new file mode 100644 (file)
index 0000000..610b61e
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _MT7621_REGS_H_
+#define _MT7621_REGS_H_
+
+#define MT7621_PALMBUS_BASE            0x1C000000
+#define MT7621_PALMBUS_SIZE            0x03FFFFFF /* NOTE(review): looks like size - 1 (inclusive span), not a byte count - confirm */
+
+#define MT7621_SYSC_BASE               0x1E000000
+
+#define SYSC_REG_CHIP_NAME0            0x00
+#define SYSC_REG_CHIP_NAME1            0x04
+#define SYSC_REG_CHIP_REV              0x0c
+#define SYSC_REG_SYSTEM_CONFIG0                0x10
+#define SYSC_REG_SYSTEM_CONFIG1                0x14
+
+#define CHIP_REV_PKG_MASK              0x1
+#define CHIP_REV_PKG_SHIFT             16
+#define CHIP_REV_VER_MASK              0xf
+#define CHIP_REV_VER_SHIFT             8
+#define CHIP_REV_ECO_MASK              0xf
+
+#define MT7621_DRAM_BASE                0x0
+#define MT7621_DDR2_SIZE_MIN           32 /* presumably in MiB - TODO confirm */
+#define MT7621_DDR2_SIZE_MAX           256 /* presumably in MiB - TODO confirm */
+
+#define MT7621_CHIP_NAME0              0x3637544D /* bytes, least significant first: 'M' 'T' '7' '6' */
+#define MT7621_CHIP_NAME1              0x20203132 /* bytes, least significant first: '2' '1' ' ' ' ' */
+
+#define MIPS_GIC_IRQ_BASE           (MIPS_CPU_IRQ_BASE + 8)
+
+#endif /* _MT7621_REGS_H_ */
diff --git a/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h
new file mode 100644 (file)
index 0000000..15db1b3
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * Ralink MT7621 specific CPU feature overrides
+ *
+ * Copyright (C) 2008-2009 Gabor Juhos <juhosg@openwrt.org>
+ * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
+ * Copyright (C) 2015 Felix Fietkau <nbd@openwrt.org>
+ *
+ * This file was derived from: include/asm-mips/cpu-features.h
+ *     Copyright (C) 2003, 2004 Ralf Baechle
+ *     Copyright (C) 2004 Maciej W. Rozycki
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ */
+#ifndef _MT7621_CPU_FEATURE_OVERRIDES_H
+#define _MT7621_CPU_FEATURE_OVERRIDES_H
+
+#define cpu_has_tlb            1
+#define cpu_has_4kex           1
+#define cpu_has_3k_cache       0
+#define cpu_has_4k_cache       1
+#define cpu_has_tx39_cache     0
+#define cpu_has_sb1_cache      0
+#define cpu_has_fpu            0
+#define cpu_has_32fpr          0
+#define cpu_has_counter                1
+#define cpu_has_watch          1
+#define cpu_has_divec          1
+
+#define cpu_has_prefetch       1
+#define cpu_has_ejtag          1
+#define cpu_has_llsc           1
+
+#define cpu_has_mips16         1
+#define cpu_has_mdmx           0
+#define cpu_has_mips3d         0
+#define cpu_has_smartmips      0
+
+#define cpu_has_mips32r1       1
+#define cpu_has_mips32r2       1
+#define cpu_has_mips64r1       0
+#define cpu_has_mips64r2       0
+
+#define cpu_has_dsp            1
+#define cpu_has_dsp2           0
+#define cpu_has_mipsmt         1
+
+#define cpu_has_64bits         0
+#define cpu_has_64bit_zero_reg 0
+#define cpu_has_64bit_gp_regs  0
+#define cpu_has_64bit_addresses        0
+
+#define cpu_dcache_line_size() 32
+#define cpu_icache_line_size() 32
+
+#define cpu_has_dc_aliases     0
+#define cpu_has_vtag_icache    0
+
+#define cpu_has_rixi           0
+#define cpu_has_tlbinv         0
+#define cpu_has_userlocal      1
+
+#endif /* _MT7621_CPU_FEATURE_OVERRIDES_H */
index 6516e9d..b196825 100644 (file)
@@ -243,6 +243,10 @@ BUILD_CM_Cx_R_(tcid_8_priority,    0x80)
 #define  CM_GCR_BASE_CMDEFTGT_IOCU0            2
 #define  CM_GCR_BASE_CMDEFTGT_IOCU1            3
 
+/* GCR_RESET_EXT_BASE register fields */
+#define CM_GCR_RESET_EXT_BASE_EVARESET         BIT(31)
+#define CM_GCR_RESET_EXT_BASE_UEB              BIT(30)
+
 /* GCR_ACCESS register fields */
 #define CM_GCR_ACCESS_ACCESSEN_SHF             0
 #define CM_GCR_ACCESS_ACCESSEN_MSK             (_ULCAST_(0xff) << 0)
index 4b89f28..1f6ea83 100644 (file)
@@ -52,7 +52,7 @@ do {                                                                  \
        __this_cpu_inc(mipsr2emustats.M);                               \
        err = __get_user(nir, (u32 __user *)regs->cp0_epc);             \
        if (!err) {                                                     \
-               if (nir == BREAK_MATH)                                  \
+               if (nir == BREAK_MATH(0))                               \
                        __this_cpu_inc(mipsr2bdemustats.M);             \
        }                                                               \
        preempt_enable();                                               \
index e43aca1..3ad19ad 100644 (file)
 #define CAUSEF_IV              (_ULCAST_(1)   << 23)
 #define CAUSEB_PCI             26
 #define CAUSEF_PCI             (_ULCAST_(1)   << 26)
+#define CAUSEB_DC              27
+#define CAUSEF_DC              (_ULCAST_(1)   << 27)
 #define CAUSEB_CE              28
 #define CAUSEF_CE              (_ULCAST_(3)   << 28)
 #define CAUSEB_TI              30
 #define CAUSEB_BD              31
 #define CAUSEF_BD              (_ULCAST_(1)   << 31)
 
+/*
+ * Cause.ExcCode trap codes.
+ */
+#define EXCCODE_INT            0       /* Interrupt pending */
+#define EXCCODE_MOD            1       /* TLB modified fault */
+#define EXCCODE_TLBL           2       /* TLB miss on load or ifetch */
+#define EXCCODE_TLBS           3       /* TLB miss on a store */
+#define EXCCODE_ADEL           4       /* Address error on a load or ifetch */
+#define EXCCODE_ADES           5       /* Address error on a store */
+#define EXCCODE_IBE            6       /* Bus error on an ifetch */
+#define EXCCODE_DBE            7       /* Bus error on a load or store */
+#define EXCCODE_SYS            8       /* System call */
+#define EXCCODE_BP             9       /* Breakpoint */
+#define EXCCODE_RI             10      /* Reserved instruction exception */
+#define EXCCODE_CPU            11      /* Coprocessor unusable */
+#define EXCCODE_OV             12      /* Arithmetic overflow */
+#define EXCCODE_TR             13      /* Trap instruction */
+#define EXCCODE_MSAFPE         14      /* MSA floating point exception */
+#define EXCCODE_FPE            15      /* Floating point exception */
+#define EXCCODE_TLBRI          19      /* TLB Read-Inhibit exception */
+#define EXCCODE_TLBXI          20      /* TLB Execution-Inhibit exception */
+#define EXCCODE_MSADIS         21      /* MSA disabled exception */
+#define EXCCODE_MDMX           22      /* MDMX unusable exception */
+#define EXCCODE_WATCH          23      /* Watch address reference */
+#define EXCCODE_MCHECK         24      /* Machine check */
+#define EXCCODE_THREAD         25      /* Thread exceptions (MT) */
+#define EXCCODE_DSPDIS         26      /* DSP disabled exception */
+#define EXCCODE_GE             27      /* Virtualized guest exception (VZ) */
+
+/* Implementation specific trap codes used by MIPS cores */
+#define MIPS_EXCCODE_TLBPAR    16      /* TLB parity error exception */
+
 /*
  * Bits in the coprocessor 0 config register.
  */
index 2046c02..21ed715 100644 (file)
@@ -33,7 +33,7 @@
 #define PAGE_SHIFT     16
 #endif
 #define PAGE_SIZE      (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK      (~(PAGE_SIZE - 1))
+#define PAGE_MASK      (~((1 << PAGE_SHIFT) - 1))
 
 /*
  * This is used for calculating the real page sizes
index 6995b4a..9a4fe01 100644 (file)
@@ -353,7 +353,7 @@ static inline pte_t pte_mkdirty(pte_t pte)
 static inline pte_t pte_mkyoung(pte_t pte)
 {
        pte_val(pte) |= _PAGE_ACCESSED;
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
        if (!(pte_val(pte) & _PAGE_NO_READ))
                pte_val(pte) |= _PAGE_SILENT_READ;
        else
@@ -542,7 +542,7 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
 {
        pmd_val(pmd) |= _PAGE_ACCESSED;
 
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
        if (!(pmd_val(pmd) & _PAGE_NO_READ))
                pmd_val(pmd) |= _PAGE_SILENT_READ;
        else
index 9b44d5a..ddea53e 100644 (file)
@@ -116,7 +116,8 @@ enum cop_op {
        dmtc_op       = 0x05, ctc_op        = 0x06,
        mthc0_op      = 0x06, mthc_op       = 0x07,
        bc_op         = 0x08, bc1eqz_op     = 0x09,
-       bc1nez_op     = 0x0d, cop_op        = 0x10,
+       mfmc0_op      = 0x0b, bc1nez_op     = 0x0d,
+       wrpgpr_op     = 0x0e, cop_op        = 0x10,
        copm_op       = 0x18
 };
 
@@ -529,7 +530,7 @@ enum MIPS6e_i8_func {
 };
 
 /*
- * (microMIPS & MIPS16e) NOP instruction.
+ * (microMIPS) NOP instruction.
  */
 #define MM_NOP16       0x0c00
 
@@ -679,7 +680,7 @@ struct fp0_format {         /* FPU multiply and add format (MIPS32) */
        ;))))))
 };
 
-struct mm_fp0_format {         /* FPU multipy and add format (microMIPS) */
+struct mm_fp0_format {         /* FPU multiply and add format (microMIPS) */
        __BITFIELD_FIELD(unsigned int opcode : 6,
        __BITFIELD_FIELD(unsigned int ft : 5,
        __BITFIELD_FIELD(unsigned int fs : 5,
@@ -799,6 +800,13 @@ struct mm_x_format {               /* Scaled indexed load format (microMIPS) */
        ;)))))
 };
 
+struct mm_a_format {           /* ADDIUPC format (microMIPS) */
+       __BITFIELD_FIELD(unsigned int opcode : 6,
+       __BITFIELD_FIELD(unsigned int rs : 3,
+       __BITFIELD_FIELD(signed int simmediate : 23,
+       ;)))
+};
+
 /*
  * microMIPS instruction formats (16-bit length)
  */
@@ -940,6 +948,7 @@ union mips_instruction {
        struct mm_i_format mm_i_format;
        struct mm_m_format mm_m_format;
        struct mm_x_format mm_x_format;
+       struct mm_a_format mm_a_format;
        struct mm_b0_format mm_b0_format;
        struct mm_b1_format mm_b1_format;
        struct mm16_m_format mm16_m_format ;
index 09f4034..6392dbe 100644 (file)
@@ -190,7 +190,7 @@ static inline void check_daddi(void)
        printk("Checking for the daddi bug... ");
 
        local_irq_save(flags);
-       handler = set_except_vector(12, handle_daddi_ov);
+       handler = set_except_vector(EXCCODE_OV, handle_daddi_ov);
        /*
         * The following code fails to trigger an overflow exception
         * when executed on R4000 rev. 2.2 or 3.0 (PRId 00000422 or
@@ -214,7 +214,7 @@ static inline void check_daddi(void)
                ".set   pop"
                : "=r" (v), "=&r" (tmp)
                : "I" (0xffffffffffffdb9aUL), "I" (0x1234));
-       set_except_vector(12, handler);
+       set_except_vector(EXCCODE_OV, handler);
        local_irq_restore(flags);
 
        if (daddi_ov) {
@@ -225,14 +225,14 @@ static inline void check_daddi(void)
        printk("yes, workaround... ");
 
        local_irq_save(flags);
-       handler = set_except_vector(12, handle_daddi_ov);
+       handler = set_except_vector(EXCCODE_OV, handle_daddi_ov);
        asm volatile(
                "addiu  %1, $0, %2\n\t"
                "dsrl   %1, %1, 1\n\t"
                "daddi  %0, %1, %3"
                : "=r" (v), "=&r" (tmp)
                : "I" (0xffffffffffffdb9aUL), "I" (0x1234));
-       set_except_vector(12, handler);
+       set_except_vector(EXCCODE_OV, handler);
        local_irq_restore(flags);
 
        if (daddi_ov) {
index 6b90644..b725b71 100644 (file)
@@ -98,6 +98,161 @@ static inline void cpu_set_fpu_fcsr_mask(struct cpuinfo_mips *c)
        c->fpu_msk31 = ~(fcsr0 ^ fcsr1) & ~mask;
 }
 
+/*
+ * Determine the IEEE 754 NaN encodings and ABS.fmt/NEG.fmt execution modes
+ * supported by FPU hardware.
+ */
+static void cpu_set_fpu_2008(struct cpuinfo_mips *c)
+{
+       if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
+                           MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+                           MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
+               unsigned long sr, fir, fcsr, fcsr0, fcsr1;
+
+               sr = read_c0_status();
+               __enable_fpu(FPU_AS_IS);
+
+               fir = read_32bit_cp1_register(CP1_REVISION);
+               if (fir & MIPS_FPIR_HAS2008) {
+                       fcsr = read_32bit_cp1_register(CP1_STATUS);
+
+                       fcsr0 = fcsr & ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008);
+                       write_32bit_cp1_register(CP1_STATUS, fcsr0);
+                       fcsr0 = read_32bit_cp1_register(CP1_STATUS);
+
+                       fcsr1 = fcsr | FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+                       write_32bit_cp1_register(CP1_STATUS, fcsr1);
+                       fcsr1 = read_32bit_cp1_register(CP1_STATUS);
+
+                       write_32bit_cp1_register(CP1_STATUS, fcsr);
+
+                       if (!(fcsr0 & FPU_CSR_NAN2008))
+                               c->options |= MIPS_CPU_NAN_LEGACY;
+                       if (fcsr1 & FPU_CSR_NAN2008)
+                               c->options |= MIPS_CPU_NAN_2008;
+
+                       if ((fcsr0 ^ fcsr1) & FPU_CSR_ABS2008)
+                               c->fpu_msk31 &= ~FPU_CSR_ABS2008;
+                       else
+                               c->fpu_csr31 |= fcsr & FPU_CSR_ABS2008;
+
+                       if ((fcsr0 ^ fcsr1) & FPU_CSR_NAN2008)
+                               c->fpu_msk31 &= ~FPU_CSR_NAN2008;
+                       else
+                               c->fpu_csr31 |= fcsr & FPU_CSR_NAN2008;
+               } else {
+                       c->options |= MIPS_CPU_NAN_LEGACY;
+               }
+
+               write_c0_status(sr);
+       } else {
+               c->options |= MIPS_CPU_NAN_LEGACY;
+       }
+}
+
+/*
+ * IEEE 754 conformance mode to use.  Affects the NaN encoding and the
+ * ABS.fmt/NEG.fmt execution mode.
+ */
+static enum { STRICT, LEGACY, STD2008, RELAXED } ieee754 = STRICT;
+
+/*
+ * Set the IEEE 754 NaN encodings and the ABS.fmt/NEG.fmt execution modes
+ * to be supported by the FPU emulator according to the IEEE 754 conformance
+ * mode selected.  Note that "relaxed" straps the emulator so that it
+ * allows 2008-NaN binaries even for legacy processors.
+ */
+static void cpu_set_nofpu_2008(struct cpuinfo_mips *c)
+{
+       c->options &= ~(MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY);
+       c->fpu_csr31 &= ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008);
+       c->fpu_msk31 &= ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008);
+
+       switch (ieee754) {
+       case STRICT:
+               if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
+                                   MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+                                   MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
+                       c->options |= MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY;
+               } else {
+                       c->options |= MIPS_CPU_NAN_LEGACY;
+                       c->fpu_msk31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+               }
+               break;
+       case LEGACY:
+               c->options |= MIPS_CPU_NAN_LEGACY;
+               c->fpu_msk31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+               break;
+       case STD2008:
+               c->options |= MIPS_CPU_NAN_2008;
+               c->fpu_csr31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+               c->fpu_msk31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+               break;
+       case RELAXED:
+               c->options |= MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY;
+               break;
+       }
+}
+
+/*
+ * Override the IEEE 754 NaN encoding and ABS.fmt/NEG.fmt execution mode
+ * according to the "ieee754=" parameter.
+ */
+static void cpu_set_nan_2008(struct cpuinfo_mips *c)
+{
+       switch (ieee754) {
+       case STRICT:
+               mips_use_nan_legacy = !!cpu_has_nan_legacy;
+               mips_use_nan_2008 = !!cpu_has_nan_2008;
+               break;
+       case LEGACY:
+               mips_use_nan_legacy = !!cpu_has_nan_legacy;
+               mips_use_nan_2008 = !cpu_has_nan_legacy;
+               break;
+       case STD2008:
+               mips_use_nan_legacy = !cpu_has_nan_2008;
+               mips_use_nan_2008 = !!cpu_has_nan_2008;
+               break;
+       case RELAXED:
+               mips_use_nan_legacy = true;
+               mips_use_nan_2008 = true;
+               break;
+       }
+}
+
+/*
+ * IEEE 754 NaN encoding and ABS.fmt/NEG.fmt execution mode override
+ * settings:
+ *
+ * strict:  accept binaries that request a NaN encoding supported by the FPU
+ * legacy:  only accept legacy-NaN binaries
+ * 2008:    only accept 2008-NaN binaries
+ * relaxed: accept any binaries regardless of whether supported by the FPU
+ */
+static int __init ieee754_setup(char *s)
+{
+       if (!s)
+               return -1;
+       else if (!strcmp(s, "strict"))
+               ieee754 = STRICT;
+       else if (!strcmp(s, "legacy"))
+               ieee754 = LEGACY;
+       else if (!strcmp(s, "2008"))
+               ieee754 = STD2008;
+       else if (!strcmp(s, "relaxed"))
+               ieee754 = RELAXED;
+       else
+               return -1;
+
+       if (!(boot_cpu_data.options & MIPS_CPU_FPU))
+               cpu_set_nofpu_2008(&boot_cpu_data);
+       cpu_set_nan_2008(&boot_cpu_data);
+
+       return 0;
+}
+
+early_param("ieee754", ieee754_setup);
+
 /*
  * Set the FIR feature flags for the FPU emulator.
  */
@@ -113,6 +268,8 @@ static void cpu_set_nofpu_id(struct cpuinfo_mips *c)
        if (c->isa_level & (MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
                            MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6))
                value |= MIPS_FPIR_F64 | MIPS_FPIR_L | MIPS_FPIR_W;
+       if (c->options & MIPS_CPU_NAN_2008)
+               value |= MIPS_FPIR_HAS2008;
        c->fpu_id = value;
 }
 
@@ -137,6 +294,8 @@ static void cpu_set_fpu_opts(struct cpuinfo_mips *c)
        }
 
        cpu_set_fpu_fcsr_mask(c);
+       cpu_set_fpu_2008(c);
+       cpu_set_nan_2008(c);
 }
 
 /*
@@ -147,6 +306,8 @@ static void cpu_set_nofpu_opts(struct cpuinfo_mips *c)
        c->options &= ~MIPS_CPU_FPU;
        c->fpu_msk31 = mips_nofpu_msk31;
 
+       cpu_set_nofpu_2008(c);
+       cpu_set_nan_2008(c);
        cpu_set_nofpu_id(c);
 }
 
index 4a4d9e0..c3c234d 100644 (file)
 #include <linux/elf.h>
 #include <linux/sched.h>
 
+#include <asm/cpu-info.h>
+
+/* Whether to accept legacy-NaN and 2008-NaN user binaries.  */
+bool mips_use_nan_legacy;
+bool mips_use_nan_2008;
+
 /* FPU modes */
 enum {
        FP_FRE,
@@ -68,15 +74,23 @@ static struct mode_req none_req = { true, true, false, true, true };
 int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf,
                     bool is_interp, struct arch_elf_state *state)
 {
-       struct elf32_hdr *ehdr32 = _ehdr;
+       union {
+               struct elf32_hdr e32;
+               struct elf64_hdr e64;
+       } *ehdr = _ehdr;
        struct elf32_phdr *phdr32 = _phdr;
        struct elf64_phdr *phdr64 = _phdr;
        struct mips_elf_abiflags_v0 abiflags;
+       bool elf32;
+       u32 flags;
        int ret;
 
+       elf32 = ehdr->e32.e_ident[EI_CLASS] == ELFCLASS32;
+       flags = elf32 ? ehdr->e32.e_flags : ehdr->e64.e_flags;
+
        /* Lets see if this is an O32 ELF */
-       if (ehdr32->e_ident[EI_CLASS] == ELFCLASS32) {
-               if (ehdr32->e_flags & EF_MIPS_FP64) {
+       if (elf32) {
+               if (flags & EF_MIPS_FP64) {
                        /*
                         * Set MIPS_ABI_FP_OLD_64 for EF_MIPS_FP64. We will override it
                         * later if needed
@@ -120,13 +134,50 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf,
        return 0;
 }
 
-int arch_check_elf(void *_ehdr, bool has_interpreter,
+int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr,
                   struct arch_elf_state *state)
 {
-       struct elf32_hdr *ehdr = _ehdr;
+       union {
+               struct elf32_hdr e32;
+               struct elf64_hdr e64;
+       } *ehdr = _ehdr;
+       union {
+               struct elf32_hdr e32;
+               struct elf64_hdr e64;
+       } *iehdr = _interp_ehdr;
        struct mode_req prog_req, interp_req;
        int fp_abi, interp_fp_abi, abi0, abi1, max_abi;
-       bool is_mips64;
+       bool elf32;
+       u32 flags;
+
+       elf32 = ehdr->e32.e_ident[EI_CLASS] == ELFCLASS32;
+       flags = elf32 ? ehdr->e32.e_flags : ehdr->e64.e_flags;
+
+       /*
+        * Determine the NaN personality, reject the binary if not allowed.
+        * Also ensure that any interpreter matches the executable.
+        */
+       if (flags & EF_MIPS_NAN2008) {
+               if (mips_use_nan_2008)
+                       state->nan_2008 = 1;
+               else
+                       return -ENOEXEC;
+       } else {
+               if (mips_use_nan_legacy)
+                       state->nan_2008 = 0;
+               else
+                       return -ENOEXEC;
+       }
+       if (has_interpreter) {
+               bool ielf32;
+               u32 iflags;
+
+               ielf32 = iehdr->e32.e_ident[EI_CLASS] == ELFCLASS32;
+               iflags = ielf32 ? iehdr->e32.e_flags : iehdr->e64.e_flags;
+
+               if ((flags ^ iflags) & EF_MIPS_NAN2008)
+                       return -ELIBBAD;
+       }
 
        if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT))
                return 0;
@@ -142,21 +193,18 @@ int arch_check_elf(void *_ehdr, bool has_interpreter,
                abi0 = abi1 = fp_abi;
        }
 
-       is_mips64 = (ehdr->e_ident[EI_CLASS] == ELFCLASS64) ||
-                   (ehdr->e_flags & EF_MIPS_ABI2);
+       if (elf32 && !(flags & EF_MIPS_ABI2)) {
+               /* Default to a mode capable of running code expecting FR=0 */
+               state->overall_fp_mode = cpu_has_mips_r6 ? FP_FRE : FP_FR0;
 
-       if (is_mips64) {
+               /* Allow all ABIs we know about */
+               max_abi = MIPS_ABI_FP_64A;
+       } else {
                /* MIPS64 code always uses FR=1, thus the default is easy */
                state->overall_fp_mode = FP_FR1;
 
                /* Disallow access to the various FPXX & FP64 ABIs */
                max_abi = MIPS_ABI_FP_SOFT;
-       } else {
-               /* Default to a mode capable of running code expecting FR=0 */
-               state->overall_fp_mode = cpu_has_mips_r6 ? FP_FRE : FP_FR0;
-
-               /* Allow all ABIs we know about */
-               max_abi = MIPS_ABI_FP_64A;
        }
 
        if ((abi0 > max_abi && abi0 != MIPS_ABI_FP_UNKNOWN) ||
@@ -254,3 +302,27 @@ void mips_set_personality_fp(struct arch_elf_state *state)
                BUG();
        }
 }
+
+/*
+ * Select the IEEE 754 NaN encoding and ABS.fmt/NEG.fmt execution mode
+ * in FCSR according to the ELF NaN personality.
+ */
+void mips_set_personality_nan(struct arch_elf_state *state)
+{
+       struct cpuinfo_mips *c = &boot_cpu_data;
+       struct task_struct *t = current;
+
+       t->thread.fpu.fcr31 = c->fpu_csr31;
+       switch (state->nan_2008) {
+       case 0:
+               break;
+       case 1:
+               if (!(c->fpu_msk31 & FPU_CSR_NAN2008))
+                       t->thread.fpu.fcr31 |= FPU_CSR_NAN2008;
+               if (!(c->fpu_msk31 & FPU_CSR_ABS2008))
+                       t->thread.fpu.fcr31 |= FPU_CSR_ABS2008;
+               break;
+       default:
+               BUG();
+       }
+}
index c6854d9..705be43 100644 (file)
@@ -21,7 +21,7 @@ static struct txx9_pio_reg __iomem *txx9_pioptr;
 
 static int txx9_gpio_get(struct gpio_chip *chip, unsigned int offset)
 {
-       return __raw_readl(&txx9_pioptr->din) & (1 << offset);
+       return !!(__raw_readl(&txx9_pioptr->din) & (1 << offset));
 }
 
 static void txx9_gpio_set_raw(unsigned int offset, int value)
index 4f0ac78..a5279b2 100644 (file)
@@ -548,9 +548,6 @@ static const struct pt_regs_offset regoffset_table[] = {
        REG_OFFSET_NAME(c0_badvaddr, cp0_badvaddr),
        REG_OFFSET_NAME(c0_cause, cp0_cause),
        REG_OFFSET_NAME(c0_epc, cp0_epc),
-#ifdef CONFIG_MIPS_MT_SMTC
-       REG_OFFSET_NAME(c0_tcstatus, cp0_tcstatus),
-#endif
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
        REG_OFFSET_NAME(mpl0, mpl[0]),
        REG_OFFSET_NAME(mpl1, mpl[1]),
index 66aac55..569a7d5 100644 (file)
@@ -623,7 +623,7 @@ static void __init request_crashkernel(struct resource *res)
 
 #define USE_PROM_CMDLINE       IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_BOOTLOADER)
 #define USE_DTB_CMDLINE                IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB)
-#define EXTEND_WITH_PROM       IS_ENABLED(CONFIG_MIPS_CMDLINE_EXTEND)
+#define EXTEND_WITH_PROM       IS_ENABLED(CONFIG_MIPS_CMDLINE_DTB_EXTEND)
 
 static void __init arch_mem_init(char **cmdline_p)
 {
index e04c805..2ad4e4c 100644 (file)
@@ -202,6 +202,9 @@ static void boot_core(unsigned core)
        /* Ensure its coherency is disabled */
        write_gcr_co_coherence(0);
 
+       /* Start it with the legacy memory map and exception base */
+       write_gcr_co_reset_ext_base(CM_GCR_RESET_EXT_BASE_UEB);
+
        /* Ensure the core can access the GCRs */
        access = read_gcr_access();
        access |= 1 << (CM_GCR_ACCESS_ACCESSEN_SHF + core);
index 2242bdd..4472a7f 100644 (file)
 #include <asm/barrier.h>
 #include <asm/mipsregs.h>
 
-static atomic_t count_start_flag = ATOMIC_INIT(0);
+static unsigned int initcount = 0;
 static atomic_t count_count_start = ATOMIC_INIT(0);
 static atomic_t count_count_stop = ATOMIC_INIT(0);
-static atomic_t count_reference = ATOMIC_INIT(0);
 
 #define COUNTON 100
-#define NR_LOOPS 5
+#define NR_LOOPS 3
 
 void synchronise_count_master(int cpu)
 {
        int i;
        unsigned long flags;
-       unsigned int initcount;
 
        printk(KERN_INFO "Synchronize counters for CPU %u: ", cpu);
 
        local_irq_save(flags);
 
-       /*
-        * Notify the slaves that it's time to start
-        */
-       atomic_set(&count_reference, read_c0_count());
-       atomic_set(&count_start_flag, cpu);
-       smp_wmb();
-
-       /* Count will be initialised to current timer for all CPU's */
-       initcount = read_c0_count();
-
        /*
         * We loop a few times to get a primed instruction cache,
         * then the last pass is more or less synchronised and
@@ -63,9 +51,13 @@ void synchronise_count_master(int cpu)
                atomic_set(&count_count_stop, 0);
                smp_wmb();
 
-               /* this lets the slaves write their count register */
+               /* Let the slave write its count register */
                atomic_inc(&count_count_start);
 
+               /* Count will be initialised to current timer */
+               if (i == 1)
+                       initcount = read_c0_count();
+
                /*
                 * Everyone initialises count in the last loop:
                 */
@@ -73,7 +65,7 @@ void synchronise_count_master(int cpu)
                        write_c0_count(initcount);
 
                /*
-                * Wait for all slaves to leave the synchronization point:
+                * Wait for slave to leave the synchronization point:
                 */
                while (atomic_read(&count_count_stop) != 1)
                        mb();
@@ -83,7 +75,6 @@ void synchronise_count_master(int cpu)
        }
        /* Arrange for an interrupt in a short while */
        write_c0_compare(read_c0_count() + COUNTON);
-       atomic_set(&count_start_flag, 0);
 
        local_irq_restore(flags);
 
@@ -98,19 +89,12 @@ void synchronise_count_master(int cpu)
 void synchronise_count_slave(int cpu)
 {
        int i;
-       unsigned int initcount;
 
        /*
         * Not every cpu is online at the time this gets called,
         * so we first wait for the master to say everyone is ready
         */
 
-       while (atomic_read(&count_start_flag) != cpu)
-               mb();
-
-       /* Count will be initialised to next expire for all CPU's */
-       initcount = atomic_read(&count_reference);
-
        for (i = 0; i < NR_LOOPS; i++) {
                atomic_inc(&count_count_start);
                while (atomic_read(&count_count_start) != 2)
index 886cb19..bafcb7a 100644 (file)
@@ -2250,7 +2250,7 @@ void __init trap_init(void)
         * Only some CPUs have the watch exceptions.
         */
        if (cpu_has_watch)
-               set_except_vector(23, handle_watch);
+               set_except_vector(EXCCODE_WATCH, handle_watch);
 
        /*
         * Initialise interrupt handlers
@@ -2277,27 +2277,27 @@ void __init trap_init(void)
        if (board_be_init)
                board_be_init();
 
-       set_except_vector(0, using_rollback_handler() ? rollback_handle_int
-                                                     : handle_int);
-       set_except_vector(1, handle_tlbm);
-       set_except_vector(2, handle_tlbl);
-       set_except_vector(3, handle_tlbs);
+       set_except_vector(EXCCODE_INT, using_rollback_handler() ?
+                                       rollback_handle_int : handle_int);
+       set_except_vector(EXCCODE_MOD, handle_tlbm);
+       set_except_vector(EXCCODE_TLBL, handle_tlbl);
+       set_except_vector(EXCCODE_TLBS, handle_tlbs);
 
-       set_except_vector(4, handle_adel);
-       set_except_vector(5, handle_ades);
+       set_except_vector(EXCCODE_ADEL, handle_adel);
+       set_except_vector(EXCCODE_ADES, handle_ades);
 
-       set_except_vector(6, handle_ibe);
-       set_except_vector(7, handle_dbe);
+       set_except_vector(EXCCODE_IBE, handle_ibe);
+       set_except_vector(EXCCODE_DBE, handle_dbe);
 
-       set_except_vector(8, handle_sys);
-       set_except_vector(9, handle_bp);
-       set_except_vector(10, rdhwr_noopt ? handle_ri :
+       set_except_vector(EXCCODE_SYS, handle_sys);
+       set_except_vector(EXCCODE_BP, handle_bp);
+       set_except_vector(EXCCODE_RI, rdhwr_noopt ? handle_ri :
                          (cpu_has_vtag_icache ?
                           handle_ri_rdhwr_vivt : handle_ri_rdhwr));
-       set_except_vector(11, handle_cpu);
-       set_except_vector(12, handle_ov);
-       set_except_vector(13, handle_tr);
-       set_except_vector(14, handle_msa_fpe);
+       set_except_vector(EXCCODE_CPU, handle_cpu);
+       set_except_vector(EXCCODE_OV, handle_ov);
+       set_except_vector(EXCCODE_TR, handle_tr);
+       set_except_vector(EXCCODE_MSAFPE, handle_msa_fpe);
 
        if (current_cpu_type() == CPU_R6000 ||
            current_cpu_type() == CPU_R6000A) {
@@ -2318,25 +2318,25 @@ void __init trap_init(void)
                board_nmi_handler_setup();
 
        if (cpu_has_fpu && !cpu_has_nofpuex)
-               set_except_vector(15, handle_fpe);
+               set_except_vector(EXCCODE_FPE, handle_fpe);
 
-       set_except_vector(16, handle_ftlb);
+       set_except_vector(MIPS_EXCCODE_TLBPAR, handle_ftlb);
 
        if (cpu_has_rixiex) {
-               set_except_vector(19, tlb_do_page_fault_0);
-               set_except_vector(20, tlb_do_page_fault_0);
+               set_except_vector(EXCCODE_TLBRI, tlb_do_page_fault_0);
+               set_except_vector(EXCCODE_TLBXI, tlb_do_page_fault_0);
        }
 
-       set_except_vector(21, handle_msa);
-       set_except_vector(22, handle_mdmx);
+       set_except_vector(EXCCODE_MSADIS, handle_msa);
+       set_except_vector(EXCCODE_MDMX, handle_mdmx);
 
        if (cpu_has_mcheck)
-               set_except_vector(24, handle_mcheck);
+               set_except_vector(EXCCODE_MCHECK, handle_mcheck);
 
        if (cpu_has_mipsmt)
-               set_except_vector(25, handle_mt);
+               set_except_vector(EXCCODE_THREAD, handle_mt);
 
-       set_except_vector(26, handle_dsp);
+       set_except_vector(EXCCODE_DSPDIS, handle_dsp);
 
        if (board_cache_error_setup)
                board_cache_error_setup();
index 313c2e3..d88aa21 100644 (file)
@@ -11,4 +11,4 @@
 #include <linux/kvm_host.h>
 
 struct kvm_mips_callbacks *kvm_mips_callbacks;
-EXPORT_SYMBOL(kvm_mips_callbacks);
+EXPORT_SYMBOL_GPL(kvm_mips_callbacks);
index 521121b..f1527a4 100644 (file)
@@ -86,10 +86,8 @@ int kvm_mips_trans_mfc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu)
        } else {
                mfc0_inst = LW_TEMPLATE;
                mfc0_inst |= ((rt & 0x1f) << 16);
-               mfc0_inst |=
-                   offsetof(struct mips_coproc,
-                            reg[rd][sel]) + offsetof(struct kvm_mips_commpage,
-                                                     cop0);
+               mfc0_inst |= offsetof(struct kvm_mips_commpage,
+                                     cop0.reg[rd][sel]);
        }
 
        if (KVM_GUEST_KSEGX(opc) == KVM_GUEST_KSEG0) {
@@ -123,9 +121,7 @@ int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu)
        sel = inst & 0x7;
 
        mtc0_inst |= ((rt & 0x1f) << 16);
-       mtc0_inst |=
-           offsetof(struct mips_coproc,
-                    reg[rd][sel]) + offsetof(struct kvm_mips_commpage, cop0);
+       mtc0_inst |= offsetof(struct kvm_mips_commpage, cop0.reg[rd][sel]);
 
        if (KVM_GUEST_KSEGX(opc) == KVM_GUEST_KSEG0) {
                kseg0_opc =
index 1b675c7..b37954c 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/random.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
+#include <asm/cacheops.h>
 #include <asm/cpu-info.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
@@ -29,7 +30,6 @@
 #include <asm/r4kcache.h>
 #define CONFIG_MIPS_MT
 
-#include "opcode.h"
 #include "interrupt.h"
 #include "commpage.h"
 
@@ -1239,21 +1239,20 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
                        er = EMULATE_FAIL;
                        break;
 
-               case mfmcz_op:
+               case mfmc0_op:
 #ifdef KVM_MIPS_DEBUG_COP0_COUNTERS
                        cop0->stat[MIPS_CP0_STATUS][0]++;
 #endif
-                       if (rt != 0) {
+                       if (rt != 0)
                                vcpu->arch.gprs[rt] =
                                    kvm_read_c0_guest_status(cop0);
-                       }
                        /* EI */
                        if (inst & 0x20) {
-                               kvm_debug("[%#lx] mfmcz_op: EI\n",
+                               kvm_debug("[%#lx] mfmc0_op: EI\n",
                                          vcpu->arch.pc);
                                kvm_set_c0_guest_status(cop0, ST0_IE);
                        } else {
-                               kvm_debug("[%#lx] mfmcz_op: DI\n",
+                               kvm_debug("[%#lx] mfmc0_op: DI\n",
                                          vcpu->arch.pc);
                                kvm_clear_c0_guest_status(cop0, ST0_IE);
                        }
@@ -1545,19 +1544,6 @@ int kvm_mips_sync_icache(unsigned long va, struct kvm_vcpu *vcpu)
        return 0;
 }
 
-#define MIPS_CACHE_OP_INDEX_INV         0x0
-#define MIPS_CACHE_OP_INDEX_LD_TAG      0x1
-#define MIPS_CACHE_OP_INDEX_ST_TAG      0x2
-#define MIPS_CACHE_OP_IMP               0x3
-#define MIPS_CACHE_OP_HIT_INV           0x4
-#define MIPS_CACHE_OP_FILL_WB_INV       0x5
-#define MIPS_CACHE_OP_HIT_HB            0x6
-#define MIPS_CACHE_OP_FETCH_LOCK        0x7
-
-#define MIPS_CACHE_ICACHE               0x0
-#define MIPS_CACHE_DCACHE               0x1
-#define MIPS_CACHE_SEC                  0x3
-
 enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
                                             uint32_t cause,
                                             struct kvm_run *run,
@@ -1582,8 +1568,8 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
        base = (inst >> 21) & 0x1f;
        op_inst = (inst >> 16) & 0x1f;
        offset = (int16_t)inst;
-       cache = (inst >> 16) & 0x3;
-       op = (inst >> 18) & 0x7;
+       cache = op_inst & CacheOp_Cache;
+       op = op_inst & CacheOp_Op;
 
        va = arch->gprs[base] + offset;
 
@@ -1595,14 +1581,14 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
         * invalidate the caches entirely by stepping through all the
         * ways/indexes
         */
-       if (op == MIPS_CACHE_OP_INDEX_INV) {
+       if (op == Index_Writeback_Inv) {
                kvm_debug("@ %#lx/%#lx CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n",
                          vcpu->arch.pc, vcpu->arch.gprs[31], cache, op, base,
                          arch->gprs[base], offset);
 
-               if (cache == MIPS_CACHE_DCACHE)
+               if (cache == Cache_D)
                        r4k_blast_dcache();
-               else if (cache == MIPS_CACHE_ICACHE)
+               else if (cache == Cache_I)
                        r4k_blast_icache();
                else {
                        kvm_err("%s: unsupported CACHE INDEX operation\n",
@@ -1675,9 +1661,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
 
 skip_fault:
        /* XXXKYMA: Only a subset of cache ops are supported, used by Linux */
-       if (cache == MIPS_CACHE_DCACHE
-           && (op == MIPS_CACHE_OP_FILL_WB_INV
-               || op == MIPS_CACHE_OP_HIT_INV)) {
+       if (op_inst == Hit_Writeback_Inv_D || op_inst == Hit_Invalidate_D) {
                flush_dcache_line(va);
 
 #ifdef CONFIG_KVM_MIPS_DYN_TRANS
@@ -1687,7 +1671,7 @@ skip_fault:
                 */
                kvm_mips_trans_cache_va(inst, opc, vcpu);
 #endif
-       } else if (op == MIPS_CACHE_OP_HIT_INV && cache == MIPS_CACHE_ICACHE) {
+       } else if (op_inst == Hit_Invalidate_I) {
                flush_dcache_line(va);
                flush_icache_line(va);
 
@@ -1781,7 +1765,7 @@ enum emulation_result kvm_mips_emulate_syscall(unsigned long cause,
                kvm_debug("Delivering SYSCALL @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_SYSCALL << CAUSEB_EXCCODE));
+                                         (EXCCODE_SYS << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -1828,7 +1812,7 @@ enum emulation_result kvm_mips_emulate_tlbmiss_ld(unsigned long cause,
        }
 
        kvm_change_c0_guest_cause(cop0, (0xff),
-                                 (T_TLB_LD_MISS << CAUSEB_EXCCODE));
+                                 (EXCCODE_TLBL << CAUSEB_EXCCODE));
 
        /* setup badvaddr, context and entryhi registers for the guest */
        kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -1874,7 +1858,7 @@ enum emulation_result kvm_mips_emulate_tlbinv_ld(unsigned long cause,
        }
 
        kvm_change_c0_guest_cause(cop0, (0xff),
-                                 (T_TLB_LD_MISS << CAUSEB_EXCCODE));
+                                 (EXCCODE_TLBL << CAUSEB_EXCCODE));
 
        /* setup badvaddr, context and entryhi registers for the guest */
        kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -1918,7 +1902,7 @@ enum emulation_result kvm_mips_emulate_tlbmiss_st(unsigned long cause,
        }
 
        kvm_change_c0_guest_cause(cop0, (0xff),
-                                 (T_TLB_ST_MISS << CAUSEB_EXCCODE));
+                                 (EXCCODE_TLBS << CAUSEB_EXCCODE));
 
        /* setup badvaddr, context and entryhi registers for the guest */
        kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -1962,7 +1946,7 @@ enum emulation_result kvm_mips_emulate_tlbinv_st(unsigned long cause,
        }
 
        kvm_change_c0_guest_cause(cop0, (0xff),
-                                 (T_TLB_ST_MISS << CAUSEB_EXCCODE));
+                                 (EXCCODE_TLBS << CAUSEB_EXCCODE));
 
        /* setup badvaddr, context and entryhi registers for the guest */
        kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -2033,7 +2017,8 @@ enum emulation_result kvm_mips_emulate_tlbmod(unsigned long cause,
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
        }
 
-       kvm_change_c0_guest_cause(cop0, (0xff), (T_TLB_MOD << CAUSEB_EXCCODE));
+       kvm_change_c0_guest_cause(cop0, (0xff),
+                                 (EXCCODE_MOD << CAUSEB_EXCCODE));
 
        /* setup badvaddr, context and entryhi registers for the guest */
        kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -2068,7 +2053,7 @@ enum emulation_result kvm_mips_emulate_fpu_exc(unsigned long cause,
        arch->pc = KVM_GUEST_KSEG0 + 0x180;
 
        kvm_change_c0_guest_cause(cop0, (0xff),
-                                 (T_COP_UNUSABLE << CAUSEB_EXCCODE));
+                                 (EXCCODE_CPU << CAUSEB_EXCCODE));
        kvm_change_c0_guest_cause(cop0, (CAUSEF_CE), (0x1 << CAUSEB_CE));
 
        return EMULATE_DONE;
@@ -2096,7 +2081,7 @@ enum emulation_result kvm_mips_emulate_ri_exc(unsigned long cause,
                kvm_debug("Delivering RI @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_RES_INST << CAUSEB_EXCCODE));
+                                         (EXCCODE_RI << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2131,7 +2116,7 @@ enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause,
                kvm_debug("Delivering BP @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_BREAK << CAUSEB_EXCCODE));
+                                         (EXCCODE_BP << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2166,7 +2151,7 @@ enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause,
                kvm_debug("Delivering TRAP @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_TRAP << CAUSEB_EXCCODE));
+                                         (EXCCODE_TR << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2201,7 +2186,7 @@ enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause,
                kvm_debug("Delivering MSAFPE @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_MSAFPE << CAUSEB_EXCCODE));
+                                         (EXCCODE_MSAFPE << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2236,7 +2221,7 @@ enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause,
                kvm_debug("Delivering FPE @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_FPE << CAUSEB_EXCCODE));
+                                         (EXCCODE_FPE << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2271,7 +2256,7 @@ enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause,
                kvm_debug("Delivering MSADIS @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_MSADIS << CAUSEB_EXCCODE));
+                                         (EXCCODE_MSADIS << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2480,25 +2465,25 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause,
 
        if (usermode) {
                switch (exccode) {
-               case T_INT:
-               case T_SYSCALL:
-               case T_BREAK:
-               case T_RES_INST:
-               case T_TRAP:
-               case T_MSAFPE:
-               case T_FPE:
-               case T_MSADIS:
+               case EXCCODE_INT:
+               case EXCCODE_SYS:
+               case EXCCODE_BP:
+               case EXCCODE_RI:
+               case EXCCODE_TR:
+               case EXCCODE_MSAFPE:
+               case EXCCODE_FPE:
+               case EXCCODE_MSADIS:
                        break;
 
-               case T_COP_UNUSABLE:
+               case EXCCODE_CPU:
                        if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 0)
                                er = EMULATE_PRIV_FAIL;
                        break;
 
-               case T_TLB_MOD:
+               case EXCCODE_MOD:
                        break;
 
-               case T_TLB_LD_MISS:
+               case EXCCODE_TLBL:
                        /*
                         * We we are accessing Guest kernel space, then send an
                         * address error exception to the guest
@@ -2507,12 +2492,12 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause,
                                kvm_debug("%s: LD MISS @ %#lx\n", __func__,
                                          badvaddr);
                                cause &= ~0xff;
-                               cause |= (T_ADDR_ERR_LD << CAUSEB_EXCCODE);
+                               cause |= (EXCCODE_ADEL << CAUSEB_EXCCODE);
                                er = EMULATE_PRIV_FAIL;
                        }
                        break;
 
-               case T_TLB_ST_MISS:
+               case EXCCODE_TLBS:
                        /*
                         * We we are accessing Guest kernel space, then send an
                         * address error exception to the guest
@@ -2521,26 +2506,26 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause,
                                kvm_debug("%s: ST MISS @ %#lx\n", __func__,
                                          badvaddr);
                                cause &= ~0xff;
-                               cause |= (T_ADDR_ERR_ST << CAUSEB_EXCCODE);
+                               cause |= (EXCCODE_ADES << CAUSEB_EXCCODE);
                                er = EMULATE_PRIV_FAIL;
                        }
                        break;
 
-               case T_ADDR_ERR_ST:
+               case EXCCODE_ADES:
                        kvm_debug("%s: address error ST @ %#lx\n", __func__,
                                  badvaddr);
                        if ((badvaddr & PAGE_MASK) == KVM_GUEST_COMMPAGE_ADDR) {
                                cause &= ~0xff;
-                               cause |= (T_TLB_ST_MISS << CAUSEB_EXCCODE);
+                               cause |= (EXCCODE_TLBS << CAUSEB_EXCCODE);
                        }
                        er = EMULATE_PRIV_FAIL;
                        break;
-               case T_ADDR_ERR_LD:
+               case EXCCODE_ADEL:
                        kvm_debug("%s: address error LD @ %#lx\n", __func__,
                                  badvaddr);
                        if ((badvaddr & PAGE_MASK) == KVM_GUEST_COMMPAGE_ADDR) {
                                cause &= ~0xff;
-                               cause |= (T_TLB_LD_MISS << CAUSEB_EXCCODE);
+                               cause |= (EXCCODE_TLBL << CAUSEB_EXCCODE);
                        }
                        er = EMULATE_PRIV_FAIL;
                        break;
@@ -2583,13 +2568,12 @@ enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause,
         * an entry into the guest TLB.
         */
        index = kvm_mips_guest_tlb_lookup(vcpu,
-                                         (va & VPN2_MASK) |
-                                         (kvm_read_c0_guest_entryhi
-                                          (vcpu->arch.cop0) & ASID_MASK));
+                     (va & VPN2_MASK) |
+                     (kvm_read_c0_guest_entryhi(vcpu->arch.cop0) & ASID_MASK));
        if (index < 0) {
-               if (exccode == T_TLB_LD_MISS) {
+               if (exccode == EXCCODE_TLBL) {
                        er = kvm_mips_emulate_tlbmiss_ld(cause, opc, run, vcpu);
-               } else if (exccode == T_TLB_ST_MISS) {
+               } else if (exccode == EXCCODE_TLBS) {
                        er = kvm_mips_emulate_tlbmiss_st(cause, opc, run, vcpu);
                } else {
                        kvm_err("%s: invalid exc code: %d\n", __func__,
@@ -2604,10 +2588,10 @@ enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause,
                 * exception to the guest
                 */
                if (!TLB_IS_VALID(*tlb, va)) {
-                       if (exccode == T_TLB_LD_MISS) {
+                       if (exccode == EXCCODE_TLBL) {
                                er = kvm_mips_emulate_tlbinv_ld(cause, opc, run,
                                                                vcpu);
-                       } else if (exccode == T_TLB_ST_MISS) {
+                       } else if (exccode == EXCCODE_TLBS) {
                                er = kvm_mips_emulate_tlbinv_st(cause, opc, run,
                                                                vcpu);
                        } else {
index 9b44459..95f7906 100644 (file)
@@ -128,7 +128,7 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
                    && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
                    && (kvm_read_c0_guest_status(cop0) & IE_IRQ5)) {
                        allowed = 1;
-                       exccode = T_INT;
+                       exccode = EXCCODE_INT;
                }
                break;
 
@@ -137,7 +137,7 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
                    && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
                    && (kvm_read_c0_guest_status(cop0) & IE_IRQ0)) {
                        allowed = 1;
-                       exccode = T_INT;
+                       exccode = EXCCODE_INT;
                }
                break;
 
@@ -146,7 +146,7 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
                    && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
                    && (kvm_read_c0_guest_status(cop0) & IE_IRQ1)) {
                        allowed = 1;
-                       exccode = T_INT;
+                       exccode = EXCCODE_INT;
                }
                break;
 
@@ -155,7 +155,7 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
                    && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
                    && (kvm_read_c0_guest_status(cop0) & IE_IRQ2)) {
                        allowed = 1;
-                       exccode = T_INT;
+                       exccode = EXCCODE_INT;
                }
                break;
 
index 7e22108..81687ab 100644 (file)
@@ -335,7 +335,7 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra)
 
        /* Now restore the host state just enough to run the handlers */
 
-       /* Swtich EBASE to the one used by Linux */
+       /* Switch EBASE to the one used by Linux */
        /* load up the host EBASE */
        mfc0    v0, CP0_STATUS
 
@@ -490,11 +490,11 @@ __kvm_mips_return_to_guest:
        REG_ADDU t3, t1, t2
        LONG_L  k0, (t3)
        andi    k0, k0, 0xff
-       mtc0    k0,CP0_ENTRYHI
+       mtc0    k0, CP0_ENTRYHI
        ehb
 
        /* Disable RDHWR access */
-       mtc0    zero,  CP0_HWRENA
+       mtc0    zero, CP0_HWRENA
 
        /* load the guest context from VCPU and return */
        LONG_L  $0, VCPU_R0(k1)
@@ -606,11 +606,11 @@ __kvm_mips_return_to_host:
 
        /* Restore RDHWR access */
        PTR_LI  k0, 0x2000000F
-       mtc0    k0,  CP0_HWRENA
+       mtc0    k0, CP0_HWRENA
 
        /* Restore RA, which is the address we will return to */
-       LONG_L  ra, PT_R31(k1)
-       j       ra
+       LONG_L  ra, PT_R31(k1)
+       j       ra
         nop
 
 VECTOR_END(MIPSX(GuestExceptionEnd))
index b9b803f..8bc3977 100644 (file)
@@ -229,7 +229,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                            kzalloc(npages * sizeof(unsigned long), GFP_KERNEL);
 
                        if (!kvm->arch.guest_pmap) {
-                               kvm_err("Failed to allocate guest PMAP");
+                               kvm_err("Failed to allocate guest PMAP\n");
                                return;
                        }
 
@@ -1264,8 +1264,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
        }
 
        switch (exccode) {
-       case T_INT:
-               kvm_debug("[%d]T_INT @ %p\n", vcpu->vcpu_id, opc);
+       case EXCCODE_INT:
+               kvm_debug("[%d]EXCCODE_INT @ %p\n", vcpu->vcpu_id, opc);
 
                ++vcpu->stat.int_exits;
                trace_kvm_exit(vcpu, INT_EXITS);
@@ -1276,8 +1276,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
                ret = RESUME_GUEST;
                break;
 
-       case T_COP_UNUSABLE:
-               kvm_debug("T_COP_UNUSABLE: @ PC: %p\n", opc);
+       case EXCCODE_CPU:
+               kvm_debug("EXCCODE_CPU: @ PC: %p\n", opc);
 
                ++vcpu->stat.cop_unusable_exits;
                trace_kvm_exit(vcpu, COP_UNUSABLE_EXITS);
@@ -1287,13 +1287,13 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        ret = RESUME_HOST;
                break;
 
-       case T_TLB_MOD:
+       case EXCCODE_MOD:
                ++vcpu->stat.tlbmod_exits;
                trace_kvm_exit(vcpu, TLBMOD_EXITS);
                ret = kvm_mips_callbacks->handle_tlb_mod(vcpu);
                break;
 
-       case T_TLB_ST_MISS:
+       case EXCCODE_TLBS:
                kvm_debug("TLB ST fault:  cause %#x, status %#lx, PC: %p, BadVaddr: %#lx\n",
                          cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc,
                          badvaddr);
@@ -1303,7 +1303,7 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
                ret = kvm_mips_callbacks->handle_tlb_st_miss(vcpu);
                break;
 
-       case T_TLB_LD_MISS:
+       case EXCCODE_TLBL:
                kvm_debug("TLB LD fault: cause %#x, PC: %p, BadVaddr: %#lx\n",
                          cause, opc, badvaddr);
 
@@ -1312,55 +1312,55 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
                ret = kvm_mips_callbacks->handle_tlb_ld_miss(vcpu);
                break;
 
-       case T_ADDR_ERR_ST:
+       case EXCCODE_ADES:
                ++vcpu->stat.addrerr_st_exits;
                trace_kvm_exit(vcpu, ADDRERR_ST_EXITS);
                ret = kvm_mips_callbacks->handle_addr_err_st(vcpu);
                break;
 
-       case T_ADDR_ERR_LD:
+       case EXCCODE_ADEL:
                ++vcpu->stat.addrerr_ld_exits;
                trace_kvm_exit(vcpu, ADDRERR_LD_EXITS);
                ret = kvm_mips_callbacks->handle_addr_err_ld(vcpu);
                break;
 
-       case T_SYSCALL:
+       case EXCCODE_SYS:
                ++vcpu->stat.syscall_exits;
                trace_kvm_exit(vcpu, SYSCALL_EXITS);
                ret = kvm_mips_callbacks->handle_syscall(vcpu);
                break;
 
-       case T_RES_INST:
+       case EXCCODE_RI:
                ++vcpu->stat.resvd_inst_exits;
                trace_kvm_exit(vcpu, RESVD_INST_EXITS);
                ret = kvm_mips_callbacks->handle_res_inst(vcpu);
                break;
 
-       case T_BREAK:
+       case EXCCODE_BP:
                ++vcpu->stat.break_inst_exits;
                trace_kvm_exit(vcpu, BREAK_INST_EXITS);
                ret = kvm_mips_callbacks->handle_break(vcpu);
                break;
 
-       case T_TRAP:
+       case EXCCODE_TR:
                ++vcpu->stat.trap_inst_exits;
                trace_kvm_exit(vcpu, TRAP_INST_EXITS);
                ret = kvm_mips_callbacks->handle_trap(vcpu);
                break;
 
-       case T_MSAFPE:
+       case EXCCODE_MSAFPE:
                ++vcpu->stat.msa_fpe_exits;
                trace_kvm_exit(vcpu, MSA_FPE_EXITS);
                ret = kvm_mips_callbacks->handle_msa_fpe(vcpu);
                break;
 
-       case T_FPE:
+       case EXCCODE_FPE:
                ++vcpu->stat.fpe_exits;
                trace_kvm_exit(vcpu, FPE_EXITS);
                ret = kvm_mips_callbacks->handle_fpe(vcpu);
                break;
 
-       case T_MSADIS:
+       case EXCCODE_MSADIS:
                ++vcpu->stat.msa_disabled_exits;
                trace_kvm_exit(vcpu, MSA_DISABLED_EXITS);
                ret = kvm_mips_callbacks->handle_msa_disabled(vcpu);
@@ -1620,7 +1620,7 @@ static struct notifier_block kvm_mips_csr_die_notifier = {
        .notifier_call = kvm_mips_csr_die_notify,
 };
 
-int __init kvm_mips_init(void)
+static int __init kvm_mips_init(void)
 {
        int ret;
 
@@ -1646,7 +1646,7 @@ int __init kvm_mips_init(void)
        return 0;
 }
 
-void __exit kvm_mips_exit(void)
+static void __exit kvm_mips_exit(void)
 {
        kvm_exit();
 
diff --git a/arch/mips/kvm/opcode.h b/arch/mips/kvm/opcode.h
deleted file mode 100644 (file)
index 03a6ae8..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012  MIPS Technologies, Inc.  All rights reserved.
- * Authors: Sanjay Lal <sanjayl@kymasys.com>
- */
-
-/* Define opcode values not defined in <asm/isnt.h> */
-
-#ifndef __KVM_MIPS_OPCODE_H__
-#define __KVM_MIPS_OPCODE_H__
-
-/* COP0 Ops */
-#define mfmcz_op       0x0b    /* 01011 */
-#define wrpgpr_op      0x0e    /* 01110 */
-
-/* COP0 opcodes (only if COP0 and CO=1): */
-#define wait_op                0x20    /* 100000 */
-
-#endif /* __KVM_MIPS_OPCODE_H__ */
index 570479c..a08c439 100644 (file)
 #define PRIx64 "llx"
 
 atomic_t kvm_mips_instance;
-EXPORT_SYMBOL(kvm_mips_instance);
+EXPORT_SYMBOL_GPL(kvm_mips_instance);
 
 /* These function pointers are initialized once the KVM module is loaded */
 kvm_pfn_t (*kvm_mips_gfn_to_pfn)(struct kvm *kvm, gfn_t gfn);
-EXPORT_SYMBOL(kvm_mips_gfn_to_pfn);
+EXPORT_SYMBOL_GPL(kvm_mips_gfn_to_pfn);
 
 void (*kvm_mips_release_pfn_clean)(kvm_pfn_t pfn);
-EXPORT_SYMBOL(kvm_mips_release_pfn_clean);
+EXPORT_SYMBOL_GPL(kvm_mips_release_pfn_clean);
 
 bool (*kvm_mips_is_error_pfn)(kvm_pfn_t pfn);
-EXPORT_SYMBOL(kvm_mips_is_error_pfn);
+EXPORT_SYMBOL_GPL(kvm_mips_is_error_pfn);
 
 uint32_t kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu)
 {
@@ -111,7 +111,7 @@ void kvm_mips_dump_host_tlbs(void)
        mtc0_tlbw_hazard();
        local_irq_restore(flags);
 }
-EXPORT_SYMBOL(kvm_mips_dump_host_tlbs);
+EXPORT_SYMBOL_GPL(kvm_mips_dump_host_tlbs);
 
 void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu)
 {
@@ -139,7 +139,7 @@ void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu)
                         (tlb.tlb_lo1 >> 3) & 7, tlb.tlb_mask);
        }
 }
-EXPORT_SYMBOL(kvm_mips_dump_guest_tlbs);
+EXPORT_SYMBOL_GPL(kvm_mips_dump_guest_tlbs);
 
 static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn)
 {
@@ -191,7 +191,7 @@ unsigned long kvm_mips_translate_guest_kseg0_to_hpa(struct kvm_vcpu *vcpu,
 
        return (kvm->arch.guest_pmap[gfn] << PAGE_SHIFT) + offset;
 }
-EXPORT_SYMBOL(kvm_mips_translate_guest_kseg0_to_hpa);
+EXPORT_SYMBOL_GPL(kvm_mips_translate_guest_kseg0_to_hpa);
 
 /* XXXKYMA: Must be called with interrupts disabled */
 /* set flush_dcache_mask == 0 if no dcache flush required */
@@ -308,7 +308,7 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
        return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
                                       flush_dcache_mask);
 }
-EXPORT_SYMBOL(kvm_mips_handle_kseg0_tlb_fault);
+EXPORT_SYMBOL_GPL(kvm_mips_handle_kseg0_tlb_fault);
 
 int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
        struct kvm_vcpu *vcpu)
@@ -351,7 +351,7 @@ int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
 
        return 0;
 }
-EXPORT_SYMBOL(kvm_mips_handle_commpage_tlb_fault);
+EXPORT_SYMBOL_GPL(kvm_mips_handle_commpage_tlb_fault);
 
 int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
                                         struct kvm_mips_tlb *tlb,
@@ -401,7 +401,7 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
        return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
                                       tlb->tlb_mask);
 }
-EXPORT_SYMBOL(kvm_mips_handle_mapped_seg_tlb_fault);
+EXPORT_SYMBOL_GPL(kvm_mips_handle_mapped_seg_tlb_fault);
 
 int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi)
 {
@@ -422,7 +422,7 @@ int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi)
 
        return index;
 }
-EXPORT_SYMBOL(kvm_mips_guest_tlb_lookup);
+EXPORT_SYMBOL_GPL(kvm_mips_guest_tlb_lookup);
 
 int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr)
 {
@@ -458,7 +458,7 @@ int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr)
 
        return idx;
 }
-EXPORT_SYMBOL(kvm_mips_host_tlb_lookup);
+EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_lookup);
 
 int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
 {
@@ -505,44 +505,7 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
 
        return 0;
 }
-EXPORT_SYMBOL(kvm_mips_host_tlb_inv);
-
-/* XXXKYMA: Fix Guest USER/KERNEL no longer share the same ASID */
-int kvm_mips_host_tlb_inv_index(struct kvm_vcpu *vcpu, int index)
-{
-       unsigned long flags, old_entryhi;
-
-       if (index >= current_cpu_data.tlbsize)
-               BUG();
-
-       local_irq_save(flags);
-
-       old_entryhi = read_c0_entryhi();
-
-       write_c0_entryhi(UNIQUE_ENTRYHI(index));
-       mtc0_tlbw_hazard();
-
-       write_c0_index(index);
-       mtc0_tlbw_hazard();
-
-       write_c0_entrylo0(0);
-       mtc0_tlbw_hazard();
-
-       write_c0_entrylo1(0);
-       mtc0_tlbw_hazard();
-
-       tlb_write_indexed();
-       mtc0_tlbw_hazard();
-       tlbw_use_hazard();
-
-       write_c0_entryhi(old_entryhi);
-       mtc0_tlbw_hazard();
-       tlbw_use_hazard();
-
-       local_irq_restore(flags);
-
-       return 0;
-}
+EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_inv);
 
 void kvm_mips_flush_host_tlb(int skip_kseg0)
 {
@@ -594,7 +557,7 @@ void kvm_mips_flush_host_tlb(int skip_kseg0)
 
        local_irq_restore(flags);
 }
-EXPORT_SYMBOL(kvm_mips_flush_host_tlb);
+EXPORT_SYMBOL_GPL(kvm_mips_flush_host_tlb);
 
 void kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu,
                             struct kvm_vcpu *vcpu)
@@ -642,7 +605,7 @@ void kvm_local_flush_tlb_all(void)
 
        local_irq_restore(flags);
 }
-EXPORT_SYMBOL(kvm_local_flush_tlb_all);
+EXPORT_SYMBOL_GPL(kvm_local_flush_tlb_all);
 
 /**
  * kvm_mips_migrate_count() - Migrate timer.
@@ -673,8 +636,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
        local_irq_save(flags);
 
-       if (((vcpu->arch.
-             guest_kernel_asid[cpu] ^ asid_cache(cpu)) & ASID_VERSION_MASK)) {
+       if ((vcpu->arch.guest_kernel_asid[cpu] ^ asid_cache(cpu)) &
+                                                       ASID_VERSION_MASK) {
                kvm_get_new_mmu_context(&vcpu->arch.guest_kernel_mm, cpu, vcpu);
                vcpu->arch.guest_kernel_asid[cpu] =
                    vcpu->arch.guest_kernel_mm.context.asid[cpu];
@@ -739,7 +702,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        local_irq_restore(flags);
 
 }
-EXPORT_SYMBOL(kvm_arch_vcpu_load);
+EXPORT_SYMBOL_GPL(kvm_arch_vcpu_load);
 
 /* ASID can change if another task is scheduled during preemption */
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -768,7 +731,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 
        local_irq_restore(flags);
 }
-EXPORT_SYMBOL(kvm_arch_vcpu_put);
+EXPORT_SYMBOL_GPL(kvm_arch_vcpu_put);
 
 uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu)
 {
@@ -813,4 +776,4 @@ uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu)
 
        return inst;
 }
-EXPORT_SYMBOL(kvm_get_inst);
+EXPORT_SYMBOL_GPL(kvm_get_inst);
index d836ed5..ad98800 100644 (file)
@@ -16,7 +16,6 @@
 
 #include <linux/kvm_host.h>
 
-#include "opcode.h"
 #include "interrupt.h"
 
 static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva)
index 272af8a..5530070 100644 (file)
@@ -57,7 +57,6 @@ notrace void arch_local_irq_disable(void)
 }
 EXPORT_SYMBOL(arch_local_irq_disable);
 
-
 notrace unsigned long arch_local_irq_save(void)
 {
        unsigned long flags;
@@ -111,31 +110,4 @@ notrace void arch_local_irq_restore(unsigned long flags)
 }
 EXPORT_SYMBOL(arch_local_irq_restore);
 
-
-notrace void __arch_local_irq_restore(unsigned long flags)
-{
-       unsigned long __tmp1;
-
-       preempt_disable();
-
-       __asm__ __volatile__(
-       "       .set    push                                            \n"
-       "       .set    noreorder                                       \n"
-       "       .set    noat                                            \n"
-       "       mfc0    $1, $12                                         \n"
-       "       andi    %[flags], 1                                     \n"
-       "       ori     $1, 0x1f                                        \n"
-       "       xori    $1, 0x1f                                        \n"
-       "       or      %[flags], $1                                    \n"
-       "       mtc0    %[flags], $12                                   \n"
-       "       " __stringify(__irq_disable_hazard) "                   \n"
-       "       .set    pop                                             \n"
-       : [flags] "=r" (__tmp1)
-       : "0" (flags)
-       : "memory");
-
-       preempt_enable();
-}
-EXPORT_SYMBOL(__arch_local_irq_restore);
-
-#endif /* !CONFIG_CPU_MIPSR2 */
+#endif /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR6 */
index 2e48e83..85d8089 100644 (file)
@@ -22,6 +22,27 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
   endif
 endif
 
+cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap
+#
+# binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
+# as MIPS64 R2; older versions as just R1.  This leaves the possibility open
+# that GCC might generate R2 code for -march=loongson3a which then is rejected
+# by GAS.  The cc-option can't probe for this behaviour so -march=loongson3a
+# can't easily be used safely within the kbuild framework.
+#
+ifeq ($(call cc-ifversion, -ge, 0409, y), y)
+  ifeq ($(call ld-ifversion, -ge, 22500000, y), y)
+    cflags-$(CONFIG_CPU_LOONGSON3)  += \
+      $(call cc-option,-march=loongson3a -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
+  else
+    cflags-$(CONFIG_CPU_LOONGSON3)  += \
+      $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
+  endif
+else
+    cflags-$(CONFIG_CPU_LOONGSON3)  += \
+      $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
+endif
+
 #
 # Loongson Machines' Support
 #
index bf9f1a7..a2631a5 100644 (file)
@@ -13,6 +13,9 @@
 #define SMBUS_PCI_REG64                0x64
 #define SMBUS_PCI_REGB4                0xb4
 
+#define HPET_MIN_CYCLES                64
+#define HPET_MIN_PROG_DELTA    (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
+
 static DEFINE_SPINLOCK(hpet_lock);
 DEFINE_PER_CPU(struct clock_event_device, hpet_clockevent_device);
 
@@ -161,8 +164,9 @@ static int hpet_next_event(unsigned long delta,
        cnt += delta;
        hpet_write(HPET_T0_CMP, cnt);
 
-       res = ((int)(hpet_read(HPET_COUNTER) - cnt) > 0) ? -ETIME : 0;
-       return res;
+       res = (int)(cnt - hpet_read(HPET_COUNTER));
+
+       return res < HPET_MIN_CYCLES ? -ETIME : 0;
 }
 
 static irqreturn_t hpet_irq_handler(int irq, void *data)
@@ -237,7 +241,7 @@ void __init setup_hpet_timer(void)
        cd->cpumask = cpumask_of(cpu);
        clockevent_set_clock(cd, HPET_FREQ);
        cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
-       cd->min_delta_ns = 5000;
+       cd->min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA, cd);
 
        clockevents_register_device(cd);
        setup_irq(HPET_T0_IRQ, &hpet_irq);
index 1a4738a..509832a 100644 (file)
 #include "smp.h"
 
 DEFINE_PER_CPU(int, cpu_state);
-DEFINE_PER_CPU(uint32_t, core0_c0count);
 
 static void *ipi_set0_regs[16];
 static void *ipi_clear0_regs[16];
 static void *ipi_status0_regs[16];
 static void *ipi_en0_regs[16];
 static void *ipi_mailbox_buf[16];
+static uint32_t core0_c0count[NR_CPUS];
 
 /* read a 32bit value from ipi register */
 #define loongson3_ipi_read32(addr) readl(addr)
@@ -275,12 +275,14 @@ void loongson3_ipi_interrupt(struct pt_regs *regs)
        if (action & SMP_ASK_C0COUNT) {
                BUG_ON(cpu != 0);
                c0count = read_c0_count();
-               for (i = 1; i < num_possible_cpus(); i++)
-                       per_cpu(core0_c0count, i) = c0count;
+               c0count = c0count ? c0count : 1;
+               for (i = 1; i < nr_cpu_ids; i++)
+                       core0_c0count[i] = c0count;
+               __wbflush(); /* Let others see the result ASAP */
        }
 }
 
-#define MAX_LOOPS 1111
+#define MAX_LOOPS 800
 /*
  * SMP init and finish on secondary CPUs
  */
@@ -305,16 +307,20 @@ static void loongson3_init_secondary(void)
                cpu_logical_map(cpu) / loongson_sysconf.cores_per_package;
 
        i = 0;
-       __this_cpu_write(core0_c0count, 0);
+       core0_c0count[cpu] = 0;
        loongson3_send_ipi_single(0, SMP_ASK_C0COUNT);
-       while (!__this_cpu_read(core0_c0count)) {
+       while (!core0_c0count[cpu]) {
                i++;
                cpu_relax();
        }
 
        if (i > MAX_LOOPS)
                i = MAX_LOOPS;
-       initcount = __this_cpu_read(core0_c0count) + i;
+       if (cpu_data[cpu].package)
+               initcount = core0_c0count[cpu] + i;
+       else /* Local access is faster for loops */
+               initcount = core0_c0count[cpu] + i/2;
+
        write_c0_count(initcount);
 }
 
index 32f0e19..cdfd44f 100644 (file)
@@ -1266,6 +1266,8 @@ branch_common:
                                                 */
                                                sig = mips_dsemul(xcp, ir,
                                                                  contpc);
+                                               if (sig < 0)
+                                                       break;
                                                if (sig)
                                                        xcp->cp0_epc = bcpc;
                                                /*
@@ -1319,6 +1321,8 @@ branch_common:
                                 * instruction in the dslot
                                 */
                                sig = mips_dsemul(xcp, ir, contpc);
+                               if (sig < 0)
+                                       break;
                                if (sig)
                                        xcp->cp0_epc = bcpc;
                                /* SIGILL forces out of the emulation loop.  */
index 926d56b..eb96485 100644 (file)
 
 union ieee754dp ieee754dp_neg(union ieee754dp x)
 {
-       unsigned int oldrm;
        union ieee754dp y;
 
-       oldrm = ieee754_csr.rm;
-       ieee754_csr.rm = FPU_CSR_RD;
-       y = ieee754dp_sub(ieee754dp_zero(0), x);
-       ieee754_csr.rm = oldrm;
+       if (ieee754_csr.abs2008) {
+               y = x;
+               DPSIGN(y) = !DPSIGN(x);
+       } else {
+               unsigned int oldrm;
+
+               oldrm = ieee754_csr.rm;
+               ieee754_csr.rm = FPU_CSR_RD;
+               y = ieee754dp_sub(ieee754dp_zero(0), x);
+               ieee754_csr.rm = oldrm;
+       }
        return y;
 }
 
 union ieee754dp ieee754dp_abs(union ieee754dp x)
 {
-       unsigned int oldrm;
        union ieee754dp y;
 
-       oldrm = ieee754_csr.rm;
-       ieee754_csr.rm = FPU_CSR_RD;
-       if (DPSIGN(x))
-               y = ieee754dp_sub(ieee754dp_zero(0), x);
-       else
-               y = ieee754dp_add(ieee754dp_zero(0), x);
-       ieee754_csr.rm = oldrm;
+       if (ieee754_csr.abs2008) {
+               y = x;
+               DPSIGN(y) = 0;
+       } else {
+               unsigned int oldrm;
+
+               oldrm = ieee754_csr.rm;
+               ieee754_csr.rm = FPU_CSR_RD;
+               if (DPSIGN(x))
+                       y = ieee754dp_sub(ieee754dp_zero(0), x);
+               else
+                       y = ieee754dp_add(ieee754dp_zero(0), x);
+               ieee754_csr.rm = oldrm;
+       }
        return y;
 }
index 6ffc336..f398561 100644 (file)
@@ -38,10 +38,13 @@ int ieee754dp_tint(union ieee754dp x)
        switch (xc) {
        case IEEE754_CLASS_SNAN:
        case IEEE754_CLASS_QNAN:
-       case IEEE754_CLASS_INF:
                ieee754_setcx(IEEE754_INVALID_OPERATION);
                return ieee754si_indef();
 
+       case IEEE754_CLASS_INF:
+               ieee754_setcx(IEEE754_INVALID_OPERATION);
+               return ieee754si_overflow(xs);
+
        case IEEE754_CLASS_ZERO:
                return 0;
 
@@ -53,7 +56,7 @@ int ieee754dp_tint(union ieee754dp x)
                /* Set invalid. We will only use overflow for floating
                   point overflow */
                ieee754_setcx(IEEE754_INVALID_OPERATION);
-               return ieee754si_indef();
+               return ieee754si_overflow(xs);
        }
        /* oh gawd */
        if (xe > DP_FBITS) {
@@ -93,7 +96,7 @@ int ieee754dp_tint(union ieee754dp x)
                if ((xm >> 31) != 0 && (xs == 0 || xm != 0x80000000)) {
                        /* This can happen after rounding */
                        ieee754_setcx(IEEE754_INVALID_OPERATION);
-                       return ieee754si_indef();
+                       return ieee754si_overflow(xs);
                }
                if (round || sticky)
                        ieee754_setcx(IEEE754_INEXACT);
index 9cdc145..748fa10 100644 (file)
@@ -38,10 +38,13 @@ s64 ieee754dp_tlong(union ieee754dp x)
        switch (xc) {
        case IEEE754_CLASS_SNAN:
        case IEEE754_CLASS_QNAN:
-       case IEEE754_CLASS_INF:
                ieee754_setcx(IEEE754_INVALID_OPERATION);
                return ieee754di_indef();
 
+       case IEEE754_CLASS_INF:
+               ieee754_setcx(IEEE754_INVALID_OPERATION);
+               return ieee754di_overflow(xs);
+
        case IEEE754_CLASS_ZERO:
                return 0;
 
@@ -56,7 +59,7 @@ s64 ieee754dp_tlong(union ieee754dp x)
                /* Set invalid. We will only use overflow for floating
                   point overflow */
                ieee754_setcx(IEEE754_INVALID_OPERATION);
-               return ieee754di_indef();
+               return ieee754di_overflow(xs);
        }
        /* oh gawd */
        if (xe > DP_FBITS) {
@@ -97,7 +100,7 @@ s64 ieee754dp_tlong(union ieee754dp x)
                if ((xm >> 63) != 0) {
                        /* This can happen after rounding */
                        ieee754_setcx(IEEE754_INVALID_OPERATION);
-                       return ieee754di_indef();
+                       return ieee754di_overflow(xs);
                }
                if (round || sticky)
                        ieee754_setcx(IEEE754_INEXACT);
index cbb36c1..46b964d 100644 (file)
@@ -31,17 +31,41 @@ struct emuframe {
        unsigned long           epc;
 };
 
+/*
+ * Set up an emulation frame for instruction IR, from a delay slot of
+ * a branch jumping to CPC.  Return 0 if successful, -1 if no emulation
+ * required, otherwise a signal number causing a frame setup failure.
+ */
 int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
 {
+       int isa16 = get_isa16_mode(regs->cp0_epc);
+       mips_instruction break_math;
        struct emuframe __user *fr;
        int err;
 
-       if ((get_isa16_mode(regs->cp0_epc) && ((ir >> 16) == MM_NOP16)) ||
-               (ir == 0)) {
-               /* NOP is easy */
-               regs->cp0_epc = cpc;
-               clear_delay_slot(regs);
-               return 0;
+       /* NOP is easy */
+       if (ir == 0)
+               return -1;
+
+       /* microMIPS instructions */
+       if (isa16) {
+               union mips_instruction insn = { .word = ir };
+
+               /* NOP16 aka MOVE16 $0, $0 */
+               if ((ir >> 16) == MM_NOP16)
+                       return -1;
+
+               /* ADDIUPC */
+               if (insn.mm_a_format.opcode == mm_addiupc_op) {
+                       unsigned int rs;
+                       s32 v;
+
+                       rs = (((insn.mm_a_format.rs + 0x1e) & 0xf) + 2);
+                       v = regs->cp0_epc & ~3;
+                       v += insn.mm_a_format.simmediate << 2;
+                       regs->regs[rs] = (long)v;
+                       return -1;
+               }
        }
 
        pr_debug("dsemul %lx %lx\n", regs->cp0_epc, cpc);
@@ -55,14 +79,10 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
         * Algorithmics used a system call instruction, and
         * borrowed that vector.  MIPS/Linux version is a bit
         * more heavyweight in the interests of portability and
-        * multiprocessor support.  For Linux we generate a
-        * an unaligned access and force an address error exception.
-        *
-        * For embedded systems (stand-alone) we prefer to use a
-        * non-existing CP1 instruction. This prevents us from emulating
-        * branches, but gives us a cleaner interface to the exception
-        * handler (single entry point).
+        * multiprocessor support.  For Linux we use a BREAK 514
+        * instruction causing a breakpoint exception.
         */
+       break_math = BREAK_MATH(isa16);
 
        /* Ensure that the two instructions are in the same cache line */
        fr = (struct emuframe __user *)
@@ -72,14 +92,18 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
        if (unlikely(!access_ok(VERIFY_WRITE, fr, sizeof(struct emuframe))))
                return SIGBUS;
 
-       if (get_isa16_mode(regs->cp0_epc)) {
-               err = __put_user(ir >> 16, (u16 __user *)(&fr->emul));
-               err |= __put_user(ir & 0xffff, (u16 __user *)((long)(&fr->emul) + 2));
-               err |= __put_user(BREAK_MATH >> 16, (u16 __user *)(&fr->badinst));
-               err |= __put_user(BREAK_MATH & 0xffff, (u16 __user *)((long)(&fr->badinst) + 2));
+       if (isa16) {
+               err = __put_user(ir >> 16,
+                                (u16 __user *)(&fr->emul));
+               err |= __put_user(ir & 0xffff,
+                                 (u16 __user *)((long)(&fr->emul) + 2));
+               err |= __put_user(break_math >> 16,
+                                 (u16 __user *)(&fr->badinst));
+               err |= __put_user(break_math & 0xffff,
+                                 (u16 __user *)((long)(&fr->badinst) + 2));
        } else {
                err = __put_user(ir, &fr->emul);
-               err |= __put_user((mips_instruction)BREAK_MATH, &fr->badinst);
+               err |= __put_user(break_math, &fr->badinst);
        }
 
        err |= __put_user((mips_instruction)BD_COOKIE, &fr->cookie);
@@ -90,8 +114,7 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
                return SIGBUS;
        }
 
-       regs->cp0_epc = ((unsigned long) &fr->emul) |
-               get_isa16_mode(regs->cp0_epc);
+       regs->cp0_epc = (unsigned long)&fr->emul | isa16;
 
        flush_cache_sigtramp((unsigned long)&fr->emul);
 
@@ -100,6 +123,7 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
 
 int do_dsemulret(struct pt_regs *xcp)
 {
+       int isa16 = get_isa16_mode(xcp->cp0_epc);
        struct emuframe __user *fr;
        unsigned long epc;
        u32 insn, cookie;
@@ -122,16 +146,19 @@ int do_dsemulret(struct pt_regs *xcp)
         *  - Is the instruction pointed to by the EPC an BREAK_MATH?
         *  - Is the following memory word the BD_COOKIE?
         */
-       if (get_isa16_mode(xcp->cp0_epc)) {
-               err = __get_user(instr[0], (u16 __user *)(&fr->badinst));
-               err |= __get_user(instr[1], (u16 __user *)((long)(&fr->badinst) + 2));
+       if (isa16) {
+               err = __get_user(instr[0],
+                                (u16 __user *)(&fr->badinst));
+               err |= __get_user(instr[1],
+                                 (u16 __user *)((long)(&fr->badinst) + 2));
                insn = (instr[0] << 16) | instr[1];
        } else {
                err = __get_user(insn, &fr->badinst);
        }
        err |= __get_user(cookie, &fr->cookie);
 
-       if (unlikely(err || (insn != BREAK_MATH) || (cookie != BD_COOKIE))) {
+       if (unlikely(err ||
+                    insn != BREAK_MATH(isa16) || cookie != BD_COOKIE)) {
                MIPS_FPU_EMU_INC_STATS(errors);
                return 0;
        }
index 8e97acb..e16ae7b 100644 (file)
@@ -59,7 +59,8 @@ const union ieee754dp __ieee754dp_spcvals[] = {
        DPCNST(1, 3,           0x4000000000000ULL),     /* - 10.0   */
        DPCNST(0, DP_EMAX + 1, 0x0000000000000ULL),     /* + infinity */
        DPCNST(1, DP_EMAX + 1, 0x0000000000000ULL),     /* - infinity */
-       DPCNST(0, DP_EMAX + 1, 0x7FFFFFFFFFFFFULL),     /* + indef quiet Nan */
+       DPCNST(0, DP_EMAX + 1, 0x7FFFFFFFFFFFFULL),     /* + ind legacy qNaN */
+       DPCNST(0, DP_EMAX + 1, 0x8000000000000ULL),     /* + indef 2008 qNaN */
        DPCNST(0, DP_EMAX,     0xFFFFFFFFFFFFFULL),     /* + max */
        DPCNST(1, DP_EMAX,     0xFFFFFFFFFFFFFULL),     /* - max */
        DPCNST(0, DP_EMIN,     0x0000000000000ULL),     /* + min normal */
@@ -82,7 +83,8 @@ const union ieee754sp __ieee754sp_spcvals[] = {
        SPCNST(1, 3,           0x200000),       /* - 10.0   */
        SPCNST(0, SP_EMAX + 1, 0x000000),       /* + infinity */
        SPCNST(1, SP_EMAX + 1, 0x000000),       /* - infinity */
-       SPCNST(0, SP_EMAX + 1, 0x3FFFFF),       /* + indef quiet Nan  */
+       SPCNST(0, SP_EMAX + 1, 0x3FFFFF),       /* + indef legacy quiet NaN */
+       SPCNST(0, SP_EMAX + 1, 0x400000),       /* + indef 2008 quiet NaN */
        SPCNST(0, SP_EMAX,     0x7FFFFF),       /* + max normal */
        SPCNST(1, SP_EMAX,     0x7FFFFF),       /* - max normal */
        SPCNST(0, SP_EMIN,     0x000000),       /* + min normal */
index df94720..d3be351 100644 (file)
@@ -221,15 +221,16 @@ union ieee754dp ieee754dp_dump(char *s, union ieee754dp x);
 #define IEEE754_SPCVAL_NTEN            5       /* -10.0 */
 #define IEEE754_SPCVAL_PINFINITY       6       /* +inf */
 #define IEEE754_SPCVAL_NINFINITY       7       /* -inf */
-#define IEEE754_SPCVAL_INDEF           8       /* quiet NaN */
-#define IEEE754_SPCVAL_PMAX            9       /* +max norm */
-#define IEEE754_SPCVAL_NMAX            10      /* -max norm */
-#define IEEE754_SPCVAL_PMIN            11      /* +min norm */
-#define IEEE754_SPCVAL_NMIN            12      /* -min norm */
-#define IEEE754_SPCVAL_PMIND           13      /* +min denorm */
-#define IEEE754_SPCVAL_NMIND           14      /* -min denorm */
-#define IEEE754_SPCVAL_P1E31           15      /* + 1.0e31 */
-#define IEEE754_SPCVAL_P1E63           16      /* + 1.0e63 */
+#define IEEE754_SPCVAL_INDEF_LEG       8       /* legacy quiet NaN */
+#define IEEE754_SPCVAL_INDEF_2008      9       /* IEEE 754-2008 quiet NaN */
+#define IEEE754_SPCVAL_PMAX            10      /* +max norm */
+#define IEEE754_SPCVAL_NMAX            11      /* -max norm */
+#define IEEE754_SPCVAL_PMIN            12      /* +min norm */
+#define IEEE754_SPCVAL_NMIN            13      /* -min norm */
+#define IEEE754_SPCVAL_PMIND           14      /* +min denorm */
+#define IEEE754_SPCVAL_NMIND           15      /* -min denorm */
+#define IEEE754_SPCVAL_P1E31           16      /* + 1.0e31 */
+#define IEEE754_SPCVAL_P1E63           17      /* + 1.0e63 */
 
 extern const union ieee754dp __ieee754dp_spcvals[];
 extern const union ieee754sp __ieee754sp_spcvals[];
@@ -243,7 +244,8 @@ extern const union ieee754sp __ieee754sp_spcvals[];
 #define ieee754dp_zero(sn)     (ieee754dp_spcvals[IEEE754_SPCVAL_PZERO+(sn)])
 #define ieee754dp_one(sn)      (ieee754dp_spcvals[IEEE754_SPCVAL_PONE+(sn)])
 #define ieee754dp_ten(sn)      (ieee754dp_spcvals[IEEE754_SPCVAL_PTEN+(sn)])
-#define ieee754dp_indef()      (ieee754dp_spcvals[IEEE754_SPCVAL_INDEF])
+#define ieee754dp_indef()      (ieee754dp_spcvals[IEEE754_SPCVAL_INDEF_LEG + \
+                                                  ieee754_csr.nan2008])
 #define ieee754dp_max(sn)      (ieee754dp_spcvals[IEEE754_SPCVAL_PMAX+(sn)])
 #define ieee754dp_min(sn)      (ieee754dp_spcvals[IEEE754_SPCVAL_PMIN+(sn)])
 #define ieee754dp_mind(sn)     (ieee754dp_spcvals[IEEE754_SPCVAL_PMIND+(sn)])
@@ -254,7 +256,8 @@ extern const union ieee754sp __ieee754sp_spcvals[];
 #define ieee754sp_zero(sn)     (ieee754sp_spcvals[IEEE754_SPCVAL_PZERO+(sn)])
 #define ieee754sp_one(sn)      (ieee754sp_spcvals[IEEE754_SPCVAL_PONE+(sn)])
 #define ieee754sp_ten(sn)      (ieee754sp_spcvals[IEEE754_SPCVAL_PTEN+(sn)])
-#define ieee754sp_indef()      (ieee754sp_spcvals[IEEE754_SPCVAL_INDEF])
+#define ieee754sp_indef()      (ieee754sp_spcvals[IEEE754_SPCVAL_INDEF_LEG + \
+                                                  ieee754_csr.nan2008])
 #define ieee754sp_max(sn)      (ieee754sp_spcvals[IEEE754_SPCVAL_PMAX+(sn)])
 #define ieee754sp_min(sn)      (ieee754sp_spcvals[IEEE754_SPCVAL_PMIN+(sn)])
 #define ieee754sp_mind(sn)     (ieee754sp_spcvals[IEEE754_SPCVAL_PMIND+(sn)])
@@ -266,12 +269,25 @@ extern const union ieee754sp __ieee754sp_spcvals[];
  */
 static inline int ieee754si_indef(void)
 {
-       return INT_MAX;
+       return ieee754_csr.nan2008 ? 0 : INT_MAX;
 }
 
 static inline s64 ieee754di_indef(void)
 {
-       return S64_MAX;
+       return ieee754_csr.nan2008 ? 0 : S64_MAX;
+}
+
+/*
+ * Overflow integer value
+ */
+static inline int ieee754si_overflow(int xs)
+{
+       return ieee754_csr.nan2008 && xs ? INT_MIN : INT_MAX;
+}
+
+static inline s64 ieee754di_overflow(int xs)
+{
+       return ieee754_csr.nan2008 && xs ? S64_MIN : S64_MAX;
 }
 
 /* result types for xctx.rt */
index 522d843..ad3c734 100644 (file)
@@ -37,8 +37,11 @@ static inline int ieee754dp_isnan(union ieee754dp x)
 
 static inline int ieee754dp_issnan(union ieee754dp x)
 {
+       int qbit;
+
        assert(ieee754dp_isnan(x));
-       return (DPMANT(x) & DP_MBIT(DP_FBITS - 1)) == DP_MBIT(DP_FBITS - 1);
+       qbit = (DPMANT(x) & DP_MBIT(DP_FBITS - 1)) == DP_MBIT(DP_FBITS - 1);
+       return ieee754_csr.nan2008 ^ qbit;
 }
 
 
@@ -51,7 +54,12 @@ union ieee754dp __cold ieee754dp_nanxcpt(union ieee754dp r)
        assert(ieee754dp_issnan(r));
 
        ieee754_setcx(IEEE754_INVALID_OPERATION);
-       return ieee754dp_indef();
+       if (ieee754_csr.nan2008)
+               DPMANT(r) |= DP_MBIT(DP_FBITS - 1);
+       else
+               r = ieee754dp_indef();
+
+       return r;
 }
 
 static u64 ieee754dp_get_rounding(int sn, u64 xm)
index 6383e2c..ed7bb27 100644 (file)
@@ -63,10 +63,10 @@ static inline int ieee754_class_nan(int xc)
        if (ve == SP_EMAX+1+SP_EBIAS) {                                 \
                if (vm == 0)                                            \
                        vc = IEEE754_CLASS_INF;                         \
-               else if (vm & SP_MBIT(SP_FBITS-1))                      \
-                       vc = IEEE754_CLASS_SNAN;                        \
-               else                                                    \
+               else if (ieee754_csr.nan2008 ^ !(vm & SP_MBIT(SP_FBITS - 1))) \
                        vc = IEEE754_CLASS_QNAN;                        \
+               else                                                    \
+                       vc = IEEE754_CLASS_SNAN;                        \
        } else if (ve == SP_EMIN-1+SP_EBIAS) {                          \
                if (vm) {                                               \
                        ve = SP_EMIN;                                   \
@@ -97,10 +97,10 @@ static inline int ieee754_class_nan(int xc)
        if (ve == DP_EMAX+1+DP_EBIAS) {                                 \
                if (vm == 0)                                            \
                        vc = IEEE754_CLASS_INF;                         \
-               else if (vm & DP_MBIT(DP_FBITS-1))                      \
-                       vc = IEEE754_CLASS_SNAN;                        \
-               else                                                    \
+               else if (ieee754_csr.nan2008 ^ !(vm & DP_MBIT(DP_FBITS - 1))) \
                        vc = IEEE754_CLASS_QNAN;                        \
+               else                                                    \
+                       vc = IEEE754_CLASS_SNAN;                        \
        } else if (ve == DP_EMIN-1+DP_EBIAS) {                          \
                if (vm) {                                               \
                        ve = DP_EMIN;                                   \
index ca8e35e..def00ff 100644 (file)
@@ -37,8 +37,11 @@ static inline int ieee754sp_isnan(union ieee754sp x)
 
 static inline int ieee754sp_issnan(union ieee754sp x)
 {
+       int qbit;
+
        assert(ieee754sp_isnan(x));
-       return SPMANT(x) & SP_MBIT(SP_FBITS - 1);
+       qbit = (SPMANT(x) & SP_MBIT(SP_FBITS - 1)) == SP_MBIT(SP_FBITS - 1);
+       return ieee754_csr.nan2008 ^ qbit;
 }
 
 
@@ -51,7 +54,12 @@ union ieee754sp __cold ieee754sp_nanxcpt(union ieee754sp r)
        assert(ieee754sp_issnan(r));
 
        ieee754_setcx(IEEE754_INVALID_OPERATION);
-       return ieee754sp_indef();
+       if (ieee754_csr.nan2008)
+               SPMANT(r) |= SP_MBIT(SP_FBITS - 1);
+       else
+               r = ieee754sp_indef();
+
+       return r;
 }
 
 static unsigned ieee754sp_get_rounding(int sn, unsigned xm)
index 3797148..5060e8f 100644 (file)
@@ -44,13 +44,16 @@ union ieee754sp ieee754sp_fdp(union ieee754dp x)
 
        switch (xc) {
        case IEEE754_CLASS_SNAN:
-               return ieee754sp_nanxcpt(ieee754sp_nan_fdp(xs, xm));
-
+               x = ieee754dp_nanxcpt(x);
+               EXPLODEXDP;
+               /* Fall through.  */
        case IEEE754_CLASS_QNAN:
                y = ieee754sp_nan_fdp(xs, xm);
-               EXPLODEYSP;
-               if (!ieee754_class_nan(yc))
-                       y = ieee754sp_indef();
+               if (!ieee754_csr.nan2008) {
+                       EXPLODEYSP;
+                       if (!ieee754_class_nan(yc))
+                               y = ieee754sp_indef();
+               }
                return y;
 
        case IEEE754_CLASS_INF:
index c50e945..756c9cf 100644 (file)
 
 union ieee754sp ieee754sp_neg(union ieee754sp x)
 {
-       unsigned int oldrm;
        union ieee754sp y;
 
-       oldrm = ieee754_csr.rm;
-       ieee754_csr.rm = FPU_CSR_RD;
-       y = ieee754sp_sub(ieee754sp_zero(0), x);
-       ieee754_csr.rm = oldrm;
+       if (ieee754_csr.abs2008) {
+               y = x;
+               SPSIGN(y) = !SPSIGN(x);
+       } else {
+               unsigned int oldrm;
+
+               oldrm = ieee754_csr.rm;
+               ieee754_csr.rm = FPU_CSR_RD;
+               y = ieee754sp_sub(ieee754sp_zero(0), x);
+               ieee754_csr.rm = oldrm;
+       }
        return y;
 }
 
 union ieee754sp ieee754sp_abs(union ieee754sp x)
 {
-       unsigned int oldrm;
        union ieee754sp y;
 
-       oldrm = ieee754_csr.rm;
-       ieee754_csr.rm = FPU_CSR_RD;
-       if (SPSIGN(x))
-               y = ieee754sp_sub(ieee754sp_zero(0), x);
-       else
-               y = ieee754sp_add(ieee754sp_zero(0), x);
-       ieee754_csr.rm = oldrm;
+       if (ieee754_csr.abs2008) {
+               y = x;
+               SPSIGN(y) = 0;
+       } else {
+               unsigned int oldrm;
+
+               oldrm = ieee754_csr.rm;
+               ieee754_csr.rm = FPU_CSR_RD;
+               if (SPSIGN(x))
+                       y = ieee754sp_sub(ieee754sp_zero(0), x);
+               else
+                       y = ieee754sp_add(ieee754sp_zero(0), x);
+               ieee754_csr.rm = oldrm;
+       }
        return y;
 }
index 091299a..f4b4cab 100644 (file)
@@ -38,10 +38,13 @@ int ieee754sp_tint(union ieee754sp x)
        switch (xc) {
        case IEEE754_CLASS_SNAN:
        case IEEE754_CLASS_QNAN:
-       case IEEE754_CLASS_INF:
                ieee754_setcx(IEEE754_INVALID_OPERATION);
                return ieee754si_indef();
 
+       case IEEE754_CLASS_INF:
+               ieee754_setcx(IEEE754_INVALID_OPERATION);
+               return ieee754si_overflow(xs);
+
        case IEEE754_CLASS_ZERO:
                return 0;
 
@@ -56,7 +59,7 @@ int ieee754sp_tint(union ieee754sp x)
                /* Set invalid. We will only use overflow for floating
                   point overflow */
                ieee754_setcx(IEEE754_INVALID_OPERATION);
-               return ieee754si_indef();
+               return ieee754si_overflow(xs);
        }
        /* oh gawd */
        if (xe > SP_FBITS) {
@@ -97,7 +100,7 @@ int ieee754sp_tint(union ieee754sp x)
                if ((xm >> 31) != 0) {
                        /* This can happen after rounding */
                        ieee754_setcx(IEEE754_INVALID_OPERATION);
-                       return ieee754si_indef();
+                       return ieee754si_overflow(xs);
                }
                if (round || sticky)
                        ieee754_setcx(IEEE754_INEXACT);
index 9f3c742..a2450c7 100644 (file)
@@ -39,10 +39,13 @@ s64 ieee754sp_tlong(union ieee754sp x)
        switch (xc) {
        case IEEE754_CLASS_SNAN:
        case IEEE754_CLASS_QNAN:
-       case IEEE754_CLASS_INF:
                ieee754_setcx(IEEE754_INVALID_OPERATION);
                return ieee754di_indef();
 
+       case IEEE754_CLASS_INF:
+               ieee754_setcx(IEEE754_INVALID_OPERATION);
+               return ieee754di_overflow(xs);
+
        case IEEE754_CLASS_ZERO:
                return 0;
 
@@ -57,7 +60,7 @@ s64 ieee754sp_tlong(union ieee754sp x)
                /* Set invalid. We will only use overflow for floating
                   point overflow */
                ieee754_setcx(IEEE754_INVALID_OPERATION);
-               return ieee754di_indef();
+               return ieee754di_overflow(xs);
        }
        /* oh gawd */
        if (xe > SP_FBITS) {
@@ -94,7 +97,7 @@ s64 ieee754sp_tlong(union ieee754sp x)
                if ((xm >> 63) != 0) {
                        /* This can happen after rounding */
                        ieee754_setcx(IEEE754_INVALID_OPERATION);
-                       return ieee754di_indef();
+                       return ieee754di_overflow(xs);
                }
                if (round || sticky)
                        ieee754_setcx(IEEE754_INEXACT);
index 482192c..5a04b6f 100644 (file)
@@ -241,7 +241,7 @@ static void output_pgtable_bits_defines(void)
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
        pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT);
 #endif
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
        if (cpu_has_rixi) {
 #ifdef _PAGE_NO_EXEC_SHIFT
                pr_define("_PAGE_NO_EXEC_SHIFT %d\n", _PAGE_NO_EXEC_SHIFT);
index 2eda01e..139ad1d 100644 (file)
@@ -43,6 +43,7 @@ obj-$(CONFIG_SIBYTE_BCM1x80)  += pci-bcm1480.o pci-bcm1480ht.o
 obj-$(CONFIG_SNI_RM)           += fixup-sni.o ops-sni.o
 obj-$(CONFIG_LANTIQ)           += fixup-lantiq.o
 obj-$(CONFIG_PCI_LANTIQ)       += pci-lantiq.o ops-lantiq.o
+obj-$(CONFIG_SOC_MT7620)       += pci-mt7620.o
 obj-$(CONFIG_SOC_RT288X)       += pci-rt2880.o
 obj-$(CONFIG_SOC_RT3883)       += pci-rt3883.o
 obj-$(CONFIG_TANBAC_TB0219)    += fixup-tb0219.o
diff --git a/arch/mips/pci/pci-mt7620.c b/arch/mips/pci/pci-mt7620.c
new file mode 100644 (file)
index 0000000..a009ee4
--- /dev/null
@@ -0,0 +1,426 @@
+/*
+ *  Ralink MT7620A SoC PCI support
+ *
+ *  Copyright (C) 2007-2013 Bruce Chang (Mediatek)
+ *  Copyright (C) 2013-2016 John Crispin <blogic@openwrt.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/reset.h>
+#include <linux/platform_device.h>
+
+#include <asm/mach-ralink/ralink_regs.h>
+#include <asm/mach-ralink/mt7620.h>
+
+#define RALINK_PCI_IO_MAP_BASE         0x10160000
+#define RALINK_PCI_MEMORY_BASE         0x0
+
+#define RALINK_INT_PCIE0               4
+
+#define RALINK_CLKCFG1                 0x30
+#define RALINK_GPIOMODE                        0x60
+
+/*
+ * PPLL_CFG1 register offset and its bit-23 flag.  The flag used to be named
+ * PDRV_SW_SET as well, which the PPLL_DRV definition below silently
+ * overrode.  NOTE(review): the name PPLL_LD (lock detect) is a reviewer's
+ * assumption - confirm against the datasheet.
+ */
+#define PPLL_CFG1                      0x9c
+#define PPLL_LD                                BIT(23)
+
+/* PPLL_DRV register offset and its flags */
+#define PPLL_DRV                       0xa0
+#define PDRV_SW_SET                    BIT(31)
+#define LC_CKDRVPD                     BIT(19)
+#define LC_CKDRVOHZ                    BIT(18)
+#define LC_CKDRVHZ                     BIT(17)
+#define LC_CKTEST                      BIT(16)
+
+/* PCI Bridge registers */
+#define RALINK_PCI_PCICFG_ADDR         0x00
+#define PCIRST                         BIT(1)
+
+#define RALINK_PCI_PCIENA              0x0C
+#define PCIINT2                                BIT(20)
+
+#define RALINK_PCI_CONFIG_ADDR         0x20
+#define RALINK_PCI_CONFIG_DATA_VIRT_REG        0x24
+#define RALINK_PCI_MEMBASE             0x28
+#define RALINK_PCI_IOBASE              0x2C
+
+/* PCI RC registers */
+#define RALINK_PCI0_BAR0SETUP_ADDR     0x10
+#define RALINK_PCI0_IMBASEBAR0_ADDR    0x18
+#define RALINK_PCI0_ID                 0x30
+#define RALINK_PCI0_CLASS              0x34
+#define RALINK_PCI0_SUBID              0x38
+#define RALINK_PCI0_STATUS             0x50
+#define PCIE_LINK_UP_ST                        BIT(0)
+
+#define PCIEPHY0_CFG                   0x90
+
+#define RALINK_PCIEPHY_P0_CTL_OFFSET   0x7498
+#define RALINK_PCIE0_CLK_EN            (1 << 26)
+
+#define BUSY                           0x80000000
+#define WAITRETRY_MAX                  10
+#define WRITE_MODE                     (1UL << 23)
+#define DATA_SHIFT                     0
+#define ADDR_SHIFT                     8
+
+
+/* MMIO windows; both are mapped with devm_ioremap_resource() at probe time */
+static void __iomem *bridge_base;
+static void __iomem *pcie_base;
+
+/* "pcie0" reset line, obtained with devm_reset_control_get() at probe time */
+static struct reset_control *rstpcie0;
+
+/* Store the 32-bit value @val at byte offset @reg of the bridge window. */
+static inline void bridge_w32(u32 val, unsigned reg)
+{
+       void __iomem *addr = bridge_base + reg;
+
+       iowrite32(val, addr);
+}
+
+/* Fetch the 32-bit value at byte offset @reg of the bridge window. */
+static inline u32 bridge_r32(unsigned reg)
+{
+       void __iomem *addr = bridge_base + reg;
+
+       return ioread32(addr);
+}
+
+/* Store the 32-bit value @val at byte offset @reg of the PCIe window. */
+static inline void pcie_w32(u32 val, unsigned reg)
+{
+       void __iomem *addr = pcie_base + reg;
+
+       iowrite32(val, addr);
+}
+
+/* Fetch the 32-bit value at byte offset @reg of the PCIe window. */
+static inline u32 pcie_r32(unsigned reg)
+{
+       void __iomem *addr = pcie_base + reg;
+
+       return ioread32(addr);
+}
+
+/*
+ * Read-modify-write of the PCIe window register at @reg: the bits in @clr
+ * are cleared first, then the bits in @set are set, and the result is
+ * written back.
+ */
+static inline void pcie_m32(u32 clr, u32 set, unsigned reg)
+{
+       u32 updated = (pcie_r32(reg) & ~clr) | set;
+
+       pcie_w32(updated, reg);
+}
+
+/*
+ * Poll PCIEPHY0_CFG until its BUSY bit reads back clear.
+ *
+ * Each busy poll is followed by a 100 ms delay.  The wait is abandoned with
+ * a warning once the retry counter exceeds WAITRETRY_MAX.
+ *
+ * Returns 0 when the PHY is idle, -1 when the retries are exhausted.
+ */
+static int wait_pciephy_busy(void)
+{
+       unsigned long retry = 0;
+
+       while (pcie_r32(PCIEPHY0_CFG) & BUSY) {
+               mdelay(100);
+               if (retry++ > WAITRETRY_MAX) {
+                       /* KERN_WARN is not a valid printk level */
+                       pr_warn("PCIE-PHY retry failed.\n");
+                       return -1;
+               }
+       }
+       return 0;
+}
+
+/*
+ * Issue a write command to the PCIe PHY through PCIEPHY0_CFG: @val is placed
+ * at DATA_SHIFT, @addr at ADDR_SHIFT, and WRITE_MODE is set.  The PHY is
+ * waited on before the command and again, after a 1 ms delay, behind it;
+ * the outcome of either wait is not checked.
+ */
+static void pcie_phy(unsigned long addr, unsigned long val)
+{
+       unsigned long cmd;
+
+       cmd = WRITE_MODE | (val << DATA_SHIFT) | (addr << ADDR_SHIFT);
+
+       wait_pciephy_busy();
+       pcie_w32(cmd, PCIEPHY0_CFG);
+       mdelay(1);
+       wait_pciephy_busy();
+}
+
+/*
+ * Read a PCI configuration register through the bridge's indirect
+ * address/data register pair.
+ *
+ * @bus may be NULL, in which case bus number 0 is addressed.  Bits 8-11 of
+ * @where are placed in bits 24-27 of the address word.  A full 32-bit word
+ * is always fetched; for @size 1 or 2 the requested bytes are shifted down
+ * and masked out of it.  *@val is left untouched for any @size other than
+ * 1, 2 or 4.
+ *
+ * Always returns PCIBIOS_SUCCESSFUL.
+ */
+static int pci_config_read(struct pci_bus *bus, unsigned int devfn, int where,
+                          int size, u32 *val)
+{
+       unsigned int slot = PCI_SLOT(devfn);
+       u8 func = PCI_FUNC(devfn);
+       u32 busnr = bus ? bus->number : 0;
+       u32 cfg_addr;
+       u32 word;
+
+       cfg_addr = (((where & 0xF00) >> 8) << 24) | (busnr << 16) |
+                  (slot << 11) | (func << 8) | (where & 0xfc) | 0x80000000;
+       bridge_w32(cfg_addr, RALINK_PCI_CONFIG_ADDR);
+       word = bridge_r32(RALINK_PCI_CONFIG_DATA_VIRT_REG);
+
+       if (size == 1)
+               *val = (word >> ((where & 3) << 3)) & 0xff;
+       else if (size == 2)
+               *val = (word >> ((where & 3) << 3)) & 0xffff;
+       else if (size == 4)
+               *val = word;
+
+       return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * Write a PCI configuration register through the bridge's indirect
+ * address/data register pair.
+ *
+ * @bus may be NULL, in which case bus number 0 is addressed.  The containing
+ * 32-bit word is read first; for @size 1 or 2 only the addressed bytes are
+ * replaced before the word is written back.  For any @size other than 1, 2
+ * or 4 the word is written back unchanged.
+ *
+ * Always returns PCIBIOS_SUCCESSFUL.
+ */
+static int pci_config_write(struct pci_bus *bus, unsigned int devfn, int where,
+                           int size, u32 val)
+{
+       unsigned int slot = PCI_SLOT(devfn);
+       unsigned int shift = (where & 3) << 3;
+       u8 func = PCI_FUNC(devfn);
+       u32 address;
+       u32 data;
+       u32 num = 0;
+
+       if (bus)
+               num = bus->number;
+
+       address = (((where & 0xF00) >> 8) << 24) | (num << 16) | (slot << 11) |
+                 (func << 8) | (where & 0xfc) | 0x80000000;
+       bridge_w32(address, RALINK_PCI_CONFIG_ADDR);
+       data = bridge_r32(RALINK_PCI_CONFIG_DATA_VIRT_REG);
+
+       /*
+        * Unsigned masks keep the shift by 24 well defined, and masking @val
+        * stops stray high bits from spilling into the neighbouring bytes.
+        */
+       switch (size) {
+       case 1:
+               data = (data & ~(0xffU << shift)) | ((val & 0xffU) << shift);
+               break;
+       case 2:
+               data = (data & ~(0xffffU << shift)) |
+                       ((val & 0xffffU) << shift);
+               break;
+       case 4:
+               data = val;
+               break;
+       }
+
+       bridge_w32(data, RALINK_PCI_CONFIG_DATA_VIRT_REG);
+
+       return PCIBIOS_SUCCESSFUL;
+}
+
+/* Configuration-space accessors used by mt7620_controller */
+struct pci_ops mt7620_pci_ops = {
+       .read   = pci_config_read,
+       .write  = pci_config_write,
+};
+
+/*
+ * Memory and I/O resources are left zero-initialised here; the probe routine
+ * passes the controller to pci_load_of_ranges() before registering it.
+ */
+static struct resource mt7620_res_pci_mem1;
+static struct resource mt7620_res_pci_io1;
+/* Controller descriptor handed to register_pci_controller() at probe time */
+struct pci_controller mt7620_controller = {
+       .pci_ops        = &mt7620_pci_ops,
+       .mem_resource   = &mt7620_res_pci_mem1,
+       .mem_offset     = 0x00000000UL,
+       .io_resource    = &mt7620_res_pci_io1,
+       .io_offset      = 0x00000000UL,
+       .io_map_base    = 0xa0000000,
+};
+
+/*
+ * Bring up the PCIe core on the MT7620A.
+ *
+ * Programs three PHY registers, puts the core into reset, switches the PCIe
+ * clock off and on again around releasing the reset, and then checks a
+ * PPLL_CFG1 flag before powering up the bus.
+ *
+ * Returns 0 on success, or -1 (with the core back in reset and its clock
+ * disabled) when the PPLL_CFG1 flag is not set.
+ */
+static int mt7620_pci_hw_init(struct platform_device *pdev)
+{
+       /*
+        * Bit 23 of PPLL_CFG1.  The check below used PDRV_SW_SET, which is
+        * defined twice in this file; the later definition (bit 31, listed
+        * with the PPLL_DRV flags) wins, so the wrong bit was tested.
+        * NOTE(review): bit 23 is presumably the PLL lock-detect flag, as the
+        * failure message suggests - confirm against the datasheet.
+        */
+       const u32 ppll_locked = BIT(23);
+
+       /* bypass PCIe DLL */
+       pcie_phy(0x0, 0x80);
+       pcie_phy(0x1, 0x04);
+
+       /* Elastic buffer control */
+       pcie_phy(0x68, 0xB4);
+
+       /* put core into reset */
+       pcie_m32(0, PCIRST, RALINK_PCI_PCICFG_ADDR);
+       reset_control_assert(rstpcie0);
+
+       /* disable power and all clocks */
+       rt_sysc_m32(RALINK_PCIE0_CLK_EN, 0, RALINK_CLKCFG1);
+       rt_sysc_m32(LC_CKDRVPD, PDRV_SW_SET, PPLL_DRV);
+
+       /* bring core out of reset */
+       reset_control_deassert(rstpcie0);
+       rt_sysc_m32(0, RALINK_PCIE0_CLK_EN, RALINK_CLKCFG1);
+       mdelay(100);
+
+       if (!(rt_sysc_r32(PPLL_CFG1) & ppll_locked)) {
+               dev_err(&pdev->dev, "MT7620 PPLL unlock\n");
+               reset_control_assert(rstpcie0);
+               rt_sysc_m32(RALINK_PCIE0_CLK_EN, 0, RALINK_CLKCFG1);
+               return -1;
+       }
+
+       /* power up the bus */
+       rt_sysc_m32(LC_CKDRVHZ | LC_CKDRVOHZ, LC_CKDRVPD | PDRV_SW_SET,
+                   PPLL_DRV);
+
+       return 0;
+}
+
+/*
+ * Bring up the PCIe core on the MT7628AN.
+ *
+ * Releases the core from reset, enables the PCIe clock, applies a PHY
+ * setting taken from the SDK driver and programs bits 8-15 of configuration
+ * register 0x70c (reported as N_FTS) to 0x50.
+ *
+ * Always returns 0.
+ */
+static int mt7628_pci_hw_init(struct platform_device *pdev)
+{
+       u32 val = 0;
+
+       /* bring the core out of reset */
+       rt_sysc_m32(BIT(16), 0, RALINK_GPIOMODE);
+       reset_control_deassert(rstpcie0);
+
+       /* enable the pci clk */
+       rt_sysc_m32(0, RALINK_PCIE0_CLK_EN, RALINK_CLKCFG1);
+       mdelay(100);
+
+       /* voodoo from the SDK driver */
+       pcie_m32(~0xff, 0x5, RALINK_PCIEPHY_P0_CTL_OFFSET);
+
+       /*
+        * Replace only bits 8-15.  The former "~(0xff) << 8" parsed as
+        * "(~0xff) << 8" and wiped bits 0-7 of the register as well.
+        */
+       pci_config_read(NULL, 0, 0x70c, 4, &val);
+       val &= ~(0xffU << 8);
+       val |= 0x50 << 8;
+       pci_config_write(NULL, 0, 0x70c, 4, val);
+
+       /* read back for the log; this is informational, not an error */
+       pci_config_read(NULL, 0, 0x70c, 4, &val);
+       dev_info(&pdev->dev, "Port 0 N_FTS = %x\n", (unsigned int) val);
+
+       return 0;
+}
+
+/*
+ * Probe the PCIe host bridge.
+ *
+ * Obtains the "pcie0" reset line, maps the bridge (MEM resource 0) and PCIe
+ * (MEM resource 1) register windows, runs the SoC-specific bring-up, checks
+ * for link-up and then programs the address windows before registering
+ * mt7620_controller with the MIPS PCI core.
+ *
+ * Returns 0 on success, the error encoded in a failed reset-control or
+ * ioremap lookup, or -1 when the SoC is unsupported, the bring-up fails or
+ * the link does not come up.
+ */
+static int mt7620_pci_probe(struct platform_device *pdev)
+{
+       struct resource *bridge_res = platform_get_resource(pdev,
+                                                           IORESOURCE_MEM, 0);
+       struct resource *pcie_res = platform_get_resource(pdev,
+                                                         IORESOURCE_MEM, 1);
+       u32 val = 0;
+
+       rstpcie0 = devm_reset_control_get(&pdev->dev, "pcie0");
+       if (IS_ERR(rstpcie0))
+               return PTR_ERR(rstpcie0);
+
+       /* devm_ioremap_resource() reports failure as ERR_PTR(), never NULL */
+       bridge_base = devm_ioremap_resource(&pdev->dev, bridge_res);
+       if (IS_ERR(bridge_base))
+               return PTR_ERR(bridge_base);
+
+       pcie_base = devm_ioremap_resource(&pdev->dev, pcie_res);
+       if (IS_ERR(pcie_base))
+               return PTR_ERR(pcie_base);
+
+       /* open the global memory and I/O port resource roots completely */
+       iomem_resource.start = 0;
+       iomem_resource.end = ~0;
+       ioport_resource.start = 0;
+       ioport_resource.end = ~0;
+
+       /* bring up the pci core */
+       switch (ralink_soc) {
+       case MT762X_SOC_MT7620A:
+               if (mt7620_pci_hw_init(pdev))
+                       return -1;
+               break;
+
+       case MT762X_SOC_MT7628AN:
+               if (mt7628_pci_hw_init(pdev))
+                       return -1;
+               break;
+
+       default:
+               dev_err(&pdev->dev, "pcie is not supported on this hardware\n");
+               return -1;
+       }
+       mdelay(50);
+
+       /* enable write access */
+       pcie_m32(PCIRST, 0, RALINK_PCI_PCICFG_ADDR);
+       mdelay(100);
+
+       /* check if there is a card present */
+       if ((pcie_r32(RALINK_PCI0_STATUS) & PCIE_LINK_UP_ST) == 0) {
+               /* no link: put the core back into reset and gate its clock */
+               reset_control_assert(rstpcie0);
+               rt_sysc_m32(RALINK_PCIE0_CLK_EN, 0, RALINK_CLKCFG1);
+               if (ralink_soc == MT762X_SOC_MT7620A)
+                       rt_sysc_m32(LC_CKDRVPD, PDRV_SW_SET, PPLL_DRV);
+               dev_err(&pdev->dev, "PCIE0 no card, disable it(RST&CLK)\n");
+               return -1;
+       }
+
+       /* setup ranges */
+       bridge_w32(0xffffffff, RALINK_PCI_MEMBASE);
+       bridge_w32(RALINK_PCI_IO_MAP_BASE, RALINK_PCI_IOBASE);
+
+       pcie_w32(0x7FFF0001, RALINK_PCI0_BAR0SETUP_ADDR);
+       pcie_w32(RALINK_PCI_MEMORY_BASE, RALINK_PCI0_IMBASEBAR0_ADDR);
+       pcie_w32(0x06040001, RALINK_PCI0_CLASS);
+
+       /* enable interrupts */
+       pcie_m32(0, PCIINT2, RALINK_PCI_PCIENA);
+
+       /* voodoo from the SDK driver */
+       pci_config_read(NULL, 0, 4, 4, &val);
+       pci_config_write(NULL, 0, 4, 4, val | 0x7);
+
+       pci_load_of_ranges(&mt7620_controller, pdev->dev.of_node);
+       register_pci_controller(&mt7620_controller);
+
+       return 0;
+}
+
+/*
+ * Pick the interrupt number for @dev and apply per-device configuration.
+ *
+ * The device at bus 0, slot 0 gets its BAR0 set up and keeps irq 0; the
+ * device at bus 1, slot 0 gets RALINK_INT_PCIE0.  Any other device is
+ * reported and 0 is returned without further configuration.  @pin is not
+ * used.
+ *
+ * Returns the chosen irq number (0 when none was assigned).
+ */
+int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+       u16 cmd;
+       u32 val;
+       int irq = 0;
+
+       if ((dev->bus->number == 0) && (slot == 0)) {
+               pcie_w32(0x7FFF0001, RALINK_PCI0_BAR0SETUP_ADDR);
+               pci_config_write(dev->bus, 0, PCI_BASE_ADDRESS_0, 4,
+                                RALINK_PCI_MEMORY_BASE);
+               /* the value read back into val is not used afterwards */
+               pci_config_read(dev->bus, 0, PCI_BASE_ADDRESS_0, 4, &val);
+       } else if ((dev->bus->number == 1) && (slot == 0x0)) {
+               irq = RALINK_INT_PCIE0;
+       } else {
+               dev_err(&dev->dev, "no irq found - bus=0x%x, slot = 0x%x\n",
+                       dev->bus->number, slot);
+               return 0;
+       }
+       /* NOTE(review): informational message logged at error level */
+       dev_err(&dev->dev, "card - bus=0x%x, slot = 0x%x irq=%d\n",
+               dev->bus->number, slot, irq);
+
+       /* configure the cache line size to 0x14 */
+       pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, 0x14);
+
+       /* configure latency timer to 0xff */
+       pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0xff);
+       pci_read_config_word(dev, PCI_COMMAND, &cmd);
+
+       /* setup the slot */
+       cmd = cmd | PCI_COMMAND_MASTER | PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
+       pci_write_config_word(dev, PCI_COMMAND, cmd);
+       /*
+        * NOTE(review): this stores dev->irq, not the local irq chosen above;
+        * confirm dev->irq already holds the intended value at this point.
+        */
+       pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
+
+       return irq;
+}
+
+/* No platform-specific per-device setup is performed; always returns 0. */
+int pcibios_plat_dev_init(struct pci_dev *dev)
+{
+       return 0;
+}
+
+/* Device-tree compatible strings this driver binds to */
+static const struct of_device_id mt7620_pci_ids[] = {
+       { .compatible = "mediatek,mt7620-pci" },
+       {},
+};
+MODULE_DEVICE_TABLE(of, mt7620_pci_ids);
+
+/* Platform driver glue: matched through mt7620_pci_ids, probe only */
+static struct platform_driver mt7620_pci_driver = {
+       .probe = mt7620_pci_probe,
+       .driver = {
+               .name = "mt7620-pci",
+               .owner = THIS_MODULE,
+               .of_match_table = of_match_ptr(mt7620_pci_ids),
+       },
+};
+
+/* Register the platform driver; hooked in at arch_initcall level below. */
+static int __init mt7620_pci_init(void)
+{
+       return platform_driver_register(&mt7620_pci_driver);
+}
+
+arch_initcall(mt7620_pci_init);
diff --git a/arch/mips/pic32/Kconfig b/arch/mips/pic32/Kconfig
new file mode 100644 (file)
index 0000000..fde56a8
--- /dev/null
@@ -0,0 +1,51 @@
+if MACH_PIC32
+
+choice
+       prompt "Machine Type"
+
+config PIC32MZDA
+       bool "Microchip PIC32MZDA Platform"
+       select BOOT_ELF32
+       select BOOT_RAW
+       select CEVT_R4K
+       select CSRC_R4K
+       select DMA_NONCOHERENT
+       select SYS_HAS_CPU_MIPS32_R2
+       select SYS_HAS_EARLY_PRINTK
+       select SYS_SUPPORTS_32BIT_KERNEL
+       select SYS_SUPPORTS_LITTLE_ENDIAN
+       select ARCH_REQUIRE_GPIOLIB
+       select HAVE_MACH_CLKDEV
+       select COMMON_CLK
+       select CLKDEV_LOOKUP
+       select LIBFDT
+       select USE_OF
+       select PINCTRL
+       select PIC32_EVIC
+       help
+         Support for the Microchip PIC32MZDA microcontroller.
+
+         This is a 32-bit microcontroller with support for external or
+         internally packaged DDR2 memory up to 128MB.
+
+         For more information, see <http://www.microchip.com/>.
+
+endchoice
+
+choice
+       prompt "Devicetree selection"
+       default DTB_PIC32_NONE
+       help
+         Select the devicetree.
+
+config DTB_PIC32_NONE
+       bool "None"
+
+config DTB_PIC32_MZDA_SK
+       bool "PIC32MZDA Starter Kit"
+       depends on PIC32MZDA
+       select BUILTIN_DTB
+
+endchoice
+
+endif # MACH_PIC32
diff --git a/arch/mips/pic32/Makefile b/arch/mips/pic32/Makefile
new file mode 100644 (file)
index 0000000..fd357f4
--- /dev/null
@@ -0,0 +1,6 @@
+#
+# Joshua Henderson, <joshua.henderson@microchip.com>
+# Copyright (C) 2015 Microchip Technology, Inc.  All rights reserved.
+#
+obj-$(CONFIG_MACH_PIC32) += common/
+obj-$(CONFIG_PIC32MZDA) += pic32mzda/
diff --git a/arch/mips/pic32/Platform b/arch/mips/pic32/Platform
new file mode 100644 (file)
index 0000000..cd2084f
--- /dev/null
@@ -0,0 +1,7 @@
+#
+# PIC32MZDA
+#
+platform-$(CONFIG_PIC32MZDA)   += pic32/
+cflags-$(CONFIG_PIC32MZDA)     += -I$(srctree)/arch/mips/include/asm/mach-pic32
+load-$(CONFIG_PIC32MZDA)       += 0xffffffff88000000
+all-$(CONFIG_PIC32MZDA)                := $(COMPRESSION_FNAME).bin
diff --git a/arch/mips/pic32/common/Makefile b/arch/mips/pic32/common/Makefile
new file mode 100644 (file)
index 0000000..be1909c
--- /dev/null
@@ -0,0 +1,5 @@
+#
+# Joshua Henderson, <joshua.henderson@microchip.com>
+# Copyright (C) 2015 Microchip Technology, Inc.  All rights reserved.
+#
+obj-y = reset.o irq.o
diff --git a/arch/mips/pic32/common/irq.c b/arch/mips/pic32/common/irq.c
new file mode 100644 (file)
index 0000000..6df347e
--- /dev/null
@@ -0,0 +1,21 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <linux/init.h>
+#include <linux/irqchip.h>
+#include <asm/irq.h>
+
+/* Interrupt controller setup is delegated entirely to irqchip_init(). */
+void __init arch_init_irq(void)
+{
+       irqchip_init();
+}
diff --git a/arch/mips/pic32/common/reset.c b/arch/mips/pic32/common/reset.c
new file mode 100644 (file)
index 0000000..8334575
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <linux/init.h>
+#include <linux/pm.h>
+#include <asm/reboot.h>
+#include <asm/mach-pic32/pic32.h>
+
+#define PIC32_RSWRST           0x10
+
+/*
+ * Never returns: loops forever issuing the "wait" instruction.
+ *
+ * The assembler is switched to arch=r4000 just for that instruction;
+ * .set push/.set pop save and restore the surrounding assembler settings.
+ */
+static void pic32_halt(void)
+{
+       while (1) {
+               __asm__(".set push;\n"
+                       ".set arch=r4000;\n"
+                       "wait;\n"
+                       ".set pop;\n"
+               );
+       }
+}
+
+/*
+ * Restart handler installed as _machine_restart.
+ *
+ * Maps the RSWRST register, runs pic32_syskey_unlock(), writes 1 to RSWRST
+ * and reads it back, then parks the CPU in pic32_halt().  @command is unused.
+ *
+ * NOTE(review): the ioremap() result is used without a NULL check.
+ */
+static void pic32_machine_restart(char *command)
+{
+       void __iomem *reg =
+               ioremap(PIC32_BASE_RESET + PIC32_RSWRST, sizeof(u32));
+
+       pic32_syskey_unlock();
+
+       /* magic write/read */
+       __raw_writel(1, reg);
+       (void)__raw_readl(reg);
+
+       /* Wait here in case the reset does not take effect immediately. */
+       pic32_halt();
+}
+
+/*
+ * Halt/power-off handler: disables local interrupts and then parks the CPU
+ * in pic32_halt(), which never returns.
+ */
+static void pic32_machine_halt(void)
+{
+       local_irq_disable();
+
+       pic32_halt();
+}
+
+/*
+ * Install the PIC32 restart, halt and power-off handlers.  Power-off uses
+ * the same routine as halt.  Always returns 0.
+ */
+static int __init mips_reboot_setup(void)
+{
+       _machine_restart = pic32_machine_restart;
+       _machine_halt = pic32_machine_halt;
+       pm_power_off = pic32_machine_halt;
+
+       return 0;
+}
+
+arch_initcall(mips_reboot_setup);
diff --git a/arch/mips/pic32/pic32mzda/Makefile b/arch/mips/pic32/pic32mzda/Makefile
new file mode 100644 (file)
index 0000000..4a4c272
--- /dev/null
@@ -0,0 +1,9 @@
+#
+# Joshua Henderson, <joshua.henderson@microchip.com>
+# Copyright (C) 2015 Microchip Technology, Inc.  All rights reserved.
+#
+obj-y                  := init.o time.o config.o
+
+obj-$(CONFIG_EARLY_PRINTK)     += early_console.o      \
+                                  early_pin.o          \
+                                  early_clk.o
diff --git a/arch/mips/pic32/pic32mzda/config.c b/arch/mips/pic32/pic32mzda/config.c
new file mode 100644 (file)
index 0000000..fe293a0
--- /dev/null
@@ -0,0 +1,126 @@
+/*
+ * Purna Chandra Mandal, purna.mandal@microchip.com
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/of_platform.h>
+
+#include <asm/mach-pic32/pic32.h>
+
+#include "pic32mzda.h"
+
+#define PIC32_CFGCON   0x0000
+#define PIC32_DEVID    0x0020
+#define PIC32_SYSKEY   0x0030
+#define PIC32_CFGEBIA  0x00c0
+#define PIC32_CFGEBIC  0x00d0
+#define PIC32_CFGCON2  0x00f0
+#define PIC32_RCON     0x1240
+
+static void __iomem *pic32_conf_base;
+static DEFINE_SPINLOCK(config_lock);
+static u32 pic32_reset_status;
+
+/*
+ * Read one bit-field from a configuration register.
+ *
+ * @offset: register offset from pic32_conf_base
+ * @rshift: bit position of the field's least significant bit
+ * @mask:   field mask, applied after the shift
+ *
+ * Returns the field value, right-aligned.
+ */
+static u32 pic32_conf_get_reg_field(u32 offset, u32 rshift, u32 mask)
+{
+       u32 raw = readl(pic32_conf_base + offset);
+
+       return (raw >> rshift) & mask;
+}
+
+/*
+ * Read-modify-write a configuration register under config_lock with local
+ * interrupts disabled.
+ *
+ * @offset: register offset from pic32_conf_base
+ * @mask:   bits that may change
+ * @set:    new value for those bits; bits outside @mask are ignored
+ *
+ * Always returns 0.
+ */
+static u32 pic32_conf_modify_atomic(u32 offset, u32 mask, u32 set)
+{
+       u32 v;
+       unsigned long flags;
+
+       spin_lock_irqsave(&config_lock, flags);
+       v = readl(pic32_conf_base + offset);
+       v &= ~mask;
+       v |= (set & mask);
+       writel(v, pic32_conf_base + offset);
+       spin_unlock_irqrestore(&config_lock, flags);
+
+       return 0;
+}
+
+/* Set bit 31 of CFGCON2.  Returns 0. */
+int pic32_enable_lcd(void)
+{
+       return pic32_conf_modify_atomic(PIC32_CFGCON2, BIT(31), BIT(31));
+}
+
+/* Clear bit 31 of CFGCON2.  Returns 0. */
+int pic32_disable_lcd(void)
+{
+       return pic32_conf_modify_atomic(PIC32_CFGCON2, BIT(31), 0);
+}
+
+/*
+ * Program bit 30 of CFGCON2: any non-zero @mode sets it, zero clears it.
+ * Returns 0.
+ */
+int pic32_set_lcd_mode(int mode)
+{
+       const u32 mode_bit = BIT(30);
+
+       if (mode)
+               return pic32_conf_modify_atomic(PIC32_CFGCON2, mode_bit,
+                                               mode_bit);
+
+       return pic32_conf_modify_atomic(PIC32_CFGCON2, mode_bit, 0);
+}
+
+/*
+ * Program the two 10-bit threshold fields of CFGCON2.
+ *
+ * @rthrsh: value for the field at bits 13:4
+ * @wthrsh: value for the field at bits 25:16
+ *
+ * Each argument is truncated to 10 bits before it is shifted into place, so
+ * an oversized @rthrsh cannot spill into the @wthrsh field.
+ *
+ * Returns 0.
+ */
+int pic32_set_sdhci_adma_fifo_threshold(u32 rthrsh, u32 wthrsh)
+{
+       const u32 field = 0x3ff;
+       u32 clr, set;
+
+       clr = (field << 4) | (field << 16);
+       set = ((rthrsh & field) << 4) | ((wthrsh & field) << 16);
+       return pic32_conf_modify_atomic(PIC32_CFGCON2, clr, set);
+}
+
+/*
+ * Write the three-word sequence 0x00000000, 0xAA996655, 0x556699AA to the
+ * SYSKEY register, in that order.
+ *
+ * @func, @line: caller's location, used only for the pr_debug() trace.
+ */
+void pic32_syskey_unlock_debug(const char *func, const ulong line)
+{
+       void __iomem *syskey = pic32_conf_base + PIC32_SYSKEY;
+
+       pr_debug("%s: called from %s:%lu\n", __func__, func, line);
+       writel(0x00000000, syskey);
+       writel(0xAA996655, syskey);
+       writel(0x556699AA, syskey);
+}
+
+/* Return the low 28 bits (27:0) of the DEVID register. */
+static u32 pic32_get_device_id(void)
+{
+       return pic32_conf_get_reg_field(PIC32_DEVID, 0, 0x0fffffff);
+}
+
+/* Return the top 4 bits (31:28) of the DEVID register. */
+static u32 pic32_get_device_version(void)
+{
+       return pic32_conf_get_reg_field(PIC32_DEVID, 28, 0xf);
+}
+
+/*
+ * Return the RCON value that pic32_config_init() captured at boot (before
+ * it cleared the register).
+ */
+u32 pic32_get_boot_status(void)
+{
+       return pic32_reset_status;
+}
+EXPORT_SYMBOL(pic32_get_boot_status);
+
+/*
+ * Map the configuration register block, latch and clear the reset status,
+ * and log the device id/version.  Panics if the block cannot be mapped.
+ *
+ * NOTE(review): the mapping is 0x110 bytes long but PIC32_RCON is at offset
+ * 0x1240, i.e. outside the requested range - confirm this is intentional.
+ */
+void __init pic32_config_init(void)
+{
+       pic32_conf_base = ioremap(PIC32_BASE_CONFIG, 0x110);
+       if (!pic32_conf_base)
+               panic("pic32: config base not mapped");
+
+       /* Boot Status: save RCON, then write all-ones to its CLR alias */
+       pic32_reset_status = readl(pic32_conf_base + PIC32_RCON);
+       writel(-1, PIC32_CLR(pic32_conf_base + PIC32_RCON));
+
+       /* Device Information */
+       pr_info("Device Id: 0x%08x, Device Ver: 0x%04x\n",
+               pic32_get_device_id(),
+               pic32_get_device_version());
+}
diff --git a/arch/mips/pic32/pic32mzda/early_clk.c b/arch/mips/pic32/pic32mzda/early_clk.c
new file mode 100644 (file)
index 0000000..96c090e
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <asm/mach-pic32/pic32.h>
+
+#include "pic32mzda.h"
+
+/* Oscillators, PLL & clocks */
+#define ICLK_MASK      0x00000080
+#define PLLDIV_MASK    0x00000007
+#define CUROSC_MASK    0x00000007
+#define PLLMUL_MASK    0x0000007F
+#define PB_MASK                0x00000007
+#define FRC1           0
+#define FRC2           7
+#define SPLL           1
+#define POSC           2
+#define FRC_CLK                8000000
+
+#define PIC32_POSC_FREQ        24000000
+
+#define OSCCON         0x0000
+#define SPLLCON                0x0020
+#define PB1DIV         0x0140
+
+/*
+ * Work out the current system clock rate from the OSCCON and SPLLCON
+ * registers.
+ *
+ * The oscillator block is mapped only for the duration of the call.
+ * Returns the rate in the same unit as FRC_CLK/PIC32_POSC_FREQ, or 0 when
+ * the current-oscillator field holds a value not handled below.
+ *
+ * NOTE(review): the ioremap() result is used without a NULL check.
+ */
+u32 pic32_get_sysclk(void)
+{
+       u32 osc_freq = 0;
+       u32 pllclk;
+       u32 frcdivn;
+       u32 osccon;
+       u32 spllcon;
+       int curr_osc;
+
+       u32 plliclk;
+       u32 pllidiv;
+       u32 pllodiv;
+       u32 pllmult;
+       u32 frcdiv;
+
+       void __iomem *osc_base = ioremap(PIC32_BASE_OSC, 0x200);
+
+       osccon = __raw_readl(osc_base + OSCCON);
+       spllcon = __raw_readl(osc_base + SPLLCON);
+
+       /* SPLLCON: bit 7 input select, 10:8 input div, 22:16 mult, 26:24 output div */
+       plliclk = (spllcon & ICLK_MASK);
+       pllidiv = ((spllcon >> 8) & PLLDIV_MASK) + 1;
+       pllodiv = ((spllcon >> 24) & PLLDIV_MASK);
+       pllmult = ((spllcon >> 16) & PLLMUL_MASK) + 1;
+       /* OSCCON bits 26:24 (PLLDIV_MASK is reused as a plain 3-bit mask) */
+       frcdiv = ((osccon >> 24) & PLLDIV_MASK);
+
+       /* PLL input is FRC_CLK when the select bit is set, else the POSC rate */
+       pllclk = plliclk ? FRC_CLK : PIC32_POSC_FREQ;
+       /*
+        * NOTE(review): this evaluates to 2, 3, 5, 9, 17, 33, 65 for
+        * frcdiv 0..6 and 257 for frcdiv 7.  A power-of-two divider would
+        * be the usual expectation - confirm against the data sheet.
+        */
+       frcdivn = ((1 << frcdiv) + 1) + (128 * (frcdiv == 7));
+
+       /* Decode the output divider: 0-1 -> 2, 2-4 -> 1 << n, 5-7 -> 32 */
+       if (pllodiv < 2)
+               pllodiv = 2;
+       else if (pllodiv < 5)
+               pllodiv = (1 << pllodiv);
+       else
+               pllodiv = 32;
+
+       /* OSCCON bits 14:12: the oscillator currently in use */
+       curr_osc = (int)((osccon >> 12) & CUROSC_MASK);
+
+       switch (curr_osc) {
+       case FRC1:
+       case FRC2:
+               osc_freq = FRC_CLK / frcdivn;
+               break;
+       case SPLL:
+               osc_freq = ((pllclk / pllidiv) * pllmult) / pllodiv;
+               break;
+       case POSC:
+               osc_freq = PIC32_POSC_FREQ;
+               break;
+       default:
+               /* unhandled source: osc_freq stays 0 */
+               break;
+       }
+
+       iounmap(osc_base);
+
+       return osc_freq;
+}
+
+/*
+ * Return the clock rate of peripheral bus @bus.
+ *
+ * The divider register for bus N sits at PB1DIV + (N - 1) * 0x10; its low
+ * three bits plus one give the divisor applied to the system clock.
+ */
+u32 pic32_get_pbclk(int bus)
+{
+       void __iomem *osc_base = ioremap(PIC32_BASE_OSC, 0x200);
+       u32 div_reg = PB1DIV + ((bus - 1) * 0x10);
+       u32 divisor = (__raw_readl(osc_base + div_reg) & PB_MASK) + 1;
+
+       /* The divider has been read; the mapping is no longer needed. */
+       iounmap(osc_base);
+
+       return pic32_get_sysclk() / divisor;
+}
diff --git a/arch/mips/pic32/pic32mzda/early_console.c b/arch/mips/pic32/pic32mzda/early_console.c
new file mode 100644 (file)
index 0000000..d7b7834
--- /dev/null
@@ -0,0 +1,171 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <asm/mach-pic32/pic32.h>
+#include <asm/fw/fw.h>
+
+#include "pic32mzda.h"
+#include "early_pin.h"
+
+/* Default early console parameters */
+#define EARLY_CONSOLE_PORT     1
+#define EARLY_CONSOLE_BAUDRATE 115200
+
+#define UART_ENABLE            BIT(15)
+#define UART_ENABLE_RX         BIT(12)
+#define UART_ENABLE_TX         BIT(10)
+#define UART_TX_FULL           BIT(9)
+
+/* UART1(x == 0) - UART6(x == 5) */
+#define UART_BASE(x)   ((x) * 0x0200)
+#define U_MODE(x)      UART_BASE(x)
+#define U_STA(x)       (UART_BASE(x) + 0x10)
+#define U_TXR(x)       (UART_BASE(x) + 0x20)
+#define U_BRG(x)       (UART_BASE(x) + 0x40)
+
+static void __iomem *uart_base;
+static char console_port = -1;
+
+/*
+ * Route the RX/TX pins for the early console UART.
+ *
+ * Only port indices 1 and 5 have a pin assignment here.  Returns 0 when the
+ * pins were configured and -1 for any other @port.
+ */
+static int __init configure_uart_pins(int port)
+{
+       if (port == 1) {
+               pic32_pps_input(IN_FUNC_U2RX, IN_RPB0);
+               pic32_pps_output(OUT_FUNC_U2TX, OUT_RPG9);
+               return 0;
+       }
+
+       if (port == 5) {
+               pic32_pps_input(IN_FUNC_U6RX, IN_RPD0);
+               pic32_pps_output(OUT_FUNC_U6TX, OUT_RPB8);
+               return 0;
+       }
+
+       return -1;
+}
+
+/*
+ * Program UART @port for @baud and enable it.
+ *
+ * Sequence: clear the mode register, load the baud divisor
+ * (pbclk / baud / 16 - 1, using the clock of peripheral bus 2), set
+ * UART_ENABLE in the mode register, then set the RX and TX enable bits
+ * through the SET alias of the status register.
+ *
+ * NOTE(review): @baud is used as a divisor and must not be 0.
+ */
+static void __init configure_uart(char port, int baud)
+{
+       u32 pbclk;
+
+       pbclk = pic32_get_pbclk(2);
+
+       __raw_writel(0, uart_base + U_MODE(port));
+       __raw_writel(((pbclk / baud) / 16) - 1, uart_base + U_BRG(port));
+       __raw_writel(UART_ENABLE, uart_base + U_MODE(port));
+       __raw_writel(UART_ENABLE_TX | UART_ENABLE_RX,
+                    uart_base + PIC32_SET(U_STA(port)));
+}
+
+/*
+ * Configure pins and UART for the early console.
+ *
+ * console_port is only updated once configure_uart_pins() accepts @port, so
+ * an unsupported port leaves it unchanged.
+ */
+static void __init setup_early_console(char port, int baud)
+{
+       if (configure_uart_pins(port))
+               return;
+
+       console_port = port;
+       configure_uart(console_port, baud);
+}
+
+/*
+ * Return the command line to scan for early console options: the built-in
+ * CONFIG_CMDLINE when CONFIG_CMDLINE_OVERRIDE is set, otherwise whatever
+ * fw_getcmdline() returns.
+ */
+static char * __init pic32_getcmdline(void)
+{
+       /*
+        * arch_mem_init() has not been called yet, so we don't have a real
+        * command line setup if using CONFIG_CMDLINE_BOOL.
+        */
+#ifdef CONFIG_CMDLINE_OVERRIDE
+       return CONFIG_CMDLINE;
+#else
+       return fw_getcmdline();
+#endif
+}
+
+/*
+ * Extract the console port from "earlyprintk=...ttyS<N>".
+ *
+ * Returns -1 when @arch_cmdline is NULL or empty, or when it has no
+ * "earlyprintk=" followed by "ttyS".  Otherwise returns the character right
+ * after "ttyS" minus '0'; that character is not checked to be a digit.
+ */
+static int __init get_port_from_cmdline(char *arch_cmdline)
+{
+       char *opt;
+       char *tty;
+
+       if (!arch_cmdline || *arch_cmdline == '\0')
+               return -1;
+
+       opt = strstr(arch_cmdline, "earlyprintk=");
+       if (!opt)
+               return -1;
+
+       tty = strstr(opt, "ttyS");
+       if (!tty)
+               return -1;
+
+       /* tty[4] is the first character after the "ttyS" prefix */
+       return tty[4] - '0';
+}
+
+/*
+ * Extract the baud rate from "earlyprintk=...ttyS<port>,<baud>".
+ *
+ * Returns the baud rate, or -1 when @arch_cmdline is NULL or empty, when the
+ * option is missing, or when no usable (non-zero, sane) baud rate follows
+ * the port number.
+ *
+ * The string is walked one character at a time so the parser never steps
+ * past the terminating NUL, whatever the length of the port number.
+ */
+static int __init get_baud_from_cmdline(char *arch_cmdline)
+{
+       char *s;
+       int baud = 0;
+
+       if (!arch_cmdline || *arch_cmdline == '\0')
+               return -1;
+
+       s = strstr(arch_cmdline, "earlyprintk=");
+       if (!s)
+               return -1;
+
+       s = strstr(s, "ttyS");
+       if (!s)
+               return -1;
+       s += 4;
+
+       /* skip the port number, however many digits it has */
+       while (*s >= '0' && *s <= '9')
+               s++;
+
+       /* the baud rate is only present after a ',' separator */
+       if (*s != ',')
+               return -1;
+       s++;
+
+       while (*s >= '0' && *s <= '9') {
+               /* stop well before baud * 10 could overflow an int */
+               if (baud > 10000000)
+                       return -1;
+               baud = baud * 10 + (*s++ - '0');
+       }
+
+       /* 0 means "no digits" or a literal 0; neither is a usable divisor */
+       return baud > 0 ? baud : -1;
+}
+
+/*
+ * Bring up the early console UART.
+ *
+ * @port: UART index to use, or -1 to take it from the "earlyprintk=" option
+ *        on the command line, falling back to EARLY_CONSOLE_PORT.
+ *
+ * The baud rate also comes from the command line, falling back to
+ * EARLY_CONSOLE_BAUDRATE when none (or an unusable one) is given.
+ */
+void __init fw_init_early_console(char port)
+{
+       char *arch_cmdline = pic32_getcmdline();
+       int baud;
+
+       uart_base = ioremap_nocache(PIC32_BASE_UART, 0xc00);
+       /* Without a mapping there is nothing to program. */
+       if (!uart_base)
+               return;
+
+       baud = get_baud_from_cmdline(arch_cmdline);
+
+       /*
+        * Compare against (char)-1 rather than -1: plain char may be
+        * unsigned, in which case "port == -1" could never be true.
+        */
+       if (port == (char)-1)
+               port = get_port_from_cmdline(arch_cmdline);
+
+       if (port == (char)-1)
+               port = EARLY_CONSOLE_PORT;
+
+       /* baud is used as a divisor later on; never pass 0 or a negative */
+       if (baud <= 0)
+               baud = EARLY_CONSOLE_BAUDRATE;
+
+       setup_early_console(port, baud);
+}
+
+/*
+ * Emit one character on the early console.
+ *
+ * Does nothing while no console port has been set up.  Otherwise busy-waits
+ * until UART_TX_FULL clears in the status register and then writes @c to
+ * the transmit register.  Always returns 1.
+ */
+int prom_putchar(char c)
+{
+       /*
+        * console_port holds -1 until a port is configured.  Test for that
+        * sentinel explicitly: ">= 0" is always true if plain char is
+        * unsigned.
+        */
+       if (console_port != (char)-1) {
+               while (__raw_readl(
+                               uart_base + U_STA(console_port)) & UART_TX_FULL)
+                       ;
+
+               __raw_writel(c, uart_base + U_TXR(console_port));
+       }
+
+       return 1;
+}
diff --git a/arch/mips/pic32/pic32mzda/early_pin.c b/arch/mips/pic32/pic32mzda/early_pin.c
new file mode 100644 (file)
index 0000000..aa673f8
--- /dev/null
@@ -0,0 +1,275 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <asm/io.h>
+
+#include "early_pin.h"
+
+#define PPS_BASE 0x1f800000
+
+/* Input PPS Registers */
+#define INT1R 0x1404
+#define INT2R 0x1408
+#define INT3R 0x140C
+#define INT4R 0x1410
+#define T2CKR 0x1418
+#define T3CKR 0x141C
+#define T4CKR 0x1420
+#define T5CKR 0x1424
+#define T6CKR 0x1428
+#define T7CKR 0x142C
+#define T8CKR 0x1430
+#define T9CKR 0x1434
+#define IC1R 0x1438
+#define IC2R 0x143C
+#define IC3R 0x1440
+#define IC4R 0x1444
+#define IC5R 0x1448
+#define IC6R 0x144C
+#define IC7R 0x1450
+#define IC8R 0x1454
+#define IC9R 0x1458
+#define OCFAR 0x1460
+#define U1RXR 0x1468
+#define U1CTSR 0x146C
+#define U2RXR 0x1470
+#define U2CTSR 0x1474
+#define U3RXR 0x1478
+#define U3CTSR 0x147C
+#define U4RXR 0x1480
+#define U4CTSR 0x1484
+#define U5RXR 0x1488
+#define U5CTSR 0x148C
+#define U6RXR 0x1490
+#define U6CTSR 0x1494
+#define SDI1R 0x149C
+#define SS1R 0x14A0
+#define SDI2R 0x14A8
+#define SS2R 0x14AC
+#define SDI3R 0x14B4
+#define SS3R 0x14B8
+#define SDI4R 0x14C0
+#define SS4R 0x14C4
+#define SDI5R 0x14CC
+#define SS5R 0x14D0
+#define SDI6R 0x14D8
+#define SS6R 0x14DC
+#define C1RXR 0x14E0
+#define C2RXR 0x14E4
+#define REFCLKI1R 0x14E8
+#define REFCLKI3R 0x14F0
+#define REFCLKI4R 0x14F4
+
+/*
+ * Lookup table used by pic32_pps_input(): for each IN_FUNC_* identifier,
+ * the offset (from the mapped PPS base) of the register that selects the
+ * pin feeding that input function.
+ */
+static const struct
+{
+       int function;
+       int reg;
+} input_pin_reg[] = {
+       { IN_FUNC_INT3, INT3R },
+       { IN_FUNC_T2CK, T2CKR },
+       { IN_FUNC_T6CK, T6CKR },
+       { IN_FUNC_IC3, IC3R  },
+       { IN_FUNC_IC7, IC7R },
+       { IN_FUNC_U1RX, U1RXR },
+       { IN_FUNC_U2CTS, U2CTSR },
+       { IN_FUNC_U5RX, U5RXR },
+       { IN_FUNC_U6CTS, U6CTSR },
+       { IN_FUNC_SDI1, SDI1R },
+       { IN_FUNC_SDI3, SDI3R },
+       { IN_FUNC_SDI5, SDI5R },
+       { IN_FUNC_SS6, SS6R },
+       { IN_FUNC_REFCLKI1, REFCLKI1R },
+       { IN_FUNC_INT4, INT4R },
+       { IN_FUNC_T5CK, T5CKR },
+       { IN_FUNC_T7CK, T7CKR },
+       { IN_FUNC_IC4, IC4R },
+       { IN_FUNC_IC8, IC8R },
+       { IN_FUNC_U3RX, U3RXR },
+       { IN_FUNC_U4CTS, U4CTSR },
+       { IN_FUNC_SDI2, SDI2R },
+       { IN_FUNC_SDI4, SDI4R },
+       { IN_FUNC_C1RX, C1RXR },
+       { IN_FUNC_REFCLKI4, REFCLKI4R },
+       { IN_FUNC_INT2, INT2R },
+       { IN_FUNC_T3CK, T3CKR },
+       { IN_FUNC_T8CK, T8CKR },
+       { IN_FUNC_IC2, IC2R },
+       { IN_FUNC_IC5, IC5R },
+       { IN_FUNC_IC9, IC9R },
+       { IN_FUNC_U1CTS, U1CTSR },
+       { IN_FUNC_U2RX, U2RXR },
+       { IN_FUNC_U5CTS, U5CTSR },
+       { IN_FUNC_SS1, SS1R },
+       { IN_FUNC_SS3, SS3R },
+       { IN_FUNC_SS4, SS4R },
+       { IN_FUNC_SS5, SS5R },
+       { IN_FUNC_C2RX, C2RXR },
+       { IN_FUNC_INT1, INT1R },
+       { IN_FUNC_T4CK, T4CKR },
+       { IN_FUNC_T9CK, T9CKR },
+       { IN_FUNC_IC1, IC1R },
+       { IN_FUNC_IC6, IC6R },
+       { IN_FUNC_U3CTS, U3CTSR },
+       { IN_FUNC_U4RX, U4RXR },
+       { IN_FUNC_U6RX, U6RXR },
+       { IN_FUNC_SS2, SS2R },
+       { IN_FUNC_SDI6, SDI6R },
+       { IN_FUNC_OCFA, OCFAR },
+       { IN_FUNC_REFCLKI3, REFCLKI3R },
+};
+
+/*
+ * Connect input function @function to pin @pin.
+ *
+ * @function: one of the IN_FUNC_* identifiers
+ * @pin:      value written to that function's select register (IN_RP*)
+ *
+ * An unknown @function is silently ignored.  The temporary register mapping
+ * is released on every path, including after a successful write.
+ *
+ * NOTE(review): the mapping length is 0xF4 while the register offsets used
+ * are 0x14xx - confirm the intended base/length.
+ */
+void pic32_pps_input(int function, int pin)
+{
+       void __iomem *pps_base = ioremap_nocache(PPS_BASE, 0xF4);
+       int i;
+
+       if (!pps_base)
+               return;
+
+       for (i = 0; i < ARRAY_SIZE(input_pin_reg); i++) {
+               if (input_pin_reg[i].function == function) {
+                       __raw_writel(pin, pps_base + input_pin_reg[i].reg);
+                       /* fall out to the common exit so the mapping is freed */
+                       break;
+               }
+       }
+
+       iounmap(pps_base);
+}
+
+/* Output PPS Registers */
+#define RPA14R 0x1538
+#define RPA15R 0x153C
+#define RPB0R 0x1540
+#define RPB1R 0x1544
+#define RPB2R 0x1548
+#define RPB3R 0x154C
+#define RPB5R 0x1554
+#define RPB6R 0x1558
+#define RPB7R 0x155C
+#define RPB8R 0x1560
+#define RPB9R 0x1564
+#define RPB10R 0x1568
+#define RPB14R 0x1578
+#define RPB15R 0x157C
+#define RPC1R 0x1584
+#define RPC2R 0x1588
+#define RPC3R 0x158C
+#define RPC4R 0x1590
+#define RPC13R 0x15B4
+#define RPC14R 0x15B8
+#define RPD0R 0x15C0
+#define RPD1R 0x15C4
+#define RPD2R 0x15C8
+#define RPD3R 0x15CC
+#define RPD4R 0x15D0
+#define RPD5R 0x15D4
+#define RPD6R 0x15D8
+#define RPD7R 0x15DC
+#define RPD9R 0x15E4
+#define RPD10R 0x15E8
+#define RPD11R 0x15EC
+#define RPD12R 0x15F0
+#define RPD14R 0x15F8
+#define RPD15R 0x15FC
+#define RPE3R 0x160C
+#define RPE5R 0x1614
+#define RPE8R 0x1620
+#define RPE9R 0x1624
+#define RPF0R 0x1640
+#define RPF1R 0x1644
+#define RPF2R 0x1648
+#define RPF3R 0x164C
+#define RPF4R 0x1650
+#define RPF5R 0x1654
+#define RPF8R 0x1660
+#define RPF12R 0x1670
+#define RPF13R 0x1674
+#define RPG0R 0x1680
+#define RPG1R 0x1684
+#define RPG6R 0x1698
+#define RPG7R 0x169C
+#define RPG8R 0x16A0
+#define RPG9R 0x16A4
+
+/*
+ * Lookup table used by pic32_pps_output(): for each OUT_RP* pin identifier,
+ * the offset (from the mapped PPS base) of the register that selects which
+ * output function drives that pin.
+ */
+static const struct
+{
+       int pin;
+       int reg;
+} output_pin_reg[] = {
+       { OUT_RPD2, RPD2R },
+       { OUT_RPG8, RPG8R },
+       { OUT_RPF4, RPF4R },
+       { OUT_RPD10, RPD10R },
+       { OUT_RPF1, RPF1R },
+       { OUT_RPB9, RPB9R },
+       { OUT_RPB10, RPB10R },
+       { OUT_RPC14, RPC14R },
+       { OUT_RPB5, RPB5R },
+       { OUT_RPC1, RPC1R },
+       { OUT_RPD14, RPD14R },
+       { OUT_RPG1, RPG1R },
+       { OUT_RPA14, RPA14R },
+       { OUT_RPD6, RPD6R },
+       { OUT_RPD3, RPD3R },
+       { OUT_RPG7, RPG7R },
+       { OUT_RPF5, RPF5R },
+       { OUT_RPD11, RPD11R },
+       { OUT_RPF0, RPF0R },
+       { OUT_RPB1, RPB1R },
+       { OUT_RPE5, RPE5R },
+       { OUT_RPC13, RPC13R },
+       { OUT_RPB3, RPB3R },
+       { OUT_RPC4, RPC4R },
+       { OUT_RPD15, RPD15R },
+       { OUT_RPG0, RPG0R },
+       { OUT_RPA15, RPA15R },
+       { OUT_RPD7, RPD7R },
+       { OUT_RPD9, RPD9R },
+       { OUT_RPG6, RPG6R },
+       { OUT_RPB8, RPB8R },
+       { OUT_RPB15, RPB15R },
+       { OUT_RPD4, RPD4R },
+       { OUT_RPB0, RPB0R },
+       { OUT_RPE3, RPE3R },
+       { OUT_RPB7, RPB7R },
+       { OUT_RPF12, RPF12R },
+       { OUT_RPD12, RPD12R },
+       { OUT_RPF8, RPF8R },
+       { OUT_RPC3, RPC3R },
+       { OUT_RPE9, RPE9R },
+       { OUT_RPD1, RPD1R },
+       { OUT_RPG9, RPG9R },
+       { OUT_RPB14, RPB14R },
+       { OUT_RPD0, RPD0R },
+       { OUT_RPB6, RPB6R },
+       { OUT_RPD5, RPD5R },
+       { OUT_RPB2, RPB2R },
+       { OUT_RPF3, RPF3R },
+       { OUT_RPF13, RPF13R },
+       { OUT_RPC2, RPC2R },
+       { OUT_RPE8, RPE8R },
+       { OUT_RPF2, RPF2R },
+};
+
+/*
+ * Drive pin @pin from output function @function.
+ *
+ * @function: value written to the pin's select register (OUT_FUNC_*)
+ * @pin:      one of the OUT_RP* identifiers
+ *
+ * An unknown @pin is silently ignored.  The temporary register mapping is
+ * released on every path, including after a successful write.
+ *
+ * NOTE(review): the mapping length is 0x170 while the register offsets used
+ * are 0x15xx-0x16xx - confirm the intended base/length.
+ */
+void pic32_pps_output(int function, int pin)
+{
+       void __iomem *pps_base = ioremap_nocache(PPS_BASE, 0x170);
+       int i;
+
+       if (!pps_base)
+               return;
+
+       for (i = 0; i < ARRAY_SIZE(output_pin_reg); i++) {
+               if (output_pin_reg[i].pin == pin) {
+                       __raw_writel(function,
+                               pps_base + output_pin_reg[i].reg);
+                       /* fall out to the common exit so the mapping is freed */
+                       break;
+               }
+       }
+
+       iounmap(pps_base);
+}
diff --git a/arch/mips/pic32/pic32mzda/early_pin.h b/arch/mips/pic32/pic32mzda/early_pin.h
new file mode 100644 (file)
index 0000000..417fae9
--- /dev/null
@@ -0,0 +1,241 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#ifndef _PIC32MZDA_EARLY_PIN_H
+#define _PIC32MZDA_EARLY_PIN_H
+
+/*
+ * This is a complete, yet overly simplistic and unoptimized, PIC32MZDA PPS
+ * configuration only useful before we have full pinctrl initialized.
+ */
+
+/*
+ * Input PPS Functions
+ *
+ * Passed as the first argument of pic32_pps_input(), which uses them only
+ * as lookup keys into its register table; the numeric values are never
+ * written to hardware.
+ */
+enum {
+       IN_FUNC_INT3,
+       IN_FUNC_T2CK,
+       IN_FUNC_T6CK,
+       IN_FUNC_IC3,
+       IN_FUNC_IC7,
+       IN_FUNC_U1RX,
+       IN_FUNC_U2CTS,
+       IN_FUNC_U5RX,
+       IN_FUNC_U6CTS,
+       IN_FUNC_SDI1,
+       IN_FUNC_SDI3,
+       IN_FUNC_SDI5,
+       IN_FUNC_SS6,
+       IN_FUNC_REFCLKI1,
+       IN_FUNC_INT4,
+       IN_FUNC_T5CK,
+       IN_FUNC_T7CK,
+       IN_FUNC_IC4,
+       IN_FUNC_IC8,
+       IN_FUNC_U3RX,
+       IN_FUNC_U4CTS,
+       IN_FUNC_SDI2,
+       IN_FUNC_SDI4,
+       IN_FUNC_C1RX,
+       IN_FUNC_REFCLKI4,
+       IN_FUNC_INT2,
+       IN_FUNC_T3CK,
+       IN_FUNC_T8CK,
+       IN_FUNC_IC2,
+       IN_FUNC_IC5,
+       IN_FUNC_IC9,
+       IN_FUNC_U1CTS,
+       IN_FUNC_U2RX,
+       IN_FUNC_U5CTS,
+       IN_FUNC_SS1,
+       IN_FUNC_SS3,
+       IN_FUNC_SS4,
+       IN_FUNC_SS5,
+       IN_FUNC_C2RX,
+       IN_FUNC_INT1,
+       IN_FUNC_T4CK,
+       IN_FUNC_T9CK,
+       IN_FUNC_IC1,
+       IN_FUNC_IC6,
+       IN_FUNC_U3CTS,
+       IN_FUNC_U4RX,
+       IN_FUNC_U6RX,
+       IN_FUNC_SS2,
+       IN_FUNC_SDI6,
+       IN_FUNC_OCFA,
+       IN_FUNC_REFCLKI3,
+};
+
+/* Input PPS Pins */
+#define IN_RPD2 0x00
+#define IN_RPG8 0x01
+#define IN_RPF4 0x02
+#define IN_RPD10 0x03
+#define IN_RPF1 0x04
+#define IN_RPB9 0x05
+#define IN_RPB10 0x06
+#define IN_RPC14 0x07
+#define IN_RPB5 0x08
+#define IN_RPC1 0x0A
+#define IN_RPD14 0x0B
+#define IN_RPG1 0x0C
+#define IN_RPA14 0x0D
+#define IN_RPD6 0x0E
+#define IN_RPD3 0x00
+#define IN_RPG7 0x01
+#define IN_RPF5 0x02
+#define IN_RPD11 0x03
+#define IN_RPF0 0x04
+#define IN_RPB1 0x05
+#define IN_RPE5 0x06
+#define IN_RPC13 0x07
+#define IN_RPB3 0x08
+#define IN_RPC4 0x0A
+#define IN_RPD15 0x0B
+#define IN_RPG0 0x0C
+#define IN_RPA15 0x0D
+#define IN_RPD7 0x0E
+#define IN_RPD9 0x00
+#define IN_RPG6 0x01
+#define IN_RPB8 0x02
+#define IN_RPB15 0x03
+#define IN_RPD4 0x04
+#define IN_RPB0 0x05
+#define IN_RPE3 0x06
+#define IN_RPB7 0x07
+#define IN_RPF12 0x09
+#define IN_RPD12 0x0A
+#define IN_RPF8 0x0B
+#define IN_RPC3 0x0C
+#define IN_RPE9 0x0D
+#define IN_RPD1 0x00
+#define IN_RPG9 0x01
+#define IN_RPB14 0x02
+#define IN_RPD0 0x03
+#define IN_RPB6 0x05
+#define IN_RPD5 0x06
+#define IN_RPB2 0x07
+#define IN_RPF3 0x08
+#define IN_RPF13 0x09
+#define IN_RPF2 0x0B
+#define IN_RPC2 0x0C
+#define IN_RPE8 0x0D
+
+/*
+ * Output PPS Pins
+ *
+ * Passed as the second argument of pic32_pps_output(), which uses them only
+ * as lookup keys into its register table; the numeric values are never
+ * written to hardware.
+ */
+enum {
+       OUT_RPD2,
+       OUT_RPG8,
+       OUT_RPF4,
+       OUT_RPD10,
+       OUT_RPF1,
+       OUT_RPB9,
+       OUT_RPB10,
+       OUT_RPC14,
+       OUT_RPB5,
+       OUT_RPC1,
+       OUT_RPD14,
+       OUT_RPG1,
+       OUT_RPA14,
+       OUT_RPD6,
+       OUT_RPD3,
+       OUT_RPG7,
+       OUT_RPF5,
+       OUT_RPD11,
+       OUT_RPF0,
+       OUT_RPB1,
+       OUT_RPE5,
+       OUT_RPC13,
+       OUT_RPB3,
+       OUT_RPC4,
+       OUT_RPD15,
+       OUT_RPG0,
+       OUT_RPA15,
+       OUT_RPD7,
+       OUT_RPD9,
+       OUT_RPG6,
+       OUT_RPB8,
+       OUT_RPB15,
+       OUT_RPD4,
+       OUT_RPB0,
+       OUT_RPE3,
+       OUT_RPB7,
+       OUT_RPF12,
+       OUT_RPD12,
+       OUT_RPF8,
+       OUT_RPC3,
+       OUT_RPE9,
+       OUT_RPD1,
+       OUT_RPG9,
+       OUT_RPB14,
+       OUT_RPD0,
+       OUT_RPB6,
+       OUT_RPD5,
+       OUT_RPB2,
+       OUT_RPF3,
+       OUT_RPF13,
+       OUT_RPC2,
+       OUT_RPE8,
+       OUT_RPF2,
+};
+
+/* Output PPS Functions */
+#define OUT_FUNC_U3TX 0x01
+#define OUT_FUNC_U4RTS 0x02
+#define OUT_FUNC_SDO1 0x05
+#define OUT_FUNC_SDO2 0x06
+#define OUT_FUNC_SDO3 0x07
+#define OUT_FUNC_SDO5 0x09
+#define OUT_FUNC_SS6 0x0A
+#define OUT_FUNC_OC3 0x0B
+#define OUT_FUNC_OC6 0x0C
+#define OUT_FUNC_REFCLKO4 0x0D
+#define OUT_FUNC_C2OUT 0x0E
+#define OUT_FUNC_C1TX 0x0F
+#define OUT_FUNC_U1TX 0x01
+#define OUT_FUNC_U2RTS 0x02
+#define OUT_FUNC_U5TX 0x03
+#define OUT_FUNC_U6RTS 0x04
+#define OUT_FUNC_SDO1 0x05
+#define OUT_FUNC_SDO2 0x06
+#define OUT_FUNC_SDO3 0x07
+#define OUT_FUNC_SDO4 0x08
+#define OUT_FUNC_SDO5 0x09
+#define OUT_FUNC_OC4 0x0B
+#define OUT_FUNC_OC7 0x0C
+#define OUT_FUNC_REFCLKO1 0x0F
+#define OUT_FUNC_U3RTS 0x01
+#define OUT_FUNC_U4TX 0x02
+#define OUT_FUNC_U6TX 0x04
+#define OUT_FUNC_SS1 0x05
+#define OUT_FUNC_SS3 0x07
+#define OUT_FUNC_SS4 0x08
+#define OUT_FUNC_SS5 0x09
+#define OUT_FUNC_SDO6 0x0A
+#define OUT_FUNC_OC5 0x0B
+#define OUT_FUNC_OC8 0x0C
+#define OUT_FUNC_C1OUT 0x0E
+#define OUT_FUNC_REFCLKO3 0x0F
+#define OUT_FUNC_U1RTS 0x01
+#define OUT_FUNC_U2TX 0x02
+#define OUT_FUNC_U5RTS 0x03
+#define OUT_FUNC_U6TX 0x04
+#define OUT_FUNC_SS2 0x06
+#define OUT_FUNC_SDO4 0x08
+#define OUT_FUNC_SDO6 0x0A
+#define OUT_FUNC_OC2 0x0B
+#define OUT_FUNC_OC1 0x0C
+#define OUT_FUNC_OC9 0x0D
+#define OUT_FUNC_C2TX 0x0F
+
+void pic32_pps_input(int function, int pin);
+void pic32_pps_output(int function, int pin);
+
+#endif
diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c
new file mode 100644 (file)
index 0000000..775ff90
--- /dev/null
@@ -0,0 +1,156 @@
+/*
+ * Joshua Henderson, joshua.henderson@microchip.com
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+#include <linux/platform_data/sdhci-pic32.h>
+
+#include <asm/fw/fw.h>
+#include <asm/mips-boards/generic.h>
+#include <asm/prom.h>
+
+#include "pic32mzda.h"
+
+/* Report the platform name; always the fixed string "PIC32MZDA". */
+const char *get_system_type(void)
+{
+       return "PIC32MZDA";
+}
+
+/*
+ * Pick the address of the device tree blob to boot with.
+ *
+ * A blob address passed in fw_arg1 wins when fw_arg0 is -2 and fw_arg2 and
+ * fw_arg3 are both zero. Otherwise the blob between __dtb_start and
+ * __dtb_end is used if that range is non-empty. Returns 0 when neither
+ * source yields a blob.
+ */
+static ulong get_fdtaddr(void)
+{
+       int dtb_in_args = (fw_arg0 == -2) && fw_arg1 && !fw_arg2 && !fw_arg3;
+
+       if (dtb_in_args)
+               return (ulong)fw_arg1;
+
+       return (__dtb_start < __dtb_end) ? (ulong)__dtb_start : 0;
+}
+
+/*
+ * Early platform setup: locate the device tree blob, hand it to
+ * __dt_setup_arch(), log the command lines and run pic32_config_init().
+ *
+ * If get_fdtaddr() finds no blob an error is logged and the function returns
+ * before any of the remaining steps, including pic32_config_init().
+ */
+void __init plat_mem_setup(void)
+{
+       void *dtb;
+
+       dtb = (void *)get_fdtaddr();
+       if (!dtb) {
+               pr_err("pic32: no DTB found.\n");
+               return;
+       }
+
+       /*
+        * Load the device tree found above. This causes the chosen node to
+        * be parsed resulting in our memory appearing.
+        */
+       __dt_setup_arch(dtb);
+
+       pr_info("Found following command lines\n");
+       pr_info(" boot_command_line: %s\n", boot_command_line);
+       pr_info(" arcs_cmdline     : %s\n", arcs_cmdline);
+#ifdef CONFIG_CMDLINE_BOOL
+       pr_info(" builtin_cmdline  : %s\n", CONFIG_CMDLINE);
+#endif
+       /*
+        * The blob is not the one at __dtb_start (it was passed in through
+        * fw_arg1, see get_fdtaddr()): mirror boot_command_line into
+        * arcs_cmdline.
+        */
+       if (dtb != __dtb_start)
+               strlcpy(arcs_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+
+#ifdef CONFIG_EARLY_PRINTK
+       fw_init_early_console(-1);
+#endif
+       pic32_config_init();
+}
+
+/*
+ * Build arcs_cmdline from the firmware argument vector.
+ *
+ * argv[1] .. argv[argc - 1] are joined with single spaces; argv[0] is
+ * skipped. At most COMMAND_LINE_SIZE - 1 bytes are stored, separators
+ * included, and the result is always NUL-terminated. An argc of 1 or less
+ * (including negative values) yields an empty string.
+ *
+ * arcs_cmdline is assumed to hold COMMAND_LINE_SIZE bytes.
+ */
+static __init void pic32_init_cmdline(int argc, char *argv[])
+{
+       unsigned int count = COMMAND_LINE_SIZE - 1;
+       char *dst = &(arcs_cmdline[0]);
+       char *src;
+       int i;
+
+       for (i = 1; i < argc && count; ++i) {
+               /*
+                * Emit the separator in front of every argument but the
+                * first and charge it against the budget, so that a long
+                * argument list cannot push dst past the end of the buffer.
+                */
+               if (i > 1) {
+                       *dst++ = ' ';
+                       --count;
+               }
+
+               for (src = argv[i]; *src && count; --count)
+                       *dst++ = *src++;
+       }
+
+       /* At most COMMAND_LINE_SIZE - 1 bytes were stored, so this fits. */
+       *dst = 0;
+}
+
+/*
+ * Firmware entry hook: treat fw_arg0/fw_arg1 as argc/argv and build
+ * arcs_cmdline from them. When fw_arg0 is -2 (the value get_fdtaddr() tests
+ * for a blob passed in fw_arg1) argc is negative, so no arguments are
+ * copied and arcs_cmdline ends up empty.
+ */
+void __init prom_init(void)
+{
+       pic32_init_cmdline((int)fw_arg0, (char **)fw_arg1);
+}
+
+/* Empty on this platform: nothing is released here. */
+void __init prom_free_prom_memory(void)
+{
+}
+
+/* Unflatten and copy the device tree when initial_boot_params is set. */
+void __init device_tree_init(void)
+{
+       if (initial_boot_params)
+               unflatten_and_copy_device_tree();
+}
+
+/* Platform data for the SDHCI controller: supplies the setup_dma hook. */
+static struct pic32_sdhci_platform_data sdhci_data = {
+       .setup_dma = pic32_set_sdhci_adma_fifo_threshold,
+};
+
+/*
+ * Auxdata table passed to of_platform_populate() in plat_of_setup().
+ * The name and phys_addr fields of each entry may be rewritten by
+ * pic32_of_prepare_platform_data() before the table is used.
+ */
+static struct of_dev_auxdata pic32_auxdata_lookup[] __initdata = {
+       OF_DEV_AUXDATA("microchip,pic32mzda-sdhci", 0, "sdhci", &sdhci_data),
+       { /* sentinel */}
+};
+
+/*
+ * Fill in the auxdata table from the device tree.
+ *
+ * For every entry whose compatible string matches a node, the entry's name
+ * is replaced by the node's name and, if the entry has no physical address
+ * yet, the address of the node's first resource is stored. Entries without
+ * a matching node are left untouched.
+ *
+ * @lookup: table terminated by an entry with a NULL compatible string.
+ *
+ * Returns 0 in all cases.
+ */
+static int __init pic32_of_prepare_platform_data(struct of_dev_auxdata *lookup)
+{
+       struct device_node *np;
+       struct resource res;
+
+       for (; lookup->compatible; lookup++) {
+               np = of_find_compatible_node(NULL, NULL, lookup->compatible);
+               if (!np)
+                       continue;
+
+               lookup->name = (char *)np->name;
+               if (!lookup->phys_addr &&
+                   !of_address_to_resource(np, 0, &res))
+                       lookup->phys_addr = res.start;
+
+               /* Drop the reference taken by of_find_compatible_node(). */
+               of_node_put(np);
+       }
+
+       return 0;
+}
+
+/*
+ * arch_initcall: create platform devices from the device tree.
+ * Panics when no device tree has been populated or when
+ * of_platform_populate() reports failure; otherwise returns 0.
+ */
+static int __init plat_of_setup(void)
+{
+       int err;
+
+       if (!of_have_populated_dt())
+               panic("Device tree not present");
+
+       pic32_of_prepare_platform_data(pic32_auxdata_lookup);
+
+       err = of_platform_populate(NULL, of_default_bus_match_table,
+                                  pic32_auxdata_lookup, NULL);
+       if (err)
+               panic("Failed to populate DT");
+
+       return 0;
+}
+arch_initcall(plat_of_setup);
diff --git a/arch/mips/pic32/pic32mzda/pic32mzda.h b/arch/mips/pic32/pic32mzda/pic32mzda.h
new file mode 100644 (file)
index 0000000..96d10e2
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#ifndef PIC32MZDA_COMMON_H
+#define PIC32MZDA_COMMON_H
+
+/*
+ * __init and u32 are used by the prototypes below; include their providers
+ * so this header does not depend on the include order of its users.
+ */
+#include <linux/init.h>
+#include <linux/types.h>
+
+/* early clock */
+u32 pic32_get_pbclk(int bus);
+u32 pic32_get_sysclk(void);
+
+/* Device configuration */
+void __init pic32_config_init(void);
+int pic32_set_lcd_mode(int mode);
+/* Installed as the setup_dma hook of the SDHCI platform data in init.c. */
+int pic32_set_sdhci_adma_fifo_threshold(u32 rthrs, u32 wthrs);
+u32 pic32_get_boot_status(void);
+int pic32_disable_lcd(void);
+int pic32_enable_lcd(void);
+
+#endif
diff --git a/arch/mips/pic32/pic32mzda/time.c b/arch/mips/pic32/pic32mzda/time.c
new file mode 100644 (file)
index 0000000..ca6a62b
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/clocksource.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/irqdomain.h>
+
+#include <asm/time.h>
+
+#include "pic32mzda.h"
+
+/*
+ * Match table for the device tree node from which
+ * pic32_xlate_core_timer_irq() takes the core timer interrupt.
+ */
+static const struct of_device_id pic32_infra_match[] = {
+       { .compatible = "microchip,pic32mzda-infra", },
+       { },
+};
+
+/* Interrupt number mapped when the device tree does not provide one. */
+#define DEFAULT_CORE_TIMER_INTERRUPT 0
+
+/*
+ * Resolve the Linux IRQ number of the core timer.
+ *
+ * Interrupt 0 of the node matching pic32_infra_match is used when present.
+ * If the node is missing (which also triggers a WARN) or has no mappable
+ * interrupt, DEFAULT_CORE_TIMER_INTERRUPT is mapped instead.
+ */
+static unsigned int pic32_xlate_core_timer_irq(void)
+{
+       struct device_node *node;
+       unsigned int irq;
+
+       node = of_find_matching_node(NULL, pic32_infra_match);
+       if (WARN_ON(!node))
+               goto default_map;
+
+       irq = irq_of_parse_and_map(node, 0);
+       /* Drop the reference taken by of_find_matching_node(). */
+       of_node_put(node);
+       if (!irq)
+               goto default_map;
+
+       return irq;
+
+default_map:
+       return irq_create_mapping(NULL, DEFAULT_CORE_TIMER_INTERRUPT);
+}
+
+/* Return the core timer IRQ as resolved by pic32_xlate_core_timer_irq(). */
+unsigned int get_c0_compare_int(void)
+{
+       return pic32_xlate_core_timer_irq();
+}
+
+/*
+ * Time initialisation: register device tree clocks, enable the CPU clock,
+ * set mips_hpt_frequency to half the CPU clock rate and probe clocksources.
+ * Panics if the "cpu_clk" clock cannot be obtained.
+ */
+void __init plat_time_init(void)
+{
+       unsigned long rate;
+       struct clk *clk;
+
+       of_clk_init(NULL);
+       clk = clk_get_sys("cpu_clk", NULL);
+       if (IS_ERR(clk))
+               panic("unable to get CPU clock, err=%ld", PTR_ERR(clk));
+
+       clk_prepare_enable(clk);
+
+       /* clk_get_rate() returns unsigned long, hence %lu. */
+       rate = clk_get_rate(clk);
+       pr_info("CPU Clock: %luMHz\n", rate / 1000000);
+       mips_hpt_frequency = rate / 2;
+
+       clocksource_probe();
+}
index e9bc8c9..813826a 100644 (file)
@@ -12,6 +12,11 @@ config RALINK_ILL_ACC
        depends on SOC_RT305X
        default y
 
+config IRQ_INTC
+       bool
+       default y
+       depends on !SOC_MT7621
+
 choice
        prompt "Ralink SoC selection"
        default SOC_RT305X
@@ -33,7 +38,18 @@ choice
 
        config SOC_MT7620
                bool "MT7620/8"
+               select HW_HAS_PCI
 
+       config SOC_MT7621
+               bool "MT7621"
+               select MIPS_CPU_SCACHE
+               select SYS_SUPPORTS_MULTITHREADING
+               select SYS_SUPPORTS_SMP
+               select SYS_SUPPORTS_MIPS_CPS
+               select MIPS_GIC
+               select COMMON_CLK
+               select CLKSRC_MIPS_GIC
+               select HW_HAS_PCI
 endchoice
 
 choice
index a6c9d00..0d1795a 100644 (file)
@@ -6,16 +6,24 @@
 # Copyright (C) 2009-2011 Gabor Juhos <juhosg@openwrt.org>
 # Copyright (C) 2013 John Crispin <blogic@openwrt.org>
 
-obj-y := prom.o of.o reset.o clk.o irq.o timer.o
+obj-y := prom.o of.o reset.o
+
+ifndef CONFIG_MIPS_GIC
+       obj-y += clk.o timer.o
+endif
 
 obj-$(CONFIG_CLKEVT_RT3352) += cevt-rt3352.o
 
 obj-$(CONFIG_RALINK_ILL_ACC) += ill_acc.o
 
+obj-$(CONFIG_IRQ_INTC) += irq.o
+obj-$(CONFIG_MIPS_GIC) += irq-gic.o timer-gic.o
+
 obj-$(CONFIG_SOC_RT288X) += rt288x.o
 obj-$(CONFIG_SOC_RT305X) += rt305x.o
 obj-$(CONFIG_SOC_RT3883) += rt3883.o
 obj-$(CONFIG_SOC_MT7620) += mt7620.o
+obj-$(CONFIG_SOC_MT7621) += mt7621.o
 
 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
 
index 6d9c8c4..6095fcc 100644 (file)
@@ -27,3 +27,8 @@ cflags-$(CONFIG_SOC_RT3883)   += -I$(srctree)/arch/mips/include/asm/mach-ralink/rt
 #
 load-$(CONFIG_SOC_MT7620)      += 0xffffffff80000000
 cflags-$(CONFIG_SOC_MT7620)    += -I$(srctree)/arch/mips/include/asm/mach-ralink/mt7620
+
+# Ralink MT7621
+#
+load-$(CONFIG_SOC_MT7621)      += 0xffffffff80001000
+cflags-$(CONFIG_SOC_MT7621)    += -I$(srctree)/arch/mips/include/asm/mach-ralink/mt7621
diff --git a/arch/mips/ralink/irq-gic.c b/arch/mips/ralink/irq-gic.c
new file mode 100644 (file)
index 0000000..50d6c55
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 Nikolay Martynov <mar.kolya@gmail.com>
+ * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+
+#include <linux/of.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/mips-gic.h>
+
+/* Forward to gic_get_c0_perfcount_int() and return its result unchanged. */
+int get_c0_perfcount_int(void)
+{
+       return gic_get_c0_perfcount_int();
+}
+EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
+
+/* Interrupt setup for this build consists solely of irqchip_init(). */
+void __init arch_init_irq(void)
+{
+       irqchip_init();
+}
index dfb04fc..0d3d1a9 100644 (file)
@@ -107,31 +107,31 @@ static struct rt2880_pmx_group mt7620a_pinmux_data[] = {
 };
 
 static struct rt2880_pmx_func pwm1_grp_mt7628[] = {
-       FUNC("sdcx", 3, 19, 1),
+       FUNC("sdxc d6", 3, 19, 1),
        FUNC("utif", 2, 19, 1),
        FUNC("gpio", 1, 19, 1),
-       FUNC("pwm", 0, 19, 1),
+       FUNC("pwm1", 0, 19, 1),
 };
 
 static struct rt2880_pmx_func pwm0_grp_mt7628[] = {
-       FUNC("sdcx", 3, 18, 1),
+       FUNC("sdxc d7", 3, 18, 1),
        FUNC("utif", 2, 18, 1),
        FUNC("gpio", 1, 18, 1),
-       FUNC("pwm", 0, 18, 1),
+       FUNC("pwm0", 0, 18, 1),
 };
 
 static struct rt2880_pmx_func uart2_grp_mt7628[] = {
-       FUNC("sdcx", 3, 20, 2),
+       FUNC("sdxc d5 d4", 3, 20, 2),
        FUNC("pwm", 2, 20, 2),
        FUNC("gpio", 1, 20, 2),
-       FUNC("uart", 0, 20, 2),
+       FUNC("uart2", 0, 20, 2),
 };
 
 static struct rt2880_pmx_func uart1_grp_mt7628[] = {
-       FUNC("sdcx", 3, 45, 2),
+       FUNC("sw_r", 3, 45, 2),
        FUNC("pwm", 2, 45, 2),
        FUNC("gpio", 1, 45, 2),
-       FUNC("uart", 0, 45, 2),
+       FUNC("uart1", 0, 45, 2),
 };
 
 static struct rt2880_pmx_func i2c_grp_mt7628[] = {
@@ -143,21 +143,21 @@ static struct rt2880_pmx_func i2c_grp_mt7628[] = {
 
 static struct rt2880_pmx_func refclk_grp_mt7628[] = { FUNC("reclk", 0, 36, 1) };
 static struct rt2880_pmx_func perst_grp_mt7628[] = { FUNC("perst", 0, 37, 1) };
-static struct rt2880_pmx_func wdt_grp_mt7628[] = { FUNC("wdt", 0, 15, 38) };
+static struct rt2880_pmx_func wdt_grp_mt7628[] = { FUNC("wdt", 0, 38, 1) };
 static struct rt2880_pmx_func spi_grp_mt7628[] = { FUNC("spi", 0, 7, 4) };
 
 static struct rt2880_pmx_func sd_mode_grp_mt7628[] = {
        FUNC("jtag", 3, 22, 8),
        FUNC("utif", 2, 22, 8),
        FUNC("gpio", 1, 22, 8),
-       FUNC("sdcx", 0, 22, 8),
+       FUNC("sdxc", 0, 22, 8),
 };
 
 static struct rt2880_pmx_func uart0_grp_mt7628[] = {
        FUNC("-", 3, 12, 2),
        FUNC("-", 2, 12, 2),
        FUNC("gpio", 1, 12, 2),
-       FUNC("uart", 0, 12, 2),
+       FUNC("uart0", 0, 12, 2),
 };
 
 static struct rt2880_pmx_func i2s_grp_mt7628[] = {
@@ -171,7 +171,7 @@ static struct rt2880_pmx_func spi_cs1_grp_mt7628[] = {
        FUNC("-", 3, 6, 1),
        FUNC("refclk", 2, 6, 1),
        FUNC("gpio", 1, 6, 1),
-       FUNC("spi", 0, 6, 1),
+       FUNC("spi cs1", 0, 6, 1),
 };
 
 static struct rt2880_pmx_func spis_grp_mt7628[] = {
@@ -188,28 +188,44 @@ static struct rt2880_pmx_func gpio_grp_mt7628[] = {
        FUNC("gpio", 0, 11, 1),
 };
 
-#define MT7628_GPIO_MODE_MASK  0x3
-
-#define MT7628_GPIO_MODE_PWM1  30
-#define MT7628_GPIO_MODE_PWM0  28
-#define MT7628_GPIO_MODE_UART2 26
-#define MT7628_GPIO_MODE_UART1 24
-#define MT7628_GPIO_MODE_I2C   20
-#define MT7628_GPIO_MODE_REFCLK        18
-#define MT7628_GPIO_MODE_PERST 16
-#define MT7628_GPIO_MODE_WDT   14
-#define MT7628_GPIO_MODE_SPI   12
-#define MT7628_GPIO_MODE_SDMODE        10
-#define MT7628_GPIO_MODE_UART0 8
-#define MT7628_GPIO_MODE_I2S   6
-#define MT7628_GPIO_MODE_CS1   4
-#define MT7628_GPIO_MODE_SPIS  2
-#define MT7628_GPIO_MODE_GPIO  0
+static struct rt2880_pmx_func wled_kn_grp_mt7628[] = {
+       FUNC("rsvd", 3, 35, 1),
+       FUNC("rsvd", 2, 35, 1),
+       FUNC("gpio", 1, 35, 1),
+       FUNC("wled_kn", 0, 35, 1),
+};
+
+/* WLED_AN is GPIO 44; pin 35 belongs to wled_kn_grp_mt7628. */
+static struct rt2880_pmx_func wled_an_grp_mt7628[] = {
+       FUNC("rsvd", 3, 44, 1),
+       FUNC("rsvd", 2, 44, 1),
+       FUNC("gpio", 1, 44, 1),
+       FUNC("wled_an", 0, 44, 1),
+};
+
+#define MT7628_GPIO_MODE_MASK          0x3
+
+#define MT7628_GPIO_MODE_WLED_KN       48
+#define MT7628_GPIO_MODE_WLED_AN       32
+#define MT7628_GPIO_MODE_PWM1          30
+#define MT7628_GPIO_MODE_PWM0          28
+#define MT7628_GPIO_MODE_UART2         26
+#define MT7628_GPIO_MODE_UART1         24
+#define MT7628_GPIO_MODE_I2C           20
+#define MT7628_GPIO_MODE_REFCLK                18
+#define MT7628_GPIO_MODE_PERST         16
+#define MT7628_GPIO_MODE_WDT           14
+#define MT7628_GPIO_MODE_SPI           12
+#define MT7628_GPIO_MODE_SDMODE                10
+#define MT7628_GPIO_MODE_UART0         8
+#define MT7628_GPIO_MODE_I2S           6
+#define MT7628_GPIO_MODE_CS1           4
+#define MT7628_GPIO_MODE_SPIS          2
+#define MT7628_GPIO_MODE_GPIO          0
 
 static struct rt2880_pmx_group mt7628an_pinmux_data[] = {
        GRP_G("pmw1", pwm1_grp_mt7628, MT7628_GPIO_MODE_MASK,
                                1, MT7628_GPIO_MODE_PWM1),
-       GRP_G("pmw1", pwm0_grp_mt7628, MT7628_GPIO_MODE_MASK,
+       GRP_G("pmw0", pwm0_grp_mt7628, MT7628_GPIO_MODE_MASK,
                                1, MT7628_GPIO_MODE_PWM0),
        GRP_G("uart2", uart2_grp_mt7628, MT7628_GPIO_MODE_MASK,
                                1, MT7628_GPIO_MODE_UART2),
@@ -233,6 +249,10 @@ static struct rt2880_pmx_group mt7628an_pinmux_data[] = {
                                1, MT7628_GPIO_MODE_SPIS),
        GRP_G("gpio", gpio_grp_mt7628, MT7628_GPIO_MODE_MASK,
                                1, MT7628_GPIO_MODE_GPIO),
+       GRP_G("wled_an", wled_an_grp_mt7628, MT7628_GPIO_MODE_MASK,
+                               1, MT7628_GPIO_MODE_WLED_AN),
+       GRP_G("wled_kn", wled_kn_grp_mt7628, MT7628_GPIO_MODE_MASK,
+                               1, MT7628_GPIO_MODE_WLED_KN),
        { 0 }
 };
 
@@ -436,10 +456,13 @@ void __init ralink_clk_init(void)
        ralink_clk_add("10000100.timer", periph_rate);
        ralink_clk_add("10000120.watchdog", periph_rate);
        ralink_clk_add("10000b00.spi", sys_rate);
+       ralink_clk_add("10000b40.spi", sys_rate);
        ralink_clk_add("10000c00.uartlite", periph_rate);
+       ralink_clk_add("10000d00.uart1", periph_rate);
+       ralink_clk_add("10000e00.uart2", periph_rate);
        ralink_clk_add("10180000.wmac", xtal_rate);
 
-       if (IS_ENABLED(CONFIG_USB) && is_mt76x8()) {
+       if (IS_ENABLED(CONFIG_USB) && !is_mt76x8()) {
                /*
                 * When the CPU goes into sleep mode, the BUS clock will be
                 * too low for USB to function properly. Adjust the busses
@@ -552,7 +575,7 @@ void prom_soc_init(struct ralink_soc_info *soc_info)
        }
 
        snprintf(soc_info->sys_type, RAMIPS_SYS_TYPE_LEN,
-               "Ralink %s ver:%u eco:%u",
+               "MediaTek %s ver:%u eco:%u",
                name,
                (rev >> CHIP_REV_VER_SHIFT) & CHIP_REV_VER_MASK,
                (rev & CHIP_REV_ECO_MASK));
diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c
new file mode 100644 (file)
index 0000000..e9b9fa3
--- /dev/null
@@ -0,0 +1,226 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 Nikolay Martynov <mar.kolya@gmail.com>
+ * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <asm/mipsregs.h>
+#include <asm/smp-ops.h>
+#include <asm/mips-cm.h>
+#include <asm/mips-cpc.h>
+#include <asm/mach-ralink/ralink_regs.h>
+#include <asm/mach-ralink/mt7621.h>
+
+#include <pinmux.h>
+
+#include "common.h"
+
+/* System controller offsets read with rt_sysc_r32() in ralink_clk_init(). */
+#define SYSC_REG_SYSCFG                0x10
+#define SYSC_REG_CPLL_CLKCFG0  0x2c
+#define SYSC_REG_CUR_CLK_STS   0x44
+/* Mask applied to the SYSC_REG_CPLL_CLKCFG0 value in ralink_clk_init(). */
+#define CPU_CLK_SEL            (BIT(30) | BIT(31))
+
+/*
+ * Pin group mode descriptors consumed by mt7621_pinmux_data.
+ * Groups with a single value (UART1, I2C, JTAG, RGMII1, RGMII2) pass it as
+ * the last argument of GRP(); groups with a _MASK/_SHIFT/_GPIO triple pass
+ * all three to GRP_G().
+ * NOTE(review): the single values and the _SHIFT values look like bit
+ * positions in the GPIO mode register - confirm against pinmux.h and the
+ * datasheet.
+ * MT7621_GPIO_MODE_PCIE_RST and MT7621_GPIO_MODE_PCIE_REF are different in
+ * kind: they are the second argument of FUNC() in pcie_rst_grp.
+ */
+#define MT7621_GPIO_MODE_UART1         1
+#define MT7621_GPIO_MODE_I2C           2
+#define MT7621_GPIO_MODE_UART3_MASK    0x3
+#define MT7621_GPIO_MODE_UART3_SHIFT   3
+#define MT7621_GPIO_MODE_UART3_GPIO    1
+#define MT7621_GPIO_MODE_UART2_MASK    0x3
+#define MT7621_GPIO_MODE_UART2_SHIFT   5
+#define MT7621_GPIO_MODE_UART2_GPIO    1
+#define MT7621_GPIO_MODE_JTAG          7
+#define MT7621_GPIO_MODE_WDT_MASK      0x3
+#define MT7621_GPIO_MODE_WDT_SHIFT     8
+#define MT7621_GPIO_MODE_WDT_GPIO      1
+#define MT7621_GPIO_MODE_PCIE_RST      0
+#define MT7621_GPIO_MODE_PCIE_REF      2
+#define MT7621_GPIO_MODE_PCIE_MASK     0x3
+#define MT7621_GPIO_MODE_PCIE_SHIFT    10
+#define MT7621_GPIO_MODE_PCIE_GPIO     1
+#define MT7621_GPIO_MODE_MDIO_MASK     0x3
+#define MT7621_GPIO_MODE_MDIO_SHIFT    12
+#define MT7621_GPIO_MODE_MDIO_GPIO     1
+#define MT7621_GPIO_MODE_RGMII1                14
+#define MT7621_GPIO_MODE_RGMII2                15
+#define MT7621_GPIO_MODE_SPI_MASK      0x3
+#define MT7621_GPIO_MODE_SPI_SHIFT     16
+#define MT7621_GPIO_MODE_SPI_GPIO      1
+#define MT7621_GPIO_MODE_SDHCI_MASK    0x3
+#define MT7621_GPIO_MODE_SDHCI_SHIFT   18
+#define MT7621_GPIO_MODE_SDHCI_GPIO    1
+
+/*
+ * Selectable functions for each MT7621 pin group, referenced from
+ * mt7621_pinmux_data.
+ * NOTE(review): FUNC() is defined in pinmux.h, not here; its arguments
+ * appear to be (name, selector value, first pin, pin count) - confirm.
+ */
+static struct rt2880_pmx_func uart1_grp[] =  { FUNC("uart1", 0, 1, 2) };
+static struct rt2880_pmx_func i2c_grp[] =  { FUNC("i2c", 0, 3, 2) };
+static struct rt2880_pmx_func uart3_grp[] = {
+       FUNC("uart3", 0, 5, 4),
+       FUNC("i2s", 2, 5, 4),
+       FUNC("spdif3", 3, 5, 4),
+};
+static struct rt2880_pmx_func uart2_grp[] = {
+       FUNC("uart2", 0, 9, 4),
+       FUNC("pcm", 2, 9, 4),
+       FUNC("spdif2", 3, 9, 4),
+};
+static struct rt2880_pmx_func jtag_grp[] = { FUNC("jtag", 0, 13, 5) };
+static struct rt2880_pmx_func wdt_grp[] = {
+       FUNC("wdt rst", 0, 18, 1),
+       FUNC("wdt refclk", 2, 18, 1),
+};
+static struct rt2880_pmx_func pcie_rst_grp[] = {
+       FUNC("pcie rst", MT7621_GPIO_MODE_PCIE_RST, 19, 1),
+       FUNC("pcie refclk", MT7621_GPIO_MODE_PCIE_REF, 19, 1)
+};
+static struct rt2880_pmx_func mdio_grp[] = { FUNC("mdio", 0, 20, 2) };
+static struct rt2880_pmx_func rgmii2_grp[] = { FUNC("rgmii2", 0, 22, 12) };
+static struct rt2880_pmx_func spi_grp[] = {
+       FUNC("spi", 0, 34, 7),
+       FUNC("nand1", 2, 34, 7),
+};
+static struct rt2880_pmx_func sdhci_grp[] = {
+       FUNC("sdhci", 0, 41, 8),
+       FUNC("nand2", 2, 41, 8),
+};
+static struct rt2880_pmx_func rgmii1_grp[] = { FUNC("rgmii1", 0, 49, 12) };
+
+/*
+ * Pin group table for the MT7621, installed as rt2880_pinmux_data by
+ * prom_soc_init(). The list ends with an all-zero entry.
+ */
+static struct rt2880_pmx_group mt7621_pinmux_data[] = {
+       GRP("uart1", uart1_grp, 1, MT7621_GPIO_MODE_UART1),
+       GRP("i2c", i2c_grp, 1, MT7621_GPIO_MODE_I2C),
+       GRP_G("uart3", uart3_grp, MT7621_GPIO_MODE_UART3_MASK,
+               MT7621_GPIO_MODE_UART3_GPIO, MT7621_GPIO_MODE_UART3_SHIFT),
+       GRP_G("uart2", uart2_grp, MT7621_GPIO_MODE_UART2_MASK,
+               MT7621_GPIO_MODE_UART2_GPIO, MT7621_GPIO_MODE_UART2_SHIFT),
+       GRP("jtag", jtag_grp, 1, MT7621_GPIO_MODE_JTAG),
+       GRP_G("wdt", wdt_grp, MT7621_GPIO_MODE_WDT_MASK,
+               MT7621_GPIO_MODE_WDT_GPIO, MT7621_GPIO_MODE_WDT_SHIFT),
+       GRP_G("pcie", pcie_rst_grp, MT7621_GPIO_MODE_PCIE_MASK,
+               MT7621_GPIO_MODE_PCIE_GPIO, MT7621_GPIO_MODE_PCIE_SHIFT),
+       GRP_G("mdio", mdio_grp, MT7621_GPIO_MODE_MDIO_MASK,
+               MT7621_GPIO_MODE_MDIO_GPIO, MT7621_GPIO_MODE_MDIO_SHIFT),
+       GRP("rgmii2", rgmii2_grp, 1, MT7621_GPIO_MODE_RGMII2),
+       GRP_G("spi", spi_grp, MT7621_GPIO_MODE_SPI_MASK,
+               MT7621_GPIO_MODE_SPI_GPIO, MT7621_GPIO_MODE_SPI_SHIFT),
+       GRP_G("sdhci", sdhci_grp, MT7621_GPIO_MODE_SDHCI_MASK,
+               MT7621_GPIO_MODE_SDHCI_GPIO, MT7621_GPIO_MODE_SDHCI_SHIFT),
+       GRP("rgmii1", rgmii1_grp, 1, MT7621_GPIO_MODE_RGMII1),
+       { 0 }
+};
+
+/*
+ * No default CPC base address is supplied here: calling this function
+ * panics instead of returning an address.
+ */
+phys_addr_t mips_cpc_default_phys_base(void)
+{
+       panic("Cannot detect cpc address");
+}
+
+/*
+ * Work out the CPU clock rate from the system controller registers.
+ *
+ * NOTE(review): the computed cpu_clk is never stored, registered or
+ * returned by this function, so as written it has no effect beyond the
+ * register reads - confirm whether a consumer is missing.
+ */
+void __init ralink_clk_init(void)
+{
+       int cpu_fdiv = 0;
+       int cpu_ffrac = 0;
+       int fbdiv = 0;
+       u32 clk_sts, syscfg;
+       u8 clk_sel = 0, xtal_mode;
+       u32 cpu_clk;
+
+       /* Any bit set under CPU_CLK_SEL selects the formula in case 1. */
+       if ((rt_sysc_r32(SYSC_REG_CPLL_CLKCFG0) & CPU_CLK_SEL) != 0)
+               clk_sel = 1;
+
+       switch (clk_sel) {
+       case 0:
+               clk_sts = rt_sysc_r32(SYSC_REG_CUR_CLK_STS);
+               cpu_fdiv = ((clk_sts >> 8) & 0x1F);
+               cpu_ffrac = (clk_sts & 0x1F);
+               /* NOTE(review): divides by zero if bits 12:8 read as 0. */
+               cpu_clk = (500 * cpu_ffrac / cpu_fdiv) * 1000 * 1000;
+               break;
+
+       case 1:
+               /* 0x648 is an unnamed offset; bits 10:4 plus one give fbdiv. */
+               fbdiv = ((rt_sysc_r32(0x648) >> 4) & 0x7F) + 1;
+               syscfg = rt_sysc_r32(SYSC_REG_SYSCFG);
+               xtal_mode = (syscfg >> 6) & 0x7;
+               /*
+                * NOTE(review): fbdiv is an int in 1..128 and the products
+                * below are evaluated as int, so large fbdiv values overflow
+                * (e.g. 40 * 54 * 1000 * 1000 exceeds INT_MAX).
+                */
+               if (xtal_mode >= 6) {
+                       /* 25Mhz Xtal */
+                       cpu_clk = 25 * fbdiv * 1000 * 1000;
+               } else if (xtal_mode >= 3) {
+                       /* 40Mhz Xtal */
+                       cpu_clk = 40 * fbdiv * 1000 * 1000;
+               } else {
+                       /* 20Mhz Xtal */
+                       cpu_clk = 20 * fbdiv * 1000 * 1000;
+               }
+               break;
+       }
+}
+
+/*
+ * Remap the "mtk,mt7621-sysc" and "mtk,mt7621-memc" nodes into
+ * rt_sysc_membase and rt_memc_membase; panic if either result is NULL.
+ */
+void __init ralink_of_remap(void)
+{
+       rt_sysc_membase = plat_of_remap_node("mtk,mt7621-sysc");
+       rt_memc_membase = plat_of_remap_node("mtk,mt7621-memc");
+
+       if (!rt_sysc_membase || !rt_memc_membase)
+               panic("Failed to remap core resources");
+}
+
+/*
+ * Early SoC detection and setup.
+ *
+ * Reads the chip name and revision registers, panics unless the name
+ * matches MT7621, fills in soc_info (compatible string, sys_type text and
+ * memory limits), installs the MT7621 pinmux table, probes CM and CPC and
+ * then registers SMP ops: CPS, CMP and VSMP are tried in that order and the
+ * function returns after the first register_*_smp_ops() call that
+ * returns 0.
+ */
+void prom_soc_init(struct ralink_soc_info *soc_info)
+{
+       void __iomem *sysc = (void __iomem *) KSEG1ADDR(MT7621_SYSC_BASE);
+       unsigned char *name = NULL;
+       u32 n0;
+       u32 n1;
+       u32 rev;
+
+       n0 = __raw_readl(sysc + SYSC_REG_CHIP_NAME0);
+       n1 = __raw_readl(sysc + SYSC_REG_CHIP_NAME1);
+
+       /* The else branch panics, so name is set whenever execution continues. */
+       if (n0 == MT7621_CHIP_NAME0 && n1 == MT7621_CHIP_NAME1) {
+               name = "MT7621";
+               soc_info->compatible = "mtk,mt7621-soc";
+       } else {
+               panic("mt7621: unknown SoC, n0:%08x n1:%08x\n", n0, n1);
+       }
+
+       rev = __raw_readl(sysc + SYSC_REG_CHIP_REV);
+
+       snprintf(soc_info->sys_type, RAMIPS_SYS_TYPE_LEN,
+               "MediaTek %s ver:%u eco:%u",
+               name,
+               (rev >> CHIP_REV_VER_SHIFT) & CHIP_REV_VER_MASK,
+               (rev & CHIP_REV_ECO_MASK));
+
+       soc_info->mem_size_min = MT7621_DDR2_SIZE_MIN;
+       soc_info->mem_size_max = MT7621_DDR2_SIZE_MAX;
+       soc_info->mem_base = MT7621_DRAM_BASE;
+
+       rt2880_pinmux_data = mt7621_pinmux_data;
+
+       /* Early detection of CMP support */
+       mips_cm_probe();
+       mips_cpc_probe();
+
+       if (mips_cm_numiocu()) {
+               /*
+                * mips_cm_probe() wipes out bootloader
+                * config for CM regions and we have to configure them
+                * again. This SoC cannot talk to palmbus devices
+                * without proper iocu region set up.
+                *
+                * FIXME: it would be better to do this with values
+                * from DT, but we need this very early because
+                * without this we cannot talk to pretty much anything
+                * including serial.
+                */
+               write_gcr_reg0_base(MT7621_PALMBUS_BASE);
+               write_gcr_reg0_mask(~MT7621_PALMBUS_SIZE |
+                                   CM_GCR_REGn_MASK_CMTGT_IOCU0);
+       }
+
+       /* Stop at the first SMP implementation that registers (returns 0). */
+       if (!register_cps_smp_ops())
+               return;
+       if (!register_cmp_smp_ops())
+               return;
+       if (!register_vsmp_smp_ops())
+               return;
+}
index 844f5cd..3c84166 100644 (file)
@@ -119,5 +119,5 @@ void prom_soc_init(struct ralink_soc_info *soc_info)
        soc_info->mem_size_max = RT2880_MEM_SIZE_MAX;
 
        rt2880_pinmux_data = rt2880_pinmux_data_act;
-       ralink_soc == RT2880_SOC;
+       ralink_soc = RT2880_SOC;
 }
index 9e45725..d7c4ba4 100644 (file)
@@ -201,6 +201,7 @@ void __init ralink_clk_init(void)
        ralink_clk_add("cpu", cpu_rate);
        ralink_clk_add("sys", sys_rate);
        ralink_clk_add("10000b00.spi", sys_rate);
+       ralink_clk_add("10000b40.spi", sys_rate);
        ralink_clk_add("10000100.timer", wdt_rate);
        ralink_clk_add("10000120.watchdog", wdt_rate);
        ralink_clk_add("10000500.uart", uart_rate);
index 582995a..fafec94 100644 (file)
@@ -109,6 +109,7 @@ void __init ralink_clk_init(void)
        ralink_clk_add("10000120.watchdog", sys_rate);
        ralink_clk_add("10000500.uart", 40000000);
        ralink_clk_add("10000b00.spi", sys_rate);
+       ralink_clk_add("10000b40.spi", sys_rate);
        ralink_clk_add("10000c00.uartlite", 40000000);
        ralink_clk_add("10100000.ethernet", sys_rate);
        ralink_clk_add("10180000.wmac", 40000000);
diff --git a/arch/mips/ralink/timer-gic.c b/arch/mips/ralink/timer-gic.c
new file mode 100644 (file)
index 0000000..5b4f186
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 Nikolay Martynov <mar.kolya@gmail.com>
+ * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+
+#include <linux/of.h>
+#include <linux/clk-provider.h>
+#include <linux/clocksource.h>
+
+#include "common.h"
+
+/*
+ * Time initialisation for the GIC build: remap the core register blocks
+ * via ralink_of_remap(), then register device tree clocks and probe
+ * clocksources.
+ */
+void __init plat_time_init(void)
+{
+       ralink_of_remap();
+
+       of_clk_init(NULL);
+       clocksource_probe();
+}
index 650d5d3..fd11085 100644 (file)
@@ -89,7 +89,7 @@ static int rb532_gpio_get(struct gpio_chip *chip, unsigned offset)
        struct rb532_gpio_chip  *gpch;
 
        gpch = container_of(chip, struct rb532_gpio_chip, chip);
-       return rb532_get_bit(offset, gpch->regbase + GPIOD);
+       return !!rb532_get_bit(offset, gpch->regbase + GPIOD);
 }
 
 /*
index 9d9962a..2fd350f 100644 (file)
@@ -689,7 +689,7 @@ static int txx9_iocled_get(struct gpio_chip *chip, unsigned int offset)
 {
        struct txx9_iocled_data *data =
                container_of(chip, struct txx9_iocled_data, chip);
-       return data->cur_val & (1 << offset);
+       return !!(data->cur_val & (1 << offset));
 }
 
 static void txx9_iocled_set(struct gpio_chip *chip, unsigned int offset,
index 06f17e7..8d1c816 100644 (file)
@@ -50,7 +50,9 @@
  * set of bits not changed in pmd_modify.
  */
 #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
-                        _PAGE_ACCESSED | _PAGE_THP_HUGE)
+                        _PAGE_ACCESSED | _PAGE_THP_HUGE | _PAGE_PTE | \
+                        _PAGE_SOFT_DIRTY)
+
 
 #ifdef CONFIG_PPC_64K_PAGES
 #include <asm/book3s/64/hash-64k.h>
index 8204b0c..8d1c41d 100644 (file)
@@ -223,7 +223,6 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
 #define pmd_pfn(pmd)           pte_pfn(pmd_pte(pmd))
 #define pmd_dirty(pmd)         pte_dirty(pmd_pte(pmd))
 #define pmd_young(pmd)         pte_young(pmd_pte(pmd))
-#define pmd_dirty(pmd)         pte_dirty(pmd_pte(pmd))
 #define pmd_mkold(pmd)         pte_pmd(pte_mkold(pmd_pte(pmd)))
 #define pmd_wrprotect(pmd)     pte_pmd(pte_wrprotect(pmd_pte(pmd)))
 #define pmd_mkdirty(pmd)       pte_pmd(pte_mkdirty(pmd_pte(pmd)))
index 271fefb..9d08d8c 100644 (file)
@@ -38,8 +38,7 @@
 
 #define KVM_MAX_VCPUS          NR_CPUS
 #define KVM_MAX_VCORES         NR_CPUS
-#define KVM_USER_MEM_SLOTS 32
-#define KVM_MEM_SLOTS_NUM KVM_USER_MEM_SLOTS
+#define KVM_USER_MEM_SLOTS     512
 
 #ifdef CONFIG_KVM_MMIO
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
index 5654ece..3fa9df7 100644 (file)
@@ -383,3 +383,4 @@ SYSCALL(ni_syscall)
 SYSCALL(ni_syscall)
 SYSCALL(ni_syscall)
 SYSCALL(mlock2)
+SYSCALL(copy_file_range)
index 6a5ace5..1f2594d 100644 (file)
@@ -12,7 +12,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define NR_syscalls            379
+#define NR_syscalls            380
 
 #define __NR__exit __NR_exit
 
index 12a0565..940290d 100644 (file)
 #define __NR_userfaultfd       364
 #define __NR_membarrier                365
 #define __NR_mlock2            378
+#define __NR_copy_file_range   379
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
index 8654cb1..ca9e537 100644 (file)
@@ -883,32 +883,29 @@ void eeh_pe_restore_bars(struct eeh_pe *pe)
 const char *eeh_pe_loc_get(struct eeh_pe *pe)
 {
        struct pci_bus *bus = eeh_pe_bus_get(pe);
-       struct device_node *dn = pci_bus_to_OF_node(bus);
+       struct device_node *dn;
        const char *loc = NULL;
 
-       if (!dn)
-               goto out;
+       while (bus) {
+               dn = pci_bus_to_OF_node(bus);
+               if (!dn) {
+                       bus = bus->parent;
+                       continue;
+               }
 
-       /* PHB PE or root PE ? */
-       if (pci_is_root_bus(bus)) {
-               loc = of_get_property(dn, "ibm,loc-code", NULL);
-               if (!loc)
+               if (pci_is_root_bus(bus))
                        loc = of_get_property(dn, "ibm,io-base-loc-code", NULL);
+               else
+                       loc = of_get_property(dn, "ibm,slot-location-code",
+                                             NULL);
+
                if (loc)
-                       goto out;
+                       return loc;
 
-               /* Check the root port */
-               dn = dn->child;
-               if (!dn)
-                       goto out;
+               bus = bus->parent;
        }
 
-       loc = of_get_property(dn, "ibm,loc-code", NULL);
-       if (!loc)
-               loc = of_get_property(dn, "ibm,slot-location-code", NULL);
-
-out:
-       return loc ? loc : "N/A";
+       return "N/A";
 }
 
 /**
index db475d4..f28754c 100644 (file)
@@ -701,31 +701,3 @@ _GLOBAL(kexec_sequence)
        li      r5,0
        blr     /* image->start(physid, image->start, 0); */
 #endif /* CONFIG_KEXEC */
-
-#ifdef CONFIG_MODULES
-#if defined(_CALL_ELF) && _CALL_ELF == 2
-
-#ifdef CONFIG_MODVERSIONS
-.weak __crc_TOC.
-.section "___kcrctab+TOC.","a"
-.globl __kcrctab_TOC.
-__kcrctab_TOC.:
-       .llong  __crc_TOC.
-#endif
-
-/*
- * Export a fake .TOC. since both modpost and depmod will complain otherwise.
- * Both modpost and depmod strip the leading . so we do the same here.
- */
-.section "__ksymtab_strings","a"
-__kstrtab_TOC.:
-       .asciz "TOC."
-
-.section "___ksymtab+TOC.","a"
-/* This symbol name is important: it's used by modpost to find exported syms */
-.globl __ksymtab_TOC.
-__ksymtab_TOC.:
-       .llong 0 /* .value */
-       .llong __kstrtab_TOC.
-#endif /* ELFv2 */
-#endif /* MODULES */
index 59663af..ac64ffd 100644 (file)
@@ -326,7 +326,10 @@ static void dedotify_versions(struct modversion_info *vers,
                }
 }
 
-/* Undefined symbols which refer to .funcname, hack to funcname (or .TOC.) */
+/*
+ * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC.
+ * seem to be defined (value set later).
+ */
 static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
 {
        unsigned int i;
@@ -334,8 +337,11 @@ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
        for (i = 1; i < numsyms; i++) {
                if (syms[i].st_shndx == SHN_UNDEF) {
                        char *name = strtab + syms[i].st_name;
-                       if (name[0] == '.')
+                       if (name[0] == '.') {
+                               if (strcmp(name+1, "TOC.") == 0)
+                                       syms[i].st_shndx = SHN_ABS;
                                memmove(name, name+1, strlen(name));
+                       }
                }
        }
 }
@@ -351,7 +357,7 @@ static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs,
        numsyms = sechdrs[symindex].sh_size / sizeof(Elf64_Sym);
 
        for (i = 1; i < numsyms; i++) {
-               if (syms[i].st_shndx == SHN_UNDEF
+               if (syms[i].st_shndx == SHN_ABS
                    && strcmp(strtab + syms[i].st_name, "TOC.") == 0)
                        return &syms[i];
        }
index 774a253..9bf7031 100644 (file)
@@ -377,15 +377,12 @@ no_seg_found:
 
 static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
 {
-       struct kvmppc_vcpu_book3s *vcpu_book3s;
        u64 esid, esid_1t;
        int slb_nr;
        struct kvmppc_slb *slbe;
 
        dprintk("KVM MMU: slbmte(0x%llx, 0x%llx)\n", rs, rb);
 
-       vcpu_book3s = to_book3s(vcpu);
-
        esid = GET_ESID(rb);
        esid_1t = GET_ESID_1T(rb);
        slb_nr = rb & 0xfff;
index cff207b..baeddb0 100644 (file)
@@ -833,6 +833,24 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        vcpu->stat.sum_exits++;
 
+       /*
+        * This can happen if an interrupt occurs in the last stages
+        * of guest entry or the first stages of guest exit (i.e. after
+        * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
+        * and before setting it to KVM_GUEST_MODE_HOST_HV).
+        * That can happen due to a bug, or due to a machine check
+        * occurring at just the wrong time.
+        */
+       if (vcpu->arch.shregs.msr & MSR_HV) {
+               printk(KERN_EMERG "KVM trap in HV mode!\n");
+               printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+                       vcpu->arch.trap, kvmppc_get_pc(vcpu),
+                       vcpu->arch.shregs.msr);
+               kvmppc_dump_regs(vcpu);
+               run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               run->hw.hardware_exit_reason = vcpu->arch.trap;
+               return RESUME_HOST;
+       }
        run->exit_reason = KVM_EXIT_UNKNOWN;
        run->ready_for_interrupt_injection = 1;
        switch (vcpu->arch.trap) {
index 3c6badc..6ee26de 100644 (file)
@@ -2153,7 +2153,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
        /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
 2:     rlwimi  r5, r4, 5, DAWRX_DR | DAWRX_DW
-       rlwimi  r5, r4, 1, DAWRX_WT
+       rlwimi  r5, r4, 2, DAWRX_WT
        clrrdi  r4, r4, 3
        std     r4, VCPU_DAWR(r3)
        std     r5, VCPU_DAWRX(r3)
@@ -2404,6 +2404,8 @@ machine_check_realmode:
         * guest as machine check causing guest to crash.
         */
        ld      r11, VCPU_MSR(r9)
+       rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
+       bne     mc_cont                 /* if so, exit to host */
        andi.   r10, r11, MSR_RI        /* check for unrecoverable exception */
        beq     1f                      /* Deliver a machine check to guest */
        ld      r10, VCPU_PC(r9)
index 6fd2405..a3b182d 100644 (file)
@@ -919,21 +919,17 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
                                r = -ENXIO;
                                break;
                        }
-                       vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+                       val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
                        break;
                case KVM_REG_PPC_VSCR:
                        if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
                                r = -ENXIO;
                                break;
                        }
-                       vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
+                       val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
                        break;
                case KVM_REG_PPC_VRSAVE:
-                       if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
-                               r = -ENXIO;
-                               break;
-                       }
-                       vcpu->arch.vrsave = set_reg_val(reg->id, val);
+                       val = get_reg_val(reg->id, vcpu->arch.vrsave);
                        break;
 #endif /* CONFIG_ALTIVEC */
                default:
@@ -974,17 +970,21 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
                                r = -ENXIO;
                                break;
                        }
-                       val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
+                       vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
                        break;
                case KVM_REG_PPC_VSCR:
                        if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
                                r = -ENXIO;
                                break;
                        }
-                       val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
+                       vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
                        break;
                case KVM_REG_PPC_VRSAVE:
-                       val = get_reg_val(reg->id, vcpu->arch.vrsave);
+                       if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+                               r = -ENXIO;
+                               break;
+                       }
+                       vcpu->arch.vrsave = set_reg_val(reg->id, val);
                        break;
 #endif /* CONFIG_ALTIVEC */
                default:
index 22d94c3..d0f0a51 100644 (file)
@@ -560,12 +560,12 @@ subsys_initcall(add_system_ram_resources);
  */
 int devmem_is_allowed(unsigned long pfn)
 {
+       if (page_is_rtas_user_buf(pfn))
+               return 1;
        if (iomem_is_exclusive(PFN_PHYS(pfn)))
                return 0;
        if (!page_is_ram(pfn))
                return 1;
-       if (page_is_rtas_user_buf(pfn))
-               return 1;
        return 0;
 }
 #endif /* CONFIG_STRICT_DEVMEM */
index 7d5e295..9958ba8 100644 (file)
@@ -816,7 +816,7 @@ static struct power_pmu power8_pmu = {
        .get_constraint         = power8_get_constraint,
        .get_alternatives       = power8_get_alternatives,
        .disable_pmc            = power8_disable_pmc,
-       .flags                  = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_ARCH_207S,
+       .flags                  = PPMU_HAS_SIER | PPMU_ARCH_207S,
        .n_generic              = ARRAY_SIZE(power8_generic_events),
        .generic_events         = power8_generic_events,
        .cache_events           = &power8_cache_events,
index 5038fd5..2936a00 100644 (file)
@@ -1799,9 +1799,9 @@ static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int data
        struct inode *inode = file_inode(file);
        int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (!err) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                err = spufs_mfc_flush(file, NULL);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        return err;
 }
index ad4840f..dfa8638 100644 (file)
@@ -163,7 +163,7 @@ static void spufs_prune_dir(struct dentry *dir)
 {
        struct dentry *dentry, *tmp;
 
-       mutex_lock(&d_inode(dir)->i_mutex);
+       inode_lock(d_inode(dir));
        list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) {
                spin_lock(&dentry->d_lock);
                if (simple_positive(dentry)) {
@@ -180,7 +180,7 @@ static void spufs_prune_dir(struct dentry *dir)
                }
        }
        shrink_dcache_parent(dir);
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 }
 
 /* Caller must hold parent->i_mutex */
@@ -225,9 +225,9 @@ static int spufs_dir_close(struct inode *inode, struct file *file)
        parent = d_inode(dir->d_parent);
        ctx = SPUFS_I(d_inode(dir))->i_ctx;
 
-       mutex_lock_nested(&parent->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(parent, I_MUTEX_PARENT);
        ret = spufs_rmdir(parent, dir);
-       mutex_unlock(&parent->i_mutex);
+       inode_unlock(parent);
        WARN_ON(ret);
 
        return dcache_dir_close(inode, file);
@@ -270,7 +270,7 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags,
        inode->i_op = &simple_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        dget(dentry);
        inc_nlink(dir);
@@ -291,7 +291,7 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags,
        if (ret)
                spufs_rmdir(dir, dentry);
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return ret;
 }
index b2e5902..0f3da2c 100644 (file)
@@ -67,7 +67,7 @@ static void hypfs_remove(struct dentry *dentry)
        struct dentry *parent;
 
        parent = dentry->d_parent;
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        if (simple_positive(dentry)) {
                if (d_is_dir(dentry))
                        simple_rmdir(d_inode(parent), dentry);
@@ -76,7 +76,7 @@ static void hypfs_remove(struct dentry *dentry)
        }
        d_delete(dentry);
        dput(dentry);
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
 }
 
 static void hypfs_delete_tree(struct dentry *root)
@@ -331,7 +331,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
        struct dentry *dentry;
        struct inode *inode;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        dentry = lookup_one_len(name, parent, strlen(name));
        if (IS_ERR(dentry)) {
                dentry = ERR_PTR(-ENOMEM);
@@ -359,7 +359,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
        d_instantiate(dentry, inode);
        dget(dentry);
 fail:
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        return dentry;
 }
 
index 16aa0c7..595a275 100644 (file)
@@ -8,6 +8,8 @@
 
 #include <linux/types.h>
 
+#define ARCH_IRQ_ENABLED       (3UL << (BITS_PER_LONG - 8))
+
 /* store then OR system mask. */
 #define __arch_local_irq_stosm(__or)                                   \
 ({                                                                     \
@@ -54,14 +56,17 @@ static inline notrace void arch_local_irq_enable(void)
        __arch_local_irq_stosm(0x03);
 }
 
+/* This only restores external and I/O interrupt state */
 static inline notrace void arch_local_irq_restore(unsigned long flags)
 {
-       __arch_local_irq_ssm(flags);
+       /* only disabled->disabled and disabled->enabled is valid */
+       if (flags & ARCH_IRQ_ENABLED)
+               arch_local_irq_enable();
 }
 
 static inline notrace bool arch_irqs_disabled_flags(unsigned long flags)
 {
-       return !(flags & (3UL << (BITS_PER_LONG - 8)));
+       return !(flags & ARCH_IRQ_ENABLED);
 }
 
 static inline notrace bool arch_irqs_disabled(void)
index 6742414..8959ebb 100644 (file)
@@ -546,7 +546,6 @@ struct kvm_vcpu_arch {
        struct kvm_s390_sie_block *sie_block;
        unsigned int      host_acrs[NUM_ACRS];
        struct fpu        host_fpregs;
-       struct fpu        guest_fpregs;
        struct kvm_s390_local_interrupt local_int;
        struct hrtimer    ckc_timer;
        struct kvm_s390_pgm_info pgm;
index 1a9a98d..69aa18b 100644 (file)
@@ -8,10 +8,13 @@
 #include <asm/pci_insn.h>
 
 /* I/O Map */
-#define ZPCI_IOMAP_MAX_ENTRIES         0x7fff
-#define ZPCI_IOMAP_ADDR_BASE           0x8000000000000000ULL
-#define ZPCI_IOMAP_ADDR_IDX_MASK       0x7fff000000000000ULL
-#define ZPCI_IOMAP_ADDR_OFF_MASK       0x0000ffffffffffffULL
+#define ZPCI_IOMAP_SHIFT               48
+#define ZPCI_IOMAP_ADDR_BASE           0x8000000000000000UL
+#define ZPCI_IOMAP_ADDR_OFF_MASK       ((1UL << ZPCI_IOMAP_SHIFT) - 1)
+#define ZPCI_IOMAP_MAX_ENTRIES                                                 \
+       ((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT))
+#define ZPCI_IOMAP_ADDR_IDX_MASK                                               \
+       (~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE)
 
 struct zpci_iomap_entry {
        u32 fh;
@@ -21,8 +24,9 @@ struct zpci_iomap_entry {
 
 extern struct zpci_iomap_entry *zpci_iomap_start;
 
+#define ZPCI_ADDR(idx) (ZPCI_IOMAP_ADDR_BASE | ((u64) idx << ZPCI_IOMAP_SHIFT))
 #define ZPCI_IDX(addr)                                                         \
-       (((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> 48)
+       (((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> ZPCI_IOMAP_SHIFT)
 #define ZPCI_OFFSET(addr)                                                      \
        ((__force u64) addr & ZPCI_IOMAP_ADDR_OFF_MASK)
 
index f16debf..1c4fe12 100644 (file)
@@ -166,14 +166,14 @@ extern __vector128 init_task_fpu_regs[__NUM_VXRS];
  */
 #define start_thread(regs, new_psw, new_stackp) do {                   \
        regs->psw.mask  = PSW_USER_BITS | PSW_MASK_EA | PSW_MASK_BA;    \
-       regs->psw.addr  = new_psw | PSW_ADDR_AMODE;                     \
+       regs->psw.addr  = new_psw;                                      \
        regs->gprs[15]  = new_stackp;                                   \
        execve_tail();                                                  \
 } while (0)
 
 #define start_thread31(regs, new_psw, new_stackp) do {                 \
        regs->psw.mask  = PSW_USER_BITS | PSW_MASK_BA;                  \
-       regs->psw.addr  = new_psw | PSW_ADDR_AMODE;                     \
+       regs->psw.addr  = new_psw;                                      \
        regs->gprs[15]  = new_stackp;                                   \
        crst_table_downgrade(current->mm, 1UL << 31);                   \
        execve_tail();                                                  \
index f00cd35..99bc456 100644 (file)
@@ -149,7 +149,7 @@ static inline int test_pt_regs_flag(struct pt_regs *regs, int flag)
 #define arch_has_block_step()  (1)
 
 #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
-#define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
+#define instruction_pointer(regs) ((regs)->psw.addr)
 #define user_stack_pointer(regs)((regs)->gprs[15])
 #define profile_pc(regs) instruction_pointer(regs)
 
@@ -161,7 +161,7 @@ static inline long regs_return_value(struct pt_regs *regs)
 static inline void instruction_pointer_set(struct pt_regs *regs,
                                           unsigned long val)
 {
-       regs->psw.addr = val | PSW_ADDR_AMODE;
+       regs->psw.addr = val;
 }
 
 int regs_query_register_offset(const char *name);
@@ -171,7 +171,7 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n);
 
 static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
 {
-       return regs->gprs[15] & PSW_ADDR_INSN;
+       return regs->gprs[15];
 }
 
 #endif /* __ASSEMBLY__ */
index 34ec202..ab3aa68 100644 (file)
 #define __NR_recvmsg           372
 #define __NR_shutdown          373
 #define __NR_mlock2            374
-#define NR_syscalls 375
+#define __NR_copy_file_range   375
+#define NR_syscalls 376
 
 /* 
  * There are some system calls that are not present on 64 bit, some
index fac4eed..ae2cda5 100644 (file)
@@ -177,3 +177,4 @@ COMPAT_SYSCALL_WRAP3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
 COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len);
 COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
 COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
+COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
index a92b39f..3986c9f 100644 (file)
@@ -59,8 +59,6 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
        struct save_area *sa;
 
        sa = (void *) memblock_alloc(sizeof(*sa), 8);
-       if (!sa)
-               return NULL;
        if (is_boot_cpu)
                list_add(&sa->list, &dump_save_areas);
        else
index 6fca0e4..c890a55 100644 (file)
@@ -1470,7 +1470,7 @@ debug_dflt_header_fn(debug_info_t * id, struct debug_view *view,
                except_str = "*";
        else
                except_str = "-";
-       caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN;
+       caller = (unsigned long) entry->caller;
        rc += sprintf(out_buf, "%02i %011lld:%06lu %1u %1s %02i %p  ",
                      area, (long long)time_spec.tv_sec,
                      time_spec.tv_nsec / 1000, level, except_str,
index dc8e204..02bd02f 100644 (file)
@@ -34,22 +34,21 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high)
        unsigned long addr;
 
        while (1) {
-               sp = sp & PSW_ADDR_INSN;
                if (sp < low || sp > high - sizeof(*sf))
                        return sp;
                sf = (struct stack_frame *) sp;
-               addr = sf->gprs[8] & PSW_ADDR_INSN;
+               addr = sf->gprs[8];
                printk("([<%016lx>] %pSR)\n", addr, (void *)addr);
                /* Follow the backchain. */
                while (1) {
                        low = sp;
-                       sp = sf->back_chain & PSW_ADDR_INSN;
+                       sp = sf->back_chain;
                        if (!sp)
                                break;
                        if (sp <= low || sp > high - sizeof(*sf))
                                return sp;
                        sf = (struct stack_frame *) sp;
-                       addr = sf->gprs[8] & PSW_ADDR_INSN;
+                       addr = sf->gprs[8];
                        printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
                }
                /* Zero backchain detected, check for interrupt frame. */
@@ -57,7 +56,7 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high)
                if (sp <= low || sp > high - sizeof(*regs))
                        return sp;
                regs = (struct pt_regs *) sp;
-               addr = regs->psw.addr & PSW_ADDR_INSN;
+               addr = regs->psw.addr;
                printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
                low = sp;
                sp = regs->gprs[15];
index 20a5caf..c55576b 100644 (file)
@@ -252,14 +252,14 @@ static void early_pgm_check_handler(void)
        unsigned long addr;
 
        addr = S390_lowcore.program_old_psw.addr;
-       fixup = search_exception_tables(addr & PSW_ADDR_INSN);
+       fixup = search_exception_tables(addr);
        if (!fixup)
                disabled_wait(0);
        /* Disable low address protection before storing into lowcore. */
        __ctl_store(cr0, 0, 0);
        cr0_new = cr0 & ~(1UL << 28);
        __ctl_load(cr0_new, 0, 0);
-       S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE;
+       S390_lowcore.program_old_psw.addr = extable_fixup(fixup);
        __ctl_load(cr0, 0, 0);
 }
 
@@ -268,9 +268,9 @@ static noinline __init void setup_lowcore_early(void)
        psw_t psw;
 
        psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
-       psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler;
+       psw.addr = (unsigned long) s390_base_ext_handler;
        S390_lowcore.external_new_psw = psw;
-       psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+       psw.addr = (unsigned long) s390_base_pgm_handler;
        S390_lowcore.program_new_psw = psw;
        s390_base_pgm_handler_fn = early_pgm_check_handler;
 }
index e0eaf11..0f7bfeb 100644 (file)
@@ -203,7 +203,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
                goto out;
        if (unlikely(atomic_read(&current->tracing_graph_pause)))
                goto out;
-       ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE;
+       ip -= MCOUNT_INSN_SIZE;
        trace.func = ip;
        trace.depth = current->curr_ret_stack + 1;
        /* Only trace if the calling function expects to. */
index 0a5a6b6..f20abdb 100644 (file)
@@ -2057,12 +2057,12 @@ void s390_reset_system(void)
        /* Set new machine check handler */
        S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
        S390_lowcore.mcck_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler;
+               (unsigned long) s390_base_mcck_handler;
 
        /* Set new program check handler */
        S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
        S390_lowcore.program_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+               (unsigned long) s390_base_pgm_handler;
 
        /*
         * Clear subchannel ID and number to signal new kernel that no CCW or
index 389db56..250f597 100644 (file)
@@ -226,7 +226,7 @@ static void enable_singlestep(struct kprobe_ctlblk *kcb,
        __ctl_load(per_kprobe, 9, 11);
        regs->psw.mask |= PSW_MASK_PER;
        regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
-       regs->psw.addr = ip | PSW_ADDR_AMODE;
+       regs->psw.addr = ip;
 }
 NOKPROBE_SYMBOL(enable_singlestep);
 
@@ -238,7 +238,7 @@ static void disable_singlestep(struct kprobe_ctlblk *kcb,
        __ctl_load(kcb->kprobe_saved_ctl, 9, 11);
        regs->psw.mask &= ~PSW_MASK_PER;
        regs->psw.mask |= kcb->kprobe_saved_imask;
-       regs->psw.addr = ip | PSW_ADDR_AMODE;
+       regs->psw.addr = ip;
 }
 NOKPROBE_SYMBOL(disable_singlestep);
 
@@ -310,7 +310,7 @@ static int kprobe_handler(struct pt_regs *regs)
         */
        preempt_disable();
        kcb = get_kprobe_ctlblk();
-       p = get_kprobe((void *)((regs->psw.addr & PSW_ADDR_INSN) - 2));
+       p = get_kprobe((void *)(regs->psw.addr - 2));
 
        if (p) {
                if (kprobe_running()) {
@@ -460,7 +460,7 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
                        break;
        }
 
-       regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
+       regs->psw.addr = orig_ret_address;
 
        pop_kprobe(get_kprobe_ctlblk());
        kretprobe_hash_unlock(current, &flags);
@@ -490,7 +490,7 @@ NOKPROBE_SYMBOL(trampoline_probe_handler);
 static void resume_execution(struct kprobe *p, struct pt_regs *regs)
 {
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-       unsigned long ip = regs->psw.addr & PSW_ADDR_INSN;
+       unsigned long ip = regs->psw.addr;
        int fixup = probe_get_fixup_type(p->ainsn.insn);
 
        /* Check if the kprobes location is an enabled ftrace caller */
@@ -605,9 +605,9 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
                 * In case the user-specified fault handler returned
                 * zero, try to fix up.
                 */
-               entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+               entry = search_exception_tables(regs->psw.addr);
                if (entry) {
-                       regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE;
+                       regs->psw.addr = extable_fixup(entry);
                        return 1;
                }
 
@@ -683,7 +683,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
        memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
 
        /* setup return addr to the jprobe handler routine */
-       regs->psw.addr = (unsigned long) jp->entry | PSW_ADDR_AMODE;
+       regs->psw.addr = (unsigned long) jp->entry;
        regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
 
        /* r15 is the stack pointer */
index 61595c1..cfcba2d 100644 (file)
@@ -74,7 +74,7 @@ static unsigned long guest_is_user_mode(struct pt_regs *regs)
 
 static unsigned long instruction_pointer_guest(struct pt_regs *regs)
 {
-       return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN;
+       return sie_block(regs)->gpsw.addr;
 }
 
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
@@ -231,29 +231,27 @@ static unsigned long __store_trace(struct perf_callchain_entry *entry,
        struct pt_regs *regs;
 
        while (1) {
-               sp = sp & PSW_ADDR_INSN;
                if (sp < low || sp > high - sizeof(*sf))
                        return sp;
                sf = (struct stack_frame *) sp;
-               perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+               perf_callchain_store(entry, sf->gprs[8]);
                /* Follow the backchain. */
                while (1) {
                        low = sp;
-                       sp = sf->back_chain & PSW_ADDR_INSN;
+                       sp = sf->back_chain;
                        if (!sp)
                                break;
                        if (sp <= low || sp > high - sizeof(*sf))
                                return sp;
                        sf = (struct stack_frame *) sp;
-                       perf_callchain_store(entry,
-                                            sf->gprs[8] & PSW_ADDR_INSN);
+                       perf_callchain_store(entry, sf->gprs[8]);
                }
                /* Zero backchain detected, check for interrupt frame. */
                sp = (unsigned long) (sf + 1);
                if (sp <= low || sp > high - sizeof(*regs))
                        return sp;
                regs = (struct pt_regs *) sp;
-               perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+               perf_callchain_store(entry, sf->gprs[8]);
                low = sp;
                sp = regs->gprs[15];
        }
index 114ee8b..2bba7df 100644 (file)
@@ -56,10 +56,10 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
                return 0;
        low = task_stack_page(tsk);
        high = (struct stack_frame *) task_pt_regs(tsk);
-       sf = (struct stack_frame *) (tsk->thread.ksp & PSW_ADDR_INSN);
+       sf = (struct stack_frame *) tsk->thread.ksp;
        if (sf <= low || sf > high)
                return 0;
-       sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
+       sf = (struct stack_frame *) sf->back_chain;
        if (sf <= low || sf > high)
                return 0;
        return sf->gprs[8];
@@ -154,7 +154,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
                memset(&frame->childregs, 0, sizeof(struct pt_regs));
                frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
                                PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-               frame->childregs.psw.addr = PSW_ADDR_AMODE |
+               frame->childregs.psw.addr =
                                (unsigned long) kernel_thread_starter;
                frame->childregs.gprs[9] = new_stackp; /* function */
                frame->childregs.gprs[10] = arg;
@@ -220,14 +220,14 @@ unsigned long get_wchan(struct task_struct *p)
                return 0;
        low = task_stack_page(p);
        high = (struct stack_frame *) task_pt_regs(p);
-       sf = (struct stack_frame *) (p->thread.ksp & PSW_ADDR_INSN);
+       sf = (struct stack_frame *) p->thread.ksp;
        if (sf <= low || sf > high)
                return 0;
        for (count = 0; count < 16; count++) {
-               sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
+               sf = (struct stack_frame *) sf->back_chain;
                if (sf <= low || sf > high)
                        return 0;
-               return_address = sf->gprs[8] & PSW_ADDR_INSN;
+               return_address = sf->gprs[8];
                if (!in_sched_functions(return_address))
                        return return_address;
        }
index 01c37b3..49b1c13 100644 (file)
@@ -84,7 +84,7 @@ void update_cr_regs(struct task_struct *task)
                if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP))
                        new.control |= PER_EVENT_IFETCH;
                new.start = 0;
-               new.end = PSW_ADDR_INSN;
+               new.end = -1UL;
        }
 
        /* Take care of the PER enablement bit in the PSW. */
@@ -148,7 +148,7 @@ static inline unsigned long __peek_user_per(struct task_struct *child,
        else if (addr == (addr_t) &dummy->cr11)
                /* End address of the active per set. */
                return test_thread_flag(TIF_SINGLE_STEP) ?
-                       PSW_ADDR_INSN : child->thread.per_user.end;
+                       -1UL : child->thread.per_user.end;
        else if (addr == (addr_t) &dummy->bits)
                /* Single-step bit. */
                return test_thread_flag(TIF_SINGLE_STEP) ?
@@ -495,8 +495,6 @@ long arch_ptrace(struct task_struct *child, long request,
                }
                return 0;
        default:
-               /* Removing high order bit from addr (only for 31 bit). */
-               addr &= PSW_ADDR_INSN;
                return ptrace_request(child, request, addr, data);
        }
 }
index c6878fb..9220db5 100644 (file)
@@ -301,25 +301,21 @@ static void __init setup_lowcore(void)
        BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * 4096);
        lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);
        lc->restart_psw.mask = PSW_KERNEL_BITS;
-       lc->restart_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
+       lc->restart_psw.addr = (unsigned long) restart_int_handler;
        lc->external_new_psw.mask = PSW_KERNEL_BITS |
                PSW_MASK_DAT | PSW_MASK_MCHECK;
-       lc->external_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
+       lc->external_new_psw.addr = (unsigned long) ext_int_handler;
        lc->svc_new_psw.mask = PSW_KERNEL_BITS |
                PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-       lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
+       lc->svc_new_psw.addr = (unsigned long) system_call;
        lc->program_new_psw.mask = PSW_KERNEL_BITS |
                PSW_MASK_DAT | PSW_MASK_MCHECK;
-       lc->program_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) pgm_check_handler;
+       lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
        lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
-       lc->mcck_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
+       lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
        lc->io_new_psw.mask = PSW_KERNEL_BITS |
                PSW_MASK_DAT | PSW_MASK_MCHECK;
-       lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
+       lc->io_new_psw.addr = (unsigned long) io_int_handler;
        lc->clock_comparator = -1ULL;
        lc->kernel_stack = ((unsigned long) &init_thread_union)
                + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
index 028cc46..d82562c 100644 (file)
@@ -331,13 +331,13 @@ static int setup_frame(int sig, struct k_sigaction *ka,
        /* Set up to return from userspace.  If provided, use a stub
           already in userspace.  */
        if (ka->sa.sa_flags & SA_RESTORER) {
-               restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE;
+               restorer = (unsigned long) ka->sa.sa_restorer;
        } else {
                /* Signal frame without vector registers are short ! */
                __u16 __user *svc = (void __user *) frame + frame_size - 2;
                if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
                        return -EFAULT;
-               restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+               restorer = (unsigned long) svc;
        }
 
        /* Set up registers for signal handler */
@@ -347,7 +347,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
        regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
                (PSW_USER_BITS & PSW_MASK_ASC) |
                (regs->psw.mask & ~PSW_MASK_ASC);
-       regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
+       regs->psw.addr = (unsigned long) ka->sa.sa_handler;
 
        regs->gprs[2] = sig;
        regs->gprs[3] = (unsigned long) &frame->sc;
@@ -394,13 +394,12 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
        /* Set up to return from userspace.  If provided, use a stub
           already in userspace.  */
        if (ksig->ka.sa.sa_flags & SA_RESTORER) {
-               restorer = (unsigned long)
-                       ksig->ka.sa.sa_restorer | PSW_ADDR_AMODE;
+               restorer = (unsigned long) ksig->ka.sa.sa_restorer;
        } else {
                __u16 __user *svc = &frame->svc_insn;
                if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
                        return -EFAULT;
-               restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+               restorer = (unsigned long) svc;
        }
 
        /* Create siginfo on the signal stack */
@@ -426,7 +425,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
        regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
                (PSW_USER_BITS & PSW_MASK_ASC) |
                (regs->psw.mask & ~PSW_MASK_ASC);
-       regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler | PSW_ADDR_AMODE;
+       regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler;
 
        regs->gprs[2] = ksig->sig;
        regs->gprs[3] = (unsigned long) &frame->info;
index a13468b..3c65a8e 100644 (file)
@@ -623,8 +623,6 @@ void __init smp_save_dump_cpus(void)
                return;
        /* Allocate a page as dumping area for the store status sigps */
        page = memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, 1UL << 31);
-       if (!page)
-               panic("could not allocate memory for save area\n");
        /* Set multi-threading state to the previous system. */
        pcpu_set_smt(sclp.mtid_prev);
        boot_cpu_addr = stap();
index 1785cd8..5acba3c 100644 (file)
@@ -21,12 +21,11 @@ static unsigned long save_context_stack(struct stack_trace *trace,
        unsigned long addr;
 
        while(1) {
-               sp &= PSW_ADDR_INSN;
                if (sp < low || sp > high)
                        return sp;
                sf = (struct stack_frame *)sp;
                while(1) {
-                       addr = sf->gprs[8] & PSW_ADDR_INSN;
+                       addr = sf->gprs[8];
                        if (!trace->skip)
                                trace->entries[trace->nr_entries++] = addr;
                        else
@@ -34,7 +33,7 @@ static unsigned long save_context_stack(struct stack_trace *trace,
                        if (trace->nr_entries >= trace->max_entries)
                                return sp;
                        low = sp;
-                       sp = sf->back_chain & PSW_ADDR_INSN;
+                       sp = sf->back_chain;
                        if (!sp)
                                break;
                        if (sp <= low || sp > high - sizeof(*sf))
@@ -46,7 +45,7 @@ static unsigned long save_context_stack(struct stack_trace *trace,
                if (sp <= low || sp > high - sizeof(*regs))
                        return sp;
                regs = (struct pt_regs *)sp;
-               addr = regs->psw.addr & PSW_ADDR_INSN;
+               addr = regs->psw.addr;
                if (savesched || !in_sched_functions(addr)) {
                        if (!trace->skip)
                                trace->entries[trace->nr_entries++] = addr;
@@ -65,7 +64,7 @@ void save_stack_trace(struct stack_trace *trace)
        register unsigned long sp asm ("15");
        unsigned long orig_sp, new_sp;
 
-       orig_sp = sp & PSW_ADDR_INSN;
+       orig_sp = sp;
        new_sp = save_context_stack(trace, orig_sp,
                                    S390_lowcore.panic_stack - PAGE_SIZE,
                                    S390_lowcore.panic_stack, 1);
@@ -86,7 +85,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
        unsigned long sp, low, high;
 
-       sp = tsk->thread.ksp & PSW_ADDR_INSN;
+       sp = tsk->thread.ksp;
        low = (unsigned long) task_stack_page(tsk);
        high = (unsigned long) task_pt_regs(tsk);
        save_context_stack(trace, sp, low, high, 0);
index 5378c3e..293d8b9 100644 (file)
@@ -383,3 +383,4 @@ SYSCALL(sys_recvfrom,compat_sys_recvfrom)
 SYSCALL(sys_recvmsg,compat_sys_recvmsg)
 SYSCALL(sys_shutdown,sys_shutdown)
 SYSCALL(sys_mlock2,compat_sys_mlock2)
+SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
index d69d648..017eb03 100644 (file)
@@ -32,8 +32,7 @@ static inline void __user *get_trap_ip(struct pt_regs *regs)
                address = *(unsigned long *)(current->thread.trap_tdb + 24);
        else
                address = regs->psw.addr;
-       return (void __user *)
-               ((address - (regs->int_code >> 16)) & PSW_ADDR_INSN);
+       return (void __user *) (address - (regs->int_code >> 16));
 }
 
 static inline void report_user_fault(struct pt_regs *regs, int signr)
@@ -46,7 +45,7 @@ static inline void report_user_fault(struct pt_regs *regs, int signr)
                return;
        printk("User process fault: interruption code %04x ilc:%d ",
               regs->int_code & 0xffff, regs->int_code >> 17);
-       print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN);
+       print_vma_addr("in ", regs->psw.addr);
        printk("\n");
        show_regs(regs);
 }
@@ -69,13 +68,13 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
                report_user_fault(regs, si_signo);
         } else {
                 const struct exception_table_entry *fixup;
-                fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+               fixup = search_exception_tables(regs->psw.addr);
                 if (fixup)
-                       regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
+                       regs->psw.addr = extable_fixup(fixup);
                else {
                        enum bug_trap_type btt;
 
-                       btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs);
+                       btt = report_bug(regs->psw.addr, regs);
                        if (btt == BUG_TRAP_TYPE_WARN)
                                return;
                        die(regs, str);
index 5fce52c..5ea5af3 100644 (file)
@@ -29,6 +29,7 @@ config KVM
        select HAVE_KVM_IRQFD
        select HAVE_KVM_IRQ_ROUTING
        select SRCU
+       select KVM_VFIO
        ---help---
          Support hosting paravirtualized guest machines using the SIE
          virtualization capability on the mainframe. This should work
index b3b5534..d42fa38 100644 (file)
@@ -7,7 +7,7 @@
 # as published by the Free Software Foundation.
 
 KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
index 47518a3..d697312 100644 (file)
@@ -116,7 +116,7 @@ static void enable_all_hw_wp(struct kvm_vcpu *vcpu)
        if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) {
                *cr9 &= ~PER_CONTROL_ALTERATION;
                *cr10 = 0;
-               *cr11 = PSW_ADDR_INSN;
+               *cr11 = -1UL;
        } else {
                *cr9 &= ~PER_CONTROL_ALTERATION;
                *cr9 |= PER_EVENT_STORE;
@@ -159,7 +159,7 @@ void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu)
                vcpu->arch.sie_block->gcr[0] &= ~0x800ul;
                vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
                vcpu->arch.sie_block->gcr[10] = 0;
-               vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN;
+               vcpu->arch.sie_block->gcr[11] = -1UL;
        }
 
        if (guestdbg_hw_bp_enabled(vcpu)) {
index 835d60b..4af21c7 100644 (file)
@@ -1423,44 +1423,18 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        return 0;
 }
 
-/*
- * Backs up the current FP/VX register save area on a particular
- * destination.  Used to switch between different register save
- * areas.
- */
-static inline void save_fpu_to(struct fpu *dst)
-{
-       dst->fpc = current->thread.fpu.fpc;
-       dst->regs = current->thread.fpu.regs;
-}
-
-/*
- * Switches the FP/VX register save area from which to lazy
- * restore register contents.
- */
-static inline void load_fpu_from(struct fpu *from)
-{
-       current->thread.fpu.fpc = from->fpc;
-       current->thread.fpu.regs = from->regs;
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        /* Save host register state */
        save_fpu_regs();
-       save_fpu_to(&vcpu->arch.host_fpregs);
-
-       if (test_kvm_facility(vcpu->kvm, 129)) {
-               current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
-               /*
-                * Use the register save area in the SIE-control block
-                * for register restore and save in kvm_arch_vcpu_put()
-                */
-               current->thread.fpu.vxrs =
-                       (__vector128 *)&vcpu->run->s.regs.vrs;
-       } else
-               load_fpu_from(&vcpu->arch.guest_fpregs);
+       vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
+       vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
 
+       /* Depending on MACHINE_HAS_VX, data stored to vrs either
+        * has vector register or floating point register format.
+        */
+       current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+       current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
        if (test_fp_ctl(current->thread.fpu.fpc))
                /* User space provided an invalid FPC, let's clear it */
                current->thread.fpu.fpc = 0;
@@ -1476,19 +1450,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
        atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
        gmap_disable(vcpu->arch.gmap);
 
+       /* Save guest register state */
        save_fpu_regs();
+       vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
 
-       if (test_kvm_facility(vcpu->kvm, 129))
-               /*
-                * kvm_arch_vcpu_load() set up the register save area to
-                * the &vcpu->run->s.regs.vrs and, thus, the vector registers
-                * are already saved.  Only the floating-point control must be
-                * copied.
-                */
-               vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
-       else
-               save_fpu_to(&vcpu->arch.guest_fpregs);
-       load_fpu_from(&vcpu->arch.host_fpregs);
+       /* Restore host register state */
+       current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
+       current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
 
        save_access_regs(vcpu->run->s.regs.acrs);
        restore_access_regs(vcpu->arch.host_acrs);
@@ -1506,8 +1474,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
        memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
        vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
        vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
-       vcpu->arch.guest_fpregs.fpc = 0;
-       asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
+       /* make sure the new fpc will be lazily loaded */
+       save_fpu_regs();
+       current->thread.fpu.fpc = 0;
        vcpu->arch.sie_block->gbea = 1;
        vcpu->arch.sie_block->pp = 0;
        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
@@ -1648,17 +1617,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
        vcpu->arch.local_int.wq = &vcpu->wq;
        vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
 
-       /*
-        * Allocate a save area for floating-point registers.  If the vector
-        * extension is available, register contents are saved in the SIE
-        * control block.  The allocated save area is still required in
-        * particular places, for example, in kvm_s390_vcpu_store_status().
-        */
-       vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
-                                              GFP_KERNEL);
-       if (!vcpu->arch.guest_fpregs.fprs)
-               goto out_free_sie_block;
-
        rc = kvm_vcpu_init(vcpu, kvm, id);
        if (rc)
                goto out_free_sie_block;
@@ -1879,19 +1837,27 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 
 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
+       /* make sure the new values will be lazily loaded */
+       save_fpu_regs();
        if (test_fp_ctl(fpu->fpc))
                return -EINVAL;
-       memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
-       vcpu->arch.guest_fpregs.fpc = fpu->fpc;
-       save_fpu_regs();
-       load_fpu_from(&vcpu->arch.guest_fpregs);
+       current->thread.fpu.fpc = fpu->fpc;
+       if (MACHINE_HAS_VX)
+               convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
+       else
+               memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
        return 0;
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
-       memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
-       fpu->fpc = vcpu->arch.guest_fpregs.fpc;
+       /* make sure we have the latest values */
+       save_fpu_regs();
+       if (MACHINE_HAS_VX)
+               convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
+       else
+               memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
+       fpu->fpc = current->thread.fpu.fpc;
        return 0;
 }
 
@@ -2396,6 +2362,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
 {
        unsigned char archmode = 1;
+       freg_t fprs[NUM_FPRS];
        unsigned int px;
        u64 clkcomp;
        int rc;
@@ -2411,8 +2378,16 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
                gpa = px;
        } else
                gpa -= __LC_FPREGS_SAVE_AREA;
-       rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
-                            vcpu->arch.guest_fpregs.fprs, 128);
+
+       /* manually convert vector registers if necessary */
+       if (MACHINE_HAS_VX) {
+               convert_vx_to_fp(fprs, current->thread.fpu.vxrs);
+               rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+                                    fprs, 128);
+       } else {
+               rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+                                    vcpu->run->s.regs.vrs, 128);
+       }
        rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
                              vcpu->run->s.regs.gprs, 128);
        rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
@@ -2420,7 +2395,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
        rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
                              &px, 4);
        rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
-                             &vcpu->arch.guest_fpregs.fpc, 4);
+                             &vcpu->run->s.regs.fpc, 4);
        rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
                              &vcpu->arch.sie_block->todpr, 4);
        rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
@@ -2443,19 +2418,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
         * it into the save area
         */
        save_fpu_regs();
-       if (test_kvm_facility(vcpu->kvm, 129)) {
-               /*
-                * If the vector extension is available, the vector registers
-                * which overlaps with floating-point registers are saved in
-                * the SIE-control block.  Hence, extract the floating-point
-                * registers and the FPC value and store them in the
-                * guest_fpregs structure.
-                */
-               vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
-               convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
-                                current->thread.fpu.vxrs);
-       } else
-               save_fpu_to(&vcpu->arch.guest_fpregs);
+       vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
        save_access_regs(vcpu->run->s.regs.acrs);
 
        return kvm_s390_store_status_unloaded(vcpu, addr);
index 1b903f6..791a414 100644 (file)
@@ -228,7 +228,7 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
                return;
        printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ",
               regs->int_code & 0xffff, regs->int_code >> 17);
-       print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
+       print_vma_addr(KERN_CONT "in ", regs->psw.addr);
        printk(KERN_CONT "\n");
        printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
               regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
@@ -256,9 +256,9 @@ static noinline void do_no_context(struct pt_regs *regs)
        const struct exception_table_entry *fixup;
 
        /* Are we prepared to handle this kernel fault?  */
-       fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+       fixup = search_exception_tables(regs->psw.addr);
        if (fixup) {
-               regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
+               regs->psw.addr = extable_fixup(fixup);
                return;
        }
 
index c722400..73e2903 100644 (file)
@@ -98,7 +98,7 @@ void __init paging_init(void)
        __ctl_load(S390_lowcore.kernel_asce, 1, 1);
        __ctl_load(S390_lowcore.kernel_asce, 7, 7);
        __ctl_load(S390_lowcore.kernel_asce, 13, 13);
-       arch_local_irq_restore(4UL << (BITS_PER_LONG - 8));
+       __arch_local_irq_stosm(0x04);
 
        sparse_memory_present_with_active_regions(MAX_NUMNODES);
        sparse_init();
index ea01477..45c4daa 100644 (file)
@@ -169,12 +169,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
 {
-       if (is_compat_task() || (TASK_SIZE >= (1UL << 53)))
+       if (is_compat_task() || TASK_SIZE >= TASK_MAX_SIZE)
                return 0;
        if (!(flags & MAP_FIXED))
                addr = 0;
        if ((addr + len) >= TASK_SIZE)
-               return crst_table_upgrade(current->mm, 1UL << 53);
+               return crst_table_upgrade(current->mm, TASK_MAX_SIZE);
        return 0;
 }
 
@@ -189,9 +189,9 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr,
        area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
        if (!(area & ~PAGE_MASK))
                return area;
-       if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
+       if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
                /* Upgrade the page table to 4 levels and retry. */
-               rc = crst_table_upgrade(mm, 1UL << 53);
+               rc = crst_table_upgrade(mm, TASK_MAX_SIZE);
                if (rc)
                        return (unsigned long) rc;
                area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
@@ -211,9 +211,9 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
        area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
        if (!(area & ~PAGE_MASK))
                return area;
-       if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
+       if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
                /* Upgrade the page table to 4 levels and retry. */
-               rc = crst_table_upgrade(mm, 1UL << 53);
+               rc = crst_table_upgrade(mm, TASK_MAX_SIZE);
                if (rc)
                        return (unsigned long) rc;
                area = arch_get_unmapped_area_topdown(filp, addr, len,
index a809fa8..5109827 100644 (file)
@@ -55,7 +55,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
        unsigned long entry;
        int flush;
 
-       BUG_ON(limit > (1UL << 53));
+       BUG_ON(limit > TASK_MAX_SIZE);
        flush = 0;
 repeat:
        table = crst_table_alloc(mm);
index 43f32ce..2794845 100644 (file)
@@ -57,9 +57,7 @@ static __init pg_data_t *alloc_node_data(void)
 {
        pg_data_t *res;
 
-       res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 1);
-       if (!res)
-               panic("Could not allocate memory for node data!\n");
+       res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 8);
        memset(res, 0, sizeof(pg_data_t));
        return res;
 }
@@ -162,7 +160,7 @@ static int __init numa_init_late(void)
                register_one_node(nid);
        return 0;
 }
-device_initcall(numa_init_late);
+arch_initcall(numa_init_late);
 
 static int __init parse_debug(char *parm)
 {
index 8a6811b..fe0bfe3 100644 (file)
@@ -16,24 +16,23 @@ __show_trace(unsigned int *depth, unsigned long sp,
        struct pt_regs *regs;
 
        while (*depth) {
-               sp = sp & PSW_ADDR_INSN;
                if (sp < low || sp > high - sizeof(*sf))
                        return sp;
                sf = (struct stack_frame *) sp;
                (*depth)--;
-               oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+               oprofile_add_trace(sf->gprs[8]);
 
                /* Follow the backchain.  */
                while (*depth) {
                        low = sp;
-                       sp = sf->back_chain & PSW_ADDR_INSN;
+                       sp = sf->back_chain;
                        if (!sp)
                                break;
                        if (sp <= low || sp > high - sizeof(*sf))
                                return sp;
                        sf = (struct stack_frame *) sp;
                        (*depth)--;
-                       oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+                       oprofile_add_trace(sf->gprs[8]);
 
                }
 
@@ -46,7 +45,7 @@ __show_trace(unsigned int *depth, unsigned long sp,
                        return sp;
                regs = (struct pt_regs *) sp;
                (*depth)--;
-               oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+               oprofile_add_trace(sf->gprs[8]);
                low = sp;
                sp = regs->gprs[15];
        }
index 11d4f27..8f19c8f 100644 (file)
@@ -68,9 +68,12 @@ static struct airq_struct zpci_airq = {
        .isc = PCI_ISC,
 };
 
-/* I/O Map */
+#define ZPCI_IOMAP_ENTRIES                                             \
+       min(((unsigned long) CONFIG_PCI_NR_FUNCTIONS * PCI_BAR_COUNT),  \
+           ZPCI_IOMAP_MAX_ENTRIES)
+
 static DEFINE_SPINLOCK(zpci_iomap_lock);
-static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
+static unsigned long *zpci_iomap_bitmap;
 struct zpci_iomap_entry *zpci_iomap_start;
 EXPORT_SYMBOL_GPL(zpci_iomap_start);
 
@@ -265,27 +268,20 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev,
                              unsigned long max)
 {
        struct zpci_dev *zdev = to_zpci(pdev);
-       u64 addr;
        int idx;
 
-       if ((bar & 7) != bar)
+       if (!pci_resource_len(pdev, bar))
                return NULL;
 
        idx = zdev->bars[bar].map_idx;
        spin_lock(&zpci_iomap_lock);
-       if (zpci_iomap_start[idx].count++) {
-               BUG_ON(zpci_iomap_start[idx].fh != zdev->fh ||
-                      zpci_iomap_start[idx].bar != bar);
-       } else {
-               zpci_iomap_start[idx].fh = zdev->fh;
-               zpci_iomap_start[idx].bar = bar;
-       }
        /* Detect overrun */
-       BUG_ON(!zpci_iomap_start[idx].count);
+       WARN_ON(!++zpci_iomap_start[idx].count);
+       zpci_iomap_start[idx].fh = zdev->fh;
+       zpci_iomap_start[idx].bar = bar;
        spin_unlock(&zpci_iomap_lock);
 
-       addr = ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48);
-       return (void __iomem *) addr + offset;
+       return (void __iomem *) ZPCI_ADDR(idx) + offset;
 }
 EXPORT_SYMBOL(pci_iomap_range);
 
@@ -297,12 +293,11 @@ EXPORT_SYMBOL(pci_iomap);
 
 void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
 {
-       unsigned int idx;
+       unsigned int idx = ZPCI_IDX(addr);
 
-       idx = (((__force u64) addr) & ~ZPCI_IOMAP_ADDR_BASE) >> 48;
        spin_lock(&zpci_iomap_lock);
        /* Detect underrun */
-       BUG_ON(!zpci_iomap_start[idx].count);
+       WARN_ON(!zpci_iomap_start[idx].count);
        if (!--zpci_iomap_start[idx].count) {
                zpci_iomap_start[idx].fh = 0;
                zpci_iomap_start[idx].bar = 0;
@@ -544,15 +539,15 @@ static void zpci_irq_exit(void)
 
 static int zpci_alloc_iomap(struct zpci_dev *zdev)
 {
-       int entry;
+       unsigned long entry;
 
        spin_lock(&zpci_iomap_lock);
-       entry = find_first_zero_bit(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
-       if (entry == ZPCI_IOMAP_MAX_ENTRIES) {
+       entry = find_first_zero_bit(zpci_iomap_bitmap, ZPCI_IOMAP_ENTRIES);
+       if (entry == ZPCI_IOMAP_ENTRIES) {
                spin_unlock(&zpci_iomap_lock);
                return -ENOSPC;
        }
-       set_bit(entry, zpci_iomap);
+       set_bit(entry, zpci_iomap_bitmap);
        spin_unlock(&zpci_iomap_lock);
        return entry;
 }
@@ -561,7 +556,7 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
 {
        spin_lock(&zpci_iomap_lock);
        memset(&zpci_iomap_start[entry], 0, sizeof(struct zpci_iomap_entry));
-       clear_bit(entry, zpci_iomap);
+       clear_bit(entry, zpci_iomap_bitmap);
        spin_unlock(&zpci_iomap_lock);
 }
 
@@ -611,8 +606,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev,
                if (zdev->bars[i].val & 4)
                        flags |= IORESOURCE_MEM_64;
 
-               addr = ZPCI_IOMAP_ADDR_BASE + ((u64) entry << 48);
-
+               addr = ZPCI_ADDR(entry);
                size = 1UL << zdev->bars[i].size;
 
                res = __alloc_res(zdev, addr, size, flags);
@@ -873,23 +867,30 @@ static int zpci_mem_init(void)
        zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
                                16, 0, NULL);
        if (!zdev_fmb_cache)
-               goto error_zdev;
+               goto error_fmb;
 
-       /* TODO: use realloc */
-       zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start),
-                                  GFP_KERNEL);
+       zpci_iomap_start = kcalloc(ZPCI_IOMAP_ENTRIES,
+                                  sizeof(*zpci_iomap_start), GFP_KERNEL);
        if (!zpci_iomap_start)
                goto error_iomap;
-       return 0;
 
+       zpci_iomap_bitmap = kcalloc(BITS_TO_LONGS(ZPCI_IOMAP_ENTRIES),
+                                   sizeof(*zpci_iomap_bitmap), GFP_KERNEL);
+       if (!zpci_iomap_bitmap)
+               goto error_iomap_bitmap;
+
+       return 0;
+error_iomap_bitmap:
+       kfree(zpci_iomap_start);
 error_iomap:
        kmem_cache_destroy(zdev_fmb_cache);
-error_zdev:
+error_fmb:
        return -ENOMEM;
 }
 
 static void zpci_mem_exit(void)
 {
+       kfree(zpci_iomap_bitmap);
        kfree(zpci_iomap_start);
        kmem_cache_destroy(zdev_fmb_cache);
 }
index 369a3e0..b0e0475 100644 (file)
@@ -53,6 +53,11 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 
        pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
               pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
+
+       if (!pdev)
+               return;
+
+       pdev->error_state = pci_channel_io_perm_failure;
 }
 
 void zpci_event_error(void *data)
index f887c64..8a84e05 100644 (file)
@@ -33,7 +33,6 @@
 #endif
 
 #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#define smp_store_mb(var, value) __smp_store_mb(var, value)
 
 #include <asm-generic/barrier.h>
 
index e13d41c..f878bec 100644 (file)
@@ -34,21 +34,18 @@ struct page;
 
 #if defined(CONFIG_3_LEVEL_PGTABLES) && !defined(CONFIG_64BIT)
 
-typedef struct { unsigned long pte_low, pte_high; } pte_t;
+typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long pmd; } pmd_t;
 typedef struct { unsigned long pgd; } pgd_t;
-#define pte_val(x) ((x).pte_low | ((unsigned long long) (x).pte_high << 32))
-
-#define pte_get_bits(pte, bits) ((pte).pte_low & (bits))
-#define pte_set_bits(pte, bits) ((pte).pte_low |= (bits))
-#define pte_clear_bits(pte, bits) ((pte).pte_low &= ~(bits))
-#define pte_copy(to, from) ({ (to).pte_high = (from).pte_high; \
-                             smp_wmb(); \
-                             (to).pte_low = (from).pte_low; })
-#define pte_is_zero(pte) (!((pte).pte_low & ~_PAGE_NEWPAGE) && !(pte).pte_high)
-#define pte_set_val(pte, phys, prot) \
-       ({ (pte).pte_high = (phys) >> 32; \
-          (pte).pte_low = (phys) | pgprot_val(prot); })
+#define pte_val(p) ((p).pte)
+
+#define pte_get_bits(p, bits) ((p).pte & (bits))
+#define pte_set_bits(p, bits) ((p).pte |= (bits))
+#define pte_clear_bits(p, bits) ((p).pte &= ~(bits))
+#define pte_copy(to, from) ({ (to).pte = (from).pte; })
+#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEWPAGE))
+#define pte_set_val(p, phys, prot) \
+       ({ (p).pte = (phys) | pgprot_val(prot); })
 
 #define pmd_val(x)     ((x).pmd)
 #define __pmd(x) ((pmd_t) { (x) } )
index 330e738..9af2e63 100644 (file)
@@ -509,11 +509,10 @@ config X86_INTEL_CE
 
 config X86_INTEL_MID
        bool "Intel MID platform support"
-       depends on X86_32
        depends on X86_EXTENDED_PLATFORM
        depends on X86_PLATFORM_DEVICES
        depends on PCI
-       depends on PCI_GOANY
+       depends on X86_64 || (PCI_GOANY && X86_32)
        depends on X86_IO_APIC
        select SFI
        select I2C
index 712b130..3a33124 100644 (file)
@@ -157,7 +157,9 @@ ENTRY(chacha20_4block_xor_ssse3)
        # done with the slightly better performing SSSE3 byte shuffling,
        # 7/12-bit word rotation uses traditional shift+OR.
 
-       sub             $0x40,%rsp
+       mov             %rsp,%r11
+       sub             $0x80,%rsp
+       and             $~63,%rsp
 
        # x0..15[0-3] = s0..3[0..3]
        movq            0x00(%rdi),%xmm1
@@ -620,6 +622,6 @@ ENTRY(chacha20_4block_xor_ssse3)
        pxor            %xmm1,%xmm15
        movdqu          %xmm15,0xf0(%rsi)
 
-       add             $0x40,%rsp
+       mov             %r11,%rsp
        ret
 ENDPROC(chacha20_4block_xor_ssse3)
index 881b476..e7de5c9 100644 (file)
@@ -23,11 +23,13 @@ extern void irq_ctx_init(int cpu);
 
 #define __ARCH_HAS_DO_SOFTIRQ
 
+struct irq_desc;
+
 #ifdef CONFIG_HOTPLUG_CPU
 #include <linux/cpumask.h>
 extern int check_irq_vectors_for_cpu_disable(void);
 extern void fixup_irqs(void);
-extern void irq_force_complete_move(int);
+extern void irq_force_complete_move(struct irq_desc *desc);
 #endif
 
 #ifdef CONFIG_HAVE_KVM
@@ -37,7 +39,6 @@ extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void));
 extern void (*x86_platform_ipi_callback)(void);
 extern void native_init_IRQ(void);
 
-struct irq_desc;
 extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs);
 
 extern __visible unsigned int do_IRQ(struct pt_regs *regs);
index 04c27a0..4432ab7 100644 (file)
@@ -366,20 +366,18 @@ static inline enum page_cache_mode pgprot2cachemode(pgprot_t pgprot)
 }
 static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot)
 {
+       pgprotval_t val = pgprot_val(pgprot);
        pgprot_t new;
-       unsigned long val;
 
-       val = pgprot_val(pgprot);
        pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
                ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
        return new;
 }
 static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot)
 {
+       pgprotval_t val = pgprot_val(pgprot);
        pgprot_t new;
-       unsigned long val;
 
-       val = pgprot_val(pgprot);
        pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
                          ((val & _PAGE_PAT_LARGE) >>
                           (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
index 1544fab..c57fd1e 100644 (file)
@@ -67,18 +67,19 @@ static inline void arch_wmb_pmem(void)
 }
 
 /**
- * __arch_wb_cache_pmem - write back a cache range with CLWB
+ * arch_wb_cache_pmem - write back a cache range with CLWB
  * @vaddr:     virtual start address
  * @size:      number of bytes to write back
  *
  * Write back a cache range using the CLWB (cache line write back)
  * instruction.  This function requires explicit ordering with an
- * arch_wmb_pmem() call.  This API is internal to the x86 PMEM implementation.
+ * arch_wmb_pmem() call.
  */
-static inline void __arch_wb_cache_pmem(void *vaddr, size_t size)
+static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
 {
        u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
        unsigned long clflush_mask = x86_clflush_size - 1;
+       void *vaddr = (void __force *)addr;
        void *vend = vaddr + size;
        void *p;
 
@@ -115,7 +116,7 @@ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
        len = copy_from_iter_nocache(vaddr, bytes, i);
 
        if (__iter_needs_pmem_wb(i))
-               __arch_wb_cache_pmem(vaddr, bytes);
+               arch_wb_cache_pmem(addr, bytes);
 
        return len;
 }
@@ -133,7 +134,7 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size)
        void *vaddr = (void __force *)addr;
 
        memset(vaddr, 0, size);
-       __arch_wb_cache_pmem(vaddr, size);
+       arch_wb_cache_pmem(addr, size);
 }
 
 static inline bool __arch_has_wmb_pmem(void)
index f253218..fdb0fbf 100644 (file)
@@ -2521,6 +2521,7 @@ void __init setup_ioapic_dest(void)
 {
        int pin, ioapic, irq, irq_entry;
        const struct cpumask *mask;
+       struct irq_desc *desc;
        struct irq_data *idata;
        struct irq_chip *chip;
 
@@ -2536,7 +2537,9 @@ void __init setup_ioapic_dest(void)
                if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq))
                        continue;
 
-               idata = irq_get_irq_data(irq);
+               desc = irq_to_desc(irq);
+               raw_spin_lock_irq(&desc->lock);
+               idata = irq_desc_get_irq_data(desc);
 
                /*
                 * Honour affinities which have been set in early boot
@@ -2550,6 +2553,7 @@ void __init setup_ioapic_dest(void)
                /* Might be lapic_chip for irq 0 */
                if (chip->irq_set_affinity)
                        chip->irq_set_affinity(idata, mask, false);
+               raw_spin_unlock_irq(&desc->lock);
        }
 }
 #endif
index 908cb37..3b670df 100644 (file)
@@ -31,7 +31,7 @@ struct apic_chip_data {
 struct irq_domain *x86_vector_domain;
 EXPORT_SYMBOL_GPL(x86_vector_domain);
 static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask;
+static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask;
 static struct irq_chip lapic_controller;
 #ifdef CONFIG_X86_IO_APIC
 static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
@@ -118,35 +118,47 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
         */
        static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
        static int current_offset = VECTOR_OFFSET_START % 16;
-       int cpu, err;
+       int cpu, vector;
 
-       if (d->move_in_progress)
+       /*
+        * If there is still a move in progress or the previous move has not
+        * been cleaned up completely, tell the caller to come back later.
+        */
+       if (d->move_in_progress ||
+           cpumask_intersects(d->old_domain, cpu_online_mask))
                return -EBUSY;
 
        /* Only try and allocate irqs on cpus that are present */
-       err = -ENOSPC;
        cpumask_clear(d->old_domain);
+       cpumask_clear(searched_cpumask);
        cpu = cpumask_first_and(mask, cpu_online_mask);
        while (cpu < nr_cpu_ids) {
-               int new_cpu, vector, offset;
+               int new_cpu, offset;
 
+               /* Get the possible target cpus for @mask/@cpu from the apic */
                apic->vector_allocation_domain(cpu, vector_cpumask, mask);
 
+               /*
+                * Clear the offline cpus from @vector_cpumask for searching
+                * and verify whether the result overlaps with @mask. If true,
+                * then the call to apic->cpu_mask_to_apicid_and() will
+                * succeed as well. If not, no point in trying to find a
+                * vector in this mask.
+                */
+               cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask);
+               if (!cpumask_intersects(vector_searchmask, mask))
+                       goto next_cpu;
+
                if (cpumask_subset(vector_cpumask, d->domain)) {
-                       err = 0;
                        if (cpumask_equal(vector_cpumask, d->domain))
-                               break;
+                               goto success;
                        /*
-                        * New cpumask using the vector is a proper subset of
-                        * the current in use mask. So cleanup the vector
-                        * allocation for the members that are not used anymore.
+                        * Mark the cpus which are no longer in the mask for
+                        * cleanup.
                         */
-                       cpumask_andnot(d->old_domain, d->domain,
-                                      vector_cpumask);
-                       d->move_in_progress =
-                          cpumask_intersects(d->old_domain, cpu_online_mask);
-                       cpumask_and(d->domain, d->domain, vector_cpumask);
-                       break;
+                       cpumask_andnot(d->old_domain, d->domain, vector_cpumask);
+                       vector = d->cfg.vector;
+                       goto update;
                }
 
                vector = current_vector;
@@ -158,45 +170,60 @@ next:
                        vector = FIRST_EXTERNAL_VECTOR + offset;
                }
 
-               if (unlikely(current_vector == vector)) {
-                       cpumask_or(d->old_domain, d->old_domain,
-                                  vector_cpumask);
-                       cpumask_andnot(vector_cpumask, mask, d->old_domain);
-                       cpu = cpumask_first_and(vector_cpumask,
-                                               cpu_online_mask);
-                       continue;
-               }
+               /* If the search wrapped around, try the next cpu */
+               if (unlikely(current_vector == vector))
+                       goto next_cpu;
 
                if (test_bit(vector, used_vectors))
                        goto next;
 
-               for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) {
+               for_each_cpu(new_cpu, vector_searchmask) {
                        if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
                                goto next;
                }
                /* Found one! */
                current_vector = vector;
                current_offset = offset;
-               if (d->cfg.vector) {
+               /* Schedule the old vector for cleanup on all cpus */
+               if (d->cfg.vector)
                        cpumask_copy(d->old_domain, d->domain);
-                       d->move_in_progress =
-                          cpumask_intersects(d->old_domain, cpu_online_mask);
-               }
-               for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
+               for_each_cpu(new_cpu, vector_searchmask)
                        per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
-               d->cfg.vector = vector;
-               cpumask_copy(d->domain, vector_cpumask);
-               err = 0;
-               break;
-       }
+               goto update;
 
-       if (!err) {
-               /* cache destination APIC IDs into cfg->dest_apicid */
-               err = apic->cpu_mask_to_apicid_and(mask, d->domain,
-                                                  &d->cfg.dest_apicid);
+next_cpu:
+               /*
+                * We exclude the current @vector_cpumask from the requested
+                * @mask and try again with the next online cpu in the
+                * result. We cannot modify @mask, so we use @vector_cpumask
+                * as a temporary buffer here as it will be reassigned when
+                * calling apic->vector_allocation_domain() above.
+                */
+               cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask);
+               cpumask_andnot(vector_cpumask, mask, searched_cpumask);
+               cpu = cpumask_first_and(vector_cpumask, cpu_online_mask);
+               continue;
        }
+       return -ENOSPC;
 
-       return err;
+update:
+       /*
+        * Exclude offline cpus from the cleanup mask and set the
+        * move_in_progress flag when the result is not empty.
+        */
+       cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
+       d->move_in_progress = !cpumask_empty(d->old_domain);
+       d->cfg.vector = vector;
+       cpumask_copy(d->domain, vector_cpumask);
+success:
+       /*
+        * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
+        * as we already established that mask & d->domain & cpu_online_mask
+        * is not empty.
+        */
+       BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain,
+                                           &d->cfg.dest_apicid));
+       return 0;
 }
 
 static int assign_irq_vector(int irq, struct apic_chip_data *data,
@@ -226,10 +253,8 @@ static int assign_irq_vector_policy(int irq, int node,
 static void clear_irq_vector(int irq, struct apic_chip_data *data)
 {
        struct irq_desc *desc;
-       unsigned long flags;
        int cpu, vector;
 
-       raw_spin_lock_irqsave(&vector_lock, flags);
        BUG_ON(!data->cfg.vector);
 
        vector = data->cfg.vector;
@@ -239,10 +264,13 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
        data->cfg.vector = 0;
        cpumask_clear(data->domain);
 
-       if (likely(!data->move_in_progress)) {
-               raw_spin_unlock_irqrestore(&vector_lock, flags);
+       /*
+        * If move is in progress or the old_domain mask is not empty,
+        * i.e. the cleanup IPI has not been processed yet, we need to remove
+        * the old references to desc from all cpus vector tables.
+        */
+       if (!data->move_in_progress && cpumask_empty(data->old_domain))
                return;
-       }
 
        desc = irq_to_desc(irq);
        for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
@@ -255,7 +283,6 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
                }
        }
        data->move_in_progress = 0;
-       raw_spin_unlock_irqrestore(&vector_lock, flags);
 }
 
 void init_irq_alloc_info(struct irq_alloc_info *info,
@@ -276,19 +303,24 @@ void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
 static void x86_vector_free_irqs(struct irq_domain *domain,
                                 unsigned int virq, unsigned int nr_irqs)
 {
+       struct apic_chip_data *apic_data;
        struct irq_data *irq_data;
+       unsigned long flags;
        int i;
 
        for (i = 0; i < nr_irqs; i++) {
                irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i);
                if (irq_data && irq_data->chip_data) {
+                       raw_spin_lock_irqsave(&vector_lock, flags);
                        clear_irq_vector(virq + i, irq_data->chip_data);
-                       free_apic_chip_data(irq_data->chip_data);
+                       apic_data = irq_data->chip_data;
+                       irq_domain_reset_irq_data(irq_data);
+                       raw_spin_unlock_irqrestore(&vector_lock, flags);
+                       free_apic_chip_data(apic_data);
 #ifdef CONFIG_X86_IO_APIC
                        if (virq + i < nr_legacy_irqs())
                                legacy_irq_data[virq + i] = NULL;
 #endif
-                       irq_domain_reset_irq_data(irq_data);
                }
        }
 }
@@ -406,6 +438,8 @@ int __init arch_early_irq_init(void)
        arch_init_htirq_domain(x86_vector_domain);
 
        BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
+       BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
+       BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL));
 
        return arch_early_ioapic_init();
 }
@@ -494,14 +528,7 @@ static int apic_set_affinity(struct irq_data *irq_data,
                return -EINVAL;
 
        err = assign_irq_vector(irq, data, dest);
-       if (err) {
-               if (assign_irq_vector(irq, data,
-                                     irq_data_get_affinity_mask(irq_data)))
-                       pr_err("Failed to recover vector for irq %d\n", irq);
-               return err;
-       }
-
-       return IRQ_SET_MASK_OK;
+       return err ? err : IRQ_SET_MASK_OK;
 }
 
 static struct irq_chip lapic_controller = {
@@ -513,20 +540,12 @@ static struct irq_chip lapic_controller = {
 #ifdef CONFIG_SMP
 static void __send_cleanup_vector(struct apic_chip_data *data)
 {
-       cpumask_var_t cleanup_mask;
-
-       if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-               unsigned int i;
-
-               for_each_cpu_and(i, data->old_domain, cpu_online_mask)
-                       apic->send_IPI_mask(cpumask_of(i),
-                                           IRQ_MOVE_CLEANUP_VECTOR);
-       } else {
-               cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
-               apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-               free_cpumask_var(cleanup_mask);
-       }
+       raw_spin_lock(&vector_lock);
+       cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
        data->move_in_progress = 0;
+       if (!cpumask_empty(data->old_domain))
+               apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
+       raw_spin_unlock(&vector_lock);
 }
 
 void send_cleanup_vector(struct irq_cfg *cfg)
@@ -570,12 +589,25 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
                        goto unlock;
 
                /*
-                * Check if the irq migration is in progress. If so, we
-                * haven't received the cleanup request yet for this irq.
+                * Nothing to cleanup if irq migration is in progress
+                * or this cpu is not set in the cleanup mask.
                 */
-               if (data->move_in_progress)
+               if (data->move_in_progress ||
+                   !cpumask_test_cpu(me, data->old_domain))
                        goto unlock;
 
+               /*
+                * We have two cases to handle here:
+                * 1) vector is unchanged but the target mask got reduced
+                * 2) vector and the target mask have changed
+                *
+                * #1 is obvious, but in #2 we have two vectors with the same
+                * irq descriptor: the old and the new vector. So we need to
+                * make sure that we only cleanup the old vector. The new
+                * vector has the current @vector number in the config and
+                * this cpu is part of the target mask. We better leave that
+                * one alone.
+                */
                if (vector == data->cfg.vector &&
                    cpumask_test_cpu(me, data->domain))
                        goto unlock;
@@ -593,6 +625,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
                        goto unlock;
                }
                __this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+               cpumask_clear_cpu(me, data->old_domain);
 unlock:
                raw_spin_unlock(&desc->lock);
        }
@@ -621,12 +654,48 @@ void irq_complete_move(struct irq_cfg *cfg)
        __irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
 }
 
-void irq_force_complete_move(int irq)
+/*
+ * Called with @desc->lock held and interrupts disabled.
+ */
+void irq_force_complete_move(struct irq_desc *desc)
 {
-       struct irq_cfg *cfg = irq_cfg(irq);
+       struct irq_data *irqdata = irq_desc_get_irq_data(desc);
+       struct apic_chip_data *data = apic_chip_data(irqdata);
+       struct irq_cfg *cfg = data ? &data->cfg : NULL;
 
-       if (cfg)
-               __irq_complete_move(cfg, cfg->vector);
+       if (!cfg)
+               return;
+
+       __irq_complete_move(cfg, cfg->vector);
+
+       /*
+        * This is tricky. If the cleanup of @data->old_domain has not been
+        * done yet, then the following setaffinity call will fail with
+        * -EBUSY. This can leave the interrupt in a stale state.
+        *
+        * The cleanup cannot make progress because we hold @desc->lock. So in
+        * case @data->old_domain is not yet cleaned up, we need to drop the
+        * lock and acquire it again. @desc cannot go away, because the
+        * hotplug code holds the sparse irq lock.
+        */
+       raw_spin_lock(&vector_lock);
+       /* Clean out all offline cpus (including ourself) first. */
+       cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
+       while (!cpumask_empty(data->old_domain)) {
+               raw_spin_unlock(&vector_lock);
+               raw_spin_unlock(&desc->lock);
+               cpu_relax();
+               raw_spin_lock(&desc->lock);
+               /*
+                * Reevaluate apic_chip_data. It might have been cleared after
+                * we dropped @desc->lock.
+                */
+               data = apic_chip_data(irqdata);
+               if (!data)
+                       return;
+               raw_spin_lock(&vector_lock);
+       }
+       raw_spin_unlock(&vector_lock);
 }
 #endif
 
index d760c6b..624db00 100644 (file)
@@ -889,7 +889,10 @@ void __init uv_system_init(void)
                return;
        }
        pr_info("UV: Found %s hub\n", hub);
-       map_low_mmrs();
+
+       /* We now only need to map the MMRs on UV1 */
+       if (is_uv1_hub())
+               map_low_mmrs();
 
        m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
        m_val = m_n_config.s.m_skt;
index a667078..fed2ab1 100644 (file)
@@ -1960,7 +1960,8 @@ intel_bts_constraints(struct perf_event *event)
 
 static int intel_alt_er(int idx, u64 config)
 {
-       int alt_idx;
+       int alt_idx = idx;
+
        if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
                return idx;
 
@@ -2897,14 +2898,12 @@ static void intel_pmu_cpu_starting(int cpu)
                return;
 
        if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
-               void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
-
                for_each_cpu(i, topology_sibling_cpumask(cpu)) {
                        struct intel_shared_regs *pc;
 
                        pc = per_cpu(cpu_hw_events, i).shared_regs;
                        if (pc && pc->core_id == core_id) {
-                               *onln = cpuc->shared_regs;
+                               cpuc->kfree_on_online[0] = cpuc->shared_regs;
                                cpuc->shared_regs = pc;
                                break;
                        }
index f97f807..3bf41d4 100644 (file)
@@ -995,6 +995,9 @@ static int __init uncore_pci_init(void)
        case 87: /* Knights Landing */
                ret = knl_uncore_pci_init();
                break;
+       case 94: /* SkyLake */
+               ret = skl_uncore_pci_init();
+               break;
        default:
                return 0;
        }
index 07aa2d6..a7086b8 100644 (file)
@@ -336,6 +336,7 @@ int snb_uncore_pci_init(void);
 int ivb_uncore_pci_init(void);
 int hsw_uncore_pci_init(void);
 int bdw_uncore_pci_init(void);
+int skl_uncore_pci_init(void);
 void snb_uncore_cpu_init(void);
 void nhm_uncore_cpu_init(void);
 int snb_pci2phy_map_init(int devid);
index 0b93482..2bd030d 100644 (file)
@@ -8,6 +8,7 @@
 #define PCI_DEVICE_ID_INTEL_HSW_IMC    0x0c00
 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC  0x0a04
 #define PCI_DEVICE_ID_INTEL_BDW_IMC    0x1604
+#define PCI_DEVICE_ID_INTEL_SKL_IMC    0x191f
 
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK                        0x000000ff
@@ -524,6 +525,14 @@ static const struct pci_device_id bdw_uncore_pci_ids[] = {
        { /* end: all zeroes */ },
 };
 
+static const struct pci_device_id skl_uncore_pci_ids[] = {
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* end: all zeroes */ },
+};
+
 static struct pci_driver snb_uncore_pci_driver = {
        .name           = "snb_uncore",
        .id_table       = snb_uncore_pci_ids,
@@ -544,6 +553,11 @@ static struct pci_driver bdw_uncore_pci_driver = {
        .id_table       = bdw_uncore_pci_ids,
 };
 
+static struct pci_driver skl_uncore_pci_driver = {
+       .name           = "skl_uncore",
+       .id_table       = skl_uncore_pci_ids,
+};
+
 struct imc_uncore_pci_dev {
        __u32 pci_id;
        struct pci_driver *driver;
@@ -558,6 +572,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
        IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
        IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
        IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver),    /* 5th Gen Core U */
+       IMC_DEV(SKL_IMC, &skl_uncore_pci_driver),    /* 6th Gen Core */
        {  /* end marker */ }
 };
 
@@ -610,6 +625,11 @@ int bdw_uncore_pci_init(void)
        return imc_uncore_pci_init();
 }
 
+int skl_uncore_pci_init(void)
+{
+       return imc_uncore_pci_init();
+}
+
 /* end of Sandy Bridge uncore support */
 
 /* Nehalem uncore support */
index f129a9a..2c0f340 100644 (file)
@@ -192,5 +192,13 @@ void __init x86_64_start_reservations(char *real_mode_data)
 
        reserve_ebda_region();
 
+       switch (boot_params.hdr.hardware_subarch) {
+       case X86_SUBARCH_INTEL_MID:
+               x86_intel_mid_early_setup();
+               break;
+       default:
+               break;
+       }
+
        start_kernel();
 }
index f8062aa..61521dc 100644 (file)
@@ -462,7 +462,7 @@ void fixup_irqs(void)
                 * non intr-remapping case, we can't wait till this interrupt
                 * arrives at this cpu before completing the irq move.
                 */
-               irq_force_complete_move(irq);
+               irq_force_complete_move(desc);
 
                if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
                        break_affinity = 1;
@@ -470,6 +470,15 @@ void fixup_irqs(void)
                }
 
                chip = irq_data_get_irq_chip(data);
+               /*
+                * The interrupt descriptor might have been cleaned up
+                * already, but it is not yet removed from the radix tree
+                */
+               if (!chip) {
+                       raw_spin_unlock(&desc->lock);
+                       continue;
+               }
+
                if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
                        chip->irq_mask(data);
 
index 42982b2..740d7ac 100644 (file)
@@ -173,10 +173,10 @@ static __init int setup_hugepagesz(char *opt)
 }
 __setup("hugepagesz=", setup_hugepagesz);
 
-#ifdef CONFIG_CMA
+#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
 static __init int gigantic_pages_init(void)
 {
-       /* With CMA we can allocate gigantic pages at runtime */
+       /* With compaction or CMA we can allocate gigantic pages at runtime */
        if (cpu_has_gbpages && !size_to_hstate(1UL << PUD_SHIFT))
                hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
        return 0;
index fc6a4c8..2440814 100644 (file)
@@ -33,7 +33,7 @@ struct cpa_data {
        pgd_t           *pgd;
        pgprot_t        mask_set;
        pgprot_t        mask_clr;
-       int             numpages;
+       unsigned long   numpages;
        int             flags;
        unsigned long   pfn;
        unsigned        force_split : 1;
@@ -1350,7 +1350,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
                 * CPA operation. Either a large page has been
                 * preserved or a single page update happened.
                 */
-               BUG_ON(cpa->numpages > numpages);
+               BUG_ON(cpa->numpages > numpages || !cpa->numpages);
                numpages -= cpa->numpages;
                if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
                        cpa->curpage++;
index 1c7380d..2d66db8 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/memblock.h>
 #include <linux/bootmem.h>
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <asm/efi.h>
 #include <asm/uv/uv.h>
 
@@ -248,6 +249,16 @@ out:
        return ret;
 }
 
+static const struct dmi_system_id sgi_uv1_dmi[] = {
+       { NULL, "SGI UV1",
+               {       DMI_MATCH(DMI_PRODUCT_NAME,     "Stoutland Platform"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION,  "1.0"),
+                       DMI_MATCH(DMI_BIOS_VENDOR,      "SGI.COM"),
+               }
+       },
+       { } /* NULL entry stops DMI scanning */
+};
+
 void __init efi_apply_memmap_quirks(void)
 {
        /*
@@ -260,10 +271,8 @@ void __init efi_apply_memmap_quirks(void)
                efi_unmap_memmap();
        }
 
-       /*
-        * UV doesn't support the new EFI pagetable mapping yet.
-        */
-       if (is_uv_system())
+       /* UV2+ BIOS has a fix for this issue.  UV1 still needs the quirk. */
+       if (dmi_check_system(sgi_uv1_dmi))
                set_bit(EFI_OLD_MEMMAP, &efi.flags);
 }
 
index 1bbc21e..90bb997 100644 (file)
@@ -138,7 +138,7 @@ static void intel_mid_arch_setup(void)
                intel_mid_ops = get_intel_mid_ops[__intel_mid_cpu_chip]();
        else {
                intel_mid_ops = get_intel_mid_ops[INTEL_MID_CPU_CHIP_PENWELL]();
-               pr_info("ARCH: Unknown SoC, assuming PENWELL!\n");
+               pr_info("ARCH: Unknown SoC, assuming Penwell!\n");
        }
 
 out:
@@ -214,12 +214,10 @@ static inline int __init setup_x86_intel_mid_timer(char *arg)
        else if (strcmp("lapic_and_apbt", arg) == 0)
                intel_mid_timer_options = INTEL_MID_TIMER_LAPIC_APBT;
        else {
-               pr_warn("X86 INTEL_MID timer option %s not recognised"
-                          " use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n",
-                          arg);
+               pr_warn("X86 INTEL_MID timer option %s not recognised use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n",
+                       arg);
                return -EINVAL;
        }
        return 0;
 }
 __setup("x86_intel_mid_timer=", setup_x86_intel_mid_timer);
-
index c1bdafa..c61b6c3 100644 (file)
@@ -220,11 +220,12 @@ static int imr_dbgfs_state_show(struct seq_file *s, void *unused)
                if (imr_is_enabled(&imr)) {
                        base = imr_to_phys(imr.addr_lo);
                        end = imr_to_phys(imr.addr_hi) + IMR_MASK;
+                       size = end - base + 1;
                } else {
                        base = 0;
                        end = 0;
+                       size = 0;
                }
-               size = end - base;
                seq_printf(s, "imr%02i: base=%pa, end=%pa, size=0x%08zx "
                           "rmask=0x%08x, wmask=0x%08x, %s, %s\n", i,
                           &base, &end, size, imr.rmask, imr.wmask,
@@ -579,6 +580,7 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
 {
        phys_addr_t base = virt_to_phys(&_text);
        size_t size = virt_to_phys(&__end_rodata) - base;
+       unsigned long start, end;
        int i;
        int ret;
 
@@ -586,18 +588,24 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
        for (i = 0; i < idev->max_imr; i++)
                imr_clear(i);
 
+       start = (unsigned long)_text;
+       end = (unsigned long)__end_rodata - 1;
+
        /*
         * Setup a locked IMR around the physical extent of the kernel
         * from the beginning of the .text secton to the end of the
         * .rodata section as one physically contiguous block.
+        *
+        * We don't round up @size since it is already PAGE_SIZE aligned.
+        * See vmlinux.lds.S for details.
         */
        ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, true);
        if (ret < 0) {
-               pr_err("unable to setup IMR for kernel: (%p - %p)\n",
-                       &_text, &__end_rodata);
+               pr_err("unable to setup IMR for kernel: %zu KiB (%lx - %lx)\n",
+                       size / 1024, start, end);
        } else {
-               pr_info("protecting kernel .text - .rodata: %zu KiB (%p - %p)\n",
-                       size / 1024, &_text, &__end_rodata);
+               pr_info("protecting kernel .text - .rodata: %zu KiB (%lx - %lx)\n",
+                       size / 1024, start, end);
        }
 
 }
index db5f622..9eda232 100644 (file)
@@ -5,7 +5,7 @@
 obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
                        blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
                        blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
-                       blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \
+                       blk-lib.o blk-mq.o blk-mq-tag.o \
                        blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
                        genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
                        badblocks.o partitions/
diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c
deleted file mode 100644 (file)
index 0736729..0000000
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Functions related to interrupt-poll handling in the block layer. This
- * is similar to NAPI for network devices.
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/bio.h>
-#include <linux/blkdev.h>
-#include <linux/interrupt.h>
-#include <linux/cpu.h>
-#include <linux/blk-iopoll.h>
-#include <linux/delay.h>
-
-#include "blk.h"
-
-static unsigned int blk_iopoll_budget __read_mostly = 256;
-
-static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
-
-/**
- * blk_iopoll_sched - Schedule a run of the iopoll handler
- * @iop:      The parent iopoll structure
- *
- * Description:
- *     Add this blk_iopoll structure to the pending poll list and trigger the
- *     raise of the blk iopoll softirq. The driver must already have gotten a
- *     successful return from blk_iopoll_sched_prep() before calling this.
- **/
-void blk_iopoll_sched(struct blk_iopoll *iop)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
-       __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
-       local_irq_restore(flags);
-}
-EXPORT_SYMBOL(blk_iopoll_sched);
-
-/**
- * __blk_iopoll_complete - Mark this @iop as un-polled again
- * @iop:      The parent iopoll structure
- *
- * Description:
- *     See blk_iopoll_complete(). This function must be called with interrupts
- *     disabled.
- **/
-void __blk_iopoll_complete(struct blk_iopoll *iop)
-{
-       list_del(&iop->list);
-       smp_mb__before_atomic();
-       clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
-}
-EXPORT_SYMBOL(__blk_iopoll_complete);
-
-/**
- * blk_iopoll_complete - Mark this @iop as un-polled again
- * @iop:      The parent iopoll structure
- *
- * Description:
- *     If a driver consumes less than the assigned budget in its run of the
- *     iopoll handler, it'll end the polled mode by calling this function. The
- *     iopoll handler will not be invoked again before blk_iopoll_sched_prep()
- *     is called.
- **/
-void blk_iopoll_complete(struct blk_iopoll *iop)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       __blk_iopoll_complete(iop);
-       local_irq_restore(flags);
-}
-EXPORT_SYMBOL(blk_iopoll_complete);
-
-static void blk_iopoll_softirq(struct softirq_action *h)
-{
-       struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll);
-       int rearm = 0, budget = blk_iopoll_budget;
-       unsigned long start_time = jiffies;
-
-       local_irq_disable();
-
-       while (!list_empty(list)) {
-               struct blk_iopoll *iop;
-               int work, weight;
-
-               /*
-                * If softirq window is exhausted then punt.
-                */
-               if (budget <= 0 || time_after(jiffies, start_time)) {
-                       rearm = 1;
-                       break;
-               }
-
-               local_irq_enable();
-
-               /* Even though interrupts have been re-enabled, this
-                * access is safe because interrupts can only add new
-                * entries to the tail of this list, and only ->poll()
-                * calls can remove this head entry from the list.
-                */
-               iop = list_entry(list->next, struct blk_iopoll, list);
-
-               weight = iop->weight;
-               work = 0;
-               if (test_bit(IOPOLL_F_SCHED, &iop->state))
-                       work = iop->poll(iop, weight);
-
-               budget -= work;
-
-               local_irq_disable();
-
-               /*
-                * Drivers must not modify the iopoll state, if they
-                * consume their assigned weight (or more, some drivers can't
-                * easily just stop processing, they have to complete an
-                * entire mask of commands).In such cases this code
-                * still "owns" the iopoll instance and therefore can
-                * move the instance around on the list at-will.
-                */
-               if (work >= weight) {
-                       if (blk_iopoll_disable_pending(iop))
-                               __blk_iopoll_complete(iop);
-                       else
-                               list_move_tail(&iop->list, list);
-               }
-       }
-
-       if (rearm)
-               __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
-
-       local_irq_enable();
-}
-
-/**
- * blk_iopoll_disable - Disable iopoll on this @iop
- * @iop:      The parent iopoll structure
- *
- * Description:
- *     Disable io polling and wait for any pending callbacks to have completed.
- **/
-void blk_iopoll_disable(struct blk_iopoll *iop)
-{
-       set_bit(IOPOLL_F_DISABLE, &iop->state);
-       while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state))
-               msleep(1);
-       clear_bit(IOPOLL_F_DISABLE, &iop->state);
-}
-EXPORT_SYMBOL(blk_iopoll_disable);
-
-/**
- * blk_iopoll_enable - Enable iopoll on this @iop
- * @iop:      The parent iopoll structure
- *
- * Description:
- *     Enable iopoll on this @iop. Note that the handler run will not be
- *     scheduled, it will only mark it as active.
- **/
-void blk_iopoll_enable(struct blk_iopoll *iop)
-{
-       BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state));
-       smp_mb__before_atomic();
-       clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
-}
-EXPORT_SYMBOL(blk_iopoll_enable);
-
-/**
- * blk_iopoll_init - Initialize this @iop
- * @iop:      The parent iopoll structure
- * @weight:   The default weight (or command completion budget)
- * @poll_fn:  The handler to invoke
- *
- * Description:
- *     Initialize this blk_iopoll structure. Before being actively used, the
- *     driver must call blk_iopoll_enable().
- **/
-void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn)
-{
-       memset(iop, 0, sizeof(*iop));
-       INIT_LIST_HEAD(&iop->list);
-       iop->weight = weight;
-       iop->poll = poll_fn;
-       set_bit(IOPOLL_F_SCHED, &iop->state);
-}
-EXPORT_SYMBOL(blk_iopoll_init);
-
-static int blk_iopoll_cpu_notify(struct notifier_block *self,
-                                unsigned long action, void *hcpu)
-{
-       /*
-        * If a CPU goes away, splice its entries to the current CPU
-        * and trigger a run of the softirq
-        */
-       if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-               int cpu = (unsigned long) hcpu;
-
-               local_irq_disable();
-               list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
-                                this_cpu_ptr(&blk_cpu_iopoll));
-               __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
-               local_irq_enable();
-       }
-
-       return NOTIFY_OK;
-}
-
-static struct notifier_block blk_iopoll_cpu_notifier = {
-       .notifier_call  = blk_iopoll_cpu_notify,
-};
-
-static __init int blk_iopoll_setup(void)
-{
-       int i;
-
-       for_each_possible_cpu(i)
-               INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
-
-       open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq);
-       register_hotcpu_notifier(&blk_iopoll_cpu_notifier);
-       return 0;
-}
-subsys_initcall(blk_iopoll_setup);
index 1699df5..888a7fe 100644 (file)
@@ -70,6 +70,18 @@ static struct bio *blk_bio_write_same_split(struct request_queue *q,
        return bio_split(bio, q->limits.max_write_same_sectors, GFP_NOIO, bs);
 }
 
+/*
+ * Return the size limit, in 512-byte sectors, used when splitting @bio on
+ * @q: the value reported by blk_max_size_offset() for the bio's starting
+ * sector, with its low bits cleared so that it is a whole number of logical
+ * blocks.  (Assumes the logical block size is a power of two -- the mask
+ * arithmetic below only rounds down correctly in that case.)
+ */
+static inline unsigned get_max_io_size(struct request_queue *q,
+                                      struct bio *bio)
+{
+       unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
+       unsigned mask = queue_logical_block_size(q) - 1;
+
+       /* aligned to logical block size */
+       sectors &= ~(mask >> 9);
+
+       return sectors;
+}
+
 static struct bio *blk_bio_segment_split(struct request_queue *q,
                                         struct bio *bio,
                                         struct bio_set *bs,
@@ -81,6 +93,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
        unsigned front_seg_size = bio->bi_seg_front_size;
        bool do_split = true;
        struct bio *new = NULL;
+       const unsigned max_sectors = get_max_io_size(q, bio);
 
        bio_for_each_segment(bv, bio, iter) {
                /*
@@ -90,20 +103,19 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
                if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
                        goto split;
 
-               if (sectors + (bv.bv_len >> 9) >
-                               blk_max_size_offset(q, bio->bi_iter.bi_sector)) {
+               if (sectors + (bv.bv_len >> 9) > max_sectors) {
                        /*
                         * Consider this a new segment if we're splitting in
                         * the middle of this vector.
                         */
                        if (nsegs < queue_max_segments(q) &&
-                           sectors < blk_max_size_offset(q,
-                                               bio->bi_iter.bi_sector)) {
+                           sectors < max_sectors) {
                                nsegs++;
-                               sectors = blk_max_size_offset(q,
-                                               bio->bi_iter.bi_sector);
+                               sectors = max_sectors;
                        }
-                       goto split;
+                       if (sectors)
+                               goto split;
+                       /* Make this single bvec as the 1st segment */
                }
 
                if (bvprvp && blk_queue_cluster(q)) {
index 2c84683..d8996bb 100644 (file)
@@ -434,42 +434,6 @@ bool blkdev_dax_capable(struct block_device *bdev)
 
        return true;
 }
-
-static int blkdev_daxset(struct block_device *bdev, unsigned long argp)
-{
-       unsigned long arg;
-       int rc = 0;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -EACCES;
-
-       if (get_user(arg, (int __user *)(argp)))
-               return -EFAULT;
-       arg = !!arg;
-       if (arg == !!(bdev->bd_inode->i_flags & S_DAX))
-               return 0;
-
-       if (arg)
-               arg = S_DAX;
-
-       if (arg && !blkdev_dax_capable(bdev))
-               return -ENOTTY;
-
-       mutex_lock(&bdev->bd_inode->i_mutex);
-       if (bdev->bd_map_count == 0)
-               inode_set_flags(bdev->bd_inode, arg, S_DAX);
-       else
-               rc = -EBUSY;
-       mutex_unlock(&bdev->bd_inode->i_mutex);
-       return rc;
-}
-#else
-static int blkdev_daxset(struct block_device *bdev, int arg)
-{
-       if (arg)
-               return -ENOTTY;
-       return 0;
-}
 #endif
 
 static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
@@ -634,8 +598,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
        case BLKTRACESETUP:
        case BLKTRACETEARDOWN:
                return blk_trace_ioctl(bdev, cmd, argp);
-       case BLKDAXSET:
-               return blkdev_daxset(bdev, arg);
        case BLKDAXGET:
                return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX));
                break;
index 746935a..fefd01b 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/kmod.h>
 #include <linux/ctype.h>
 #include <linux/genhd.h>
+#include <linux/dax.h>
 #include <linux/blktrace_api.h>
 
 #include "partitions/check.h"
@@ -550,13 +551,24 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
        return 0;
 }
 
-unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
+/*
+ * Read the page that holds 512-byte sector @n of @bdev through the mapping
+ * of the block device inode.  The page index is n >> (PAGE_CACHE_SHIFT - 9).
+ * The result of read_mapping_page() is returned unchanged, so the caller is
+ * expected to test it with IS_ERR().
+ */
+static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n)
 {
        struct address_space *mapping = bdev->bd_inode->i_mapping;
+
+       return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
+                       NULL);
+}
+
+unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
+{
        struct page *page;
 
-       page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
-                                NULL);
+       /* don't populate page cache for dax capable devices */
+       if (IS_DAX(bdev->bd_inode))
+               page = read_dax_sector(bdev, n);
+       else
+               page = read_pagecache_sector(bdev, n);
+
        if (!IS_ERR(page)) {
                if (PageError(page))
                        goto fail;
index 7240821..3be07ad 100644 (file)
@@ -472,11 +472,13 @@ config CRYPTO_CRCT10DIF_PCLMUL
 config CRYPTO_GHASH
        tristate "GHASH digest algorithm"
        select CRYPTO_GF128MUL
+       select CRYPTO_HASH
        help
          GHASH is message digest algorithm for GCM (Galois/Counter Mode).
 
 config CRYPTO_POLY1305
        tristate "Poly1305 authenticator algorithm"
+       select CRYPTO_HASH
        help
          Poly1305 authenticator algorithm, RFC7539.
 
index a8e7aa3..f5e18c2 100644 (file)
@@ -76,6 +76,8 @@ int af_alg_register_type(const struct af_alg_type *type)
                goto unlock;
 
        type->ops->owner = THIS_MODULE;
+       if (type->ops_nokey)
+               type->ops_nokey->owner = THIS_MODULE;
        node->type = type;
        list_add(&node->list, &alg_types);
        err = 0;
@@ -125,6 +127,26 @@ int af_alg_release(struct socket *sock)
 }
 EXPORT_SYMBOL_GPL(af_alg_release);
 
+/*
+ * af_alg_release_parent - drop a child socket's claim on its parent
+ * @sk: the child socket (its ->parent is the socket being released)
+ *
+ * The child's nokey_refcnt is subtracted from the parent's nokey_refcnt.
+ * If the child was accepted without a key and never obtained a keyed
+ * reference (nokey_refcnt set, refcnt zero), the parent is simply put.
+ * Otherwise the parent's refcnt is decremented and the parent is put only
+ * when that count reaches zero.
+ */
+void af_alg_release_parent(struct sock *sk)
+{
+       struct alg_sock *ask = alg_sk(sk);
+       unsigned int nokey = ask->nokey_refcnt;
+       bool last = nokey && !ask->refcnt;
+
+       /* From here on sk/ask refer to the parent, not the child. */
+       sk = ask->parent;
+       ask = alg_sk(sk);
+
+       lock_sock(sk);
+       ask->nokey_refcnt -= nokey;
+       if (!last)
+               last = !--ask->refcnt;
+       release_sock(sk);
+
+       /* sock_put() is issued only after the parent's lock is released. */
+       if (last)
+               sock_put(sk);
+}
+EXPORT_SYMBOL_GPL(af_alg_release_parent);
+
 static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
        const u32 forbidden = CRYPTO_ALG_INTERNAL;
@@ -133,6 +155,7 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        struct sockaddr_alg *sa = (void *)uaddr;
        const struct af_alg_type *type;
        void *private;
+       int err;
 
        if (sock->state == SS_CONNECTED)
                return -EINVAL;
@@ -160,16 +183,22 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                return PTR_ERR(private);
        }
 
+       err = -EBUSY;
        lock_sock(sk);
+       if (ask->refcnt | ask->nokey_refcnt)
+               goto unlock;
 
        swap(ask->type, type);
        swap(ask->private, private);
 
+       err = 0;
+
+unlock:
        release_sock(sk);
 
        alg_do_release(type, private);
 
-       return 0;
+       return err;
 }
 
 static int alg_setkey(struct sock *sk, char __user *ukey,
@@ -202,11 +231,15 @@ static int alg_setsockopt(struct socket *sock, int level, int optname,
        struct sock *sk = sock->sk;
        struct alg_sock *ask = alg_sk(sk);
        const struct af_alg_type *type;
-       int err = -ENOPROTOOPT;
+       int err = -EBUSY;
 
        lock_sock(sk);
+       if (ask->refcnt)
+               goto unlock;
+
        type = ask->type;
 
+       err = -ENOPROTOOPT;
        if (level != SOL_ALG || !type)
                goto unlock;
 
@@ -238,6 +271,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock)
        struct alg_sock *ask = alg_sk(sk);
        const struct af_alg_type *type;
        struct sock *sk2;
+       unsigned int nokey;
        int err;
 
        lock_sock(sk);
@@ -257,20 +291,29 @@ int af_alg_accept(struct sock *sk, struct socket *newsock)
        security_sk_clone(sk, sk2);
 
        err = type->accept(ask->private, sk2);
-       if (err) {
-               sk_free(sk2);
+
+       nokey = err == -ENOKEY;
+       if (nokey && type->accept_nokey)
+               err = type->accept_nokey(ask->private, sk2);
+
+       if (err)
                goto unlock;
-       }
 
        sk2->sk_family = PF_ALG;
 
-       sock_hold(sk);
+       if (nokey || !ask->refcnt++)
+               sock_hold(sk);
+       ask->nokey_refcnt += nokey;
        alg_sk(sk2)->parent = sk;
        alg_sk(sk2)->type = type;
+       alg_sk(sk2)->nokey_refcnt = nokey;
 
        newsock->ops = type->ops;
        newsock->state = SS_CONNECTED;
 
+       if (nokey)
+               newsock->ops = type->ops_nokey;
+
        err = 0;
 
 unlock:
index 9c1dc8d..d19b523 100644 (file)
@@ -451,6 +451,7 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm)
        struct ahash_alg *alg = crypto_ahash_alg(hash);
 
        hash->setkey = ahash_nosetkey;
+       hash->has_setkey = false;
        hash->export = ahash_no_export;
        hash->import = ahash_no_import;
 
@@ -463,8 +464,10 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm)
        hash->finup = alg->finup ?: ahash_def_finup;
        hash->digest = alg->digest;
 
-       if (alg->setkey)
+       if (alg->setkey) {
                hash->setkey = alg->setkey;
+               hash->has_setkey = true;
+       }
        if (alg->export)
                hash->export = alg->export;
        if (alg->import)
index b4c24fe..68a5cea 100644 (file)
@@ -34,6 +34,11 @@ struct hash_ctx {
        struct ahash_request req;
 };
 
+/*
+ * Private data of a bound "hash" parent socket: the ahash transform that
+ * hash_bind() allocated, plus a flag that hash_setkey() updates to record
+ * whether the most recent crypto_ahash_setkey() call succeeded.
+ */
+struct algif_hash_tfm {
+       struct crypto_ahash *hash;
+       bool has_key;
+};
+
 static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
                        size_t ignored)
 {
@@ -49,7 +54,8 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
 
        lock_sock(sk);
        if (!ctx->more) {
-               err = crypto_ahash_init(&ctx->req);
+               err = af_alg_wait_for_completion(crypto_ahash_init(&ctx->req),
+                                               &ctx->completion);
                if (err)
                        goto unlock;
        }
@@ -120,6 +126,7 @@ static ssize_t hash_sendpage(struct socket *sock, struct page *page,
        } else {
                if (!ctx->more) {
                        err = crypto_ahash_init(&ctx->req);
+                       err = af_alg_wait_for_completion(err, &ctx->completion);
                        if (err)
                                goto unlock;
                }
@@ -235,19 +242,151 @@ static struct proto_ops algif_hash_ops = {
        .accept         =       hash_accept,
 };
 
+/*
+ * hash_check_key - gate an operation on a socket that uses the nokey ops
+ * @sock: child socket on which an operation is being attempted
+ *
+ * Returns 0 if the child has already been validated (its refcnt is set), or
+ * if the parent's transform has a key by now; in the latter case the child's
+ * refcnt is set to 1 and the parent's refcnt is incremented.  Returns
+ * -ENOKEY while the parent's transform still has no key.
+ */
+static int hash_check_key(struct socket *sock)
+{
+       int err = 0;
+       struct sock *psk;
+       struct alg_sock *pask;
+       struct algif_hash_tfm *tfm;
+       struct sock *sk = sock->sk;
+       struct alg_sock *ask = alg_sk(sk);
+
+       lock_sock(sk);
+       /* Already converted to a keyed reference on an earlier call. */
+       if (ask->refcnt)
+               goto unlock_child;
+
+       psk = ask->parent;
+       pask = alg_sk(ask->parent);
+       tfm = pask->private;
+
+       err = -ENOKEY;
+       /* The child's lock is already held, hence the nested annotation. */
+       lock_sock_nested(psk, SINGLE_DEPTH_NESTING);
+       if (!tfm->has_key)
+               goto unlock;
+
+       /* Only the first keyed child takes a hold on the parent. */
+       if (!pask->refcnt++)
+               sock_hold(psk);
+
+       ask->refcnt = 1;
+       /*
+        * NOTE(review): presumably balances the sock_hold() that
+        * af_alg_accept() takes for every keyless child -- confirm.
+        */
+       sock_put(psk);
+
+       err = 0;
+
+unlock:
+       release_sock(psk);
+unlock_child:
+       release_sock(sk);
+
+       return err;
+}
+
+/*
+ * sendmsg entry point for the nokey ops table: fail with the error from
+ * hash_check_key() (e.g. -ENOKEY), otherwise behave exactly like
+ * hash_sendmsg().
+ */
+static int hash_sendmsg_nokey(struct socket *sock, struct msghdr *msg,
+                             size_t size)
+{
+       int err;
+
+       err = hash_check_key(sock);
+       if (err)
+               return err;
+
+       return hash_sendmsg(sock, msg, size);
+}
+
+/*
+ * sendpage entry point for the nokey ops table: fail with the error from
+ * hash_check_key() (e.g. -ENOKEY), otherwise behave exactly like
+ * hash_sendpage().
+ */
+static ssize_t hash_sendpage_nokey(struct socket *sock, struct page *page,
+                                  int offset, size_t size, int flags)
+{
+       int err;
+
+       err = hash_check_key(sock);
+       if (err)
+               return err;
+
+       return hash_sendpage(sock, page, offset, size, flags);
+}
+
+/*
+ * recvmsg entry point for the nokey ops table: fail with the error from
+ * hash_check_key() (e.g. -ENOKEY), otherwise behave exactly like
+ * hash_recvmsg().
+ */
+static int hash_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
+                             size_t ignored, int flags)
+{
+       int err;
+
+       err = hash_check_key(sock);
+       if (err)
+               return err;
+
+       return hash_recvmsg(sock, msg, ignored, flags);
+}
+
+/*
+ * accept entry point for the nokey ops table: fail with the error from
+ * hash_check_key() (e.g. -ENOKEY), otherwise behave exactly like
+ * hash_accept().
+ */
+static int hash_accept_nokey(struct socket *sock, struct socket *newsock,
+                            int flags)
+{
+       int err;
+
+       err = hash_check_key(sock);
+       if (err)
+               return err;
+
+       return hash_accept(sock, newsock, flags);
+}
+
+/*
+ * Socket operations for hash child sockets that were accepted before a key
+ * was set (referenced as .ops_nokey in algif_type_hash).  The data-path
+ * entries route through the *_nokey wrappers, which call hash_check_key()
+ * first; the remaining entries use the generic sock_no_*() handlers or
+ * af_alg_release().
+ */
+static struct proto_ops algif_hash_ops_nokey = {
+       .family         =       PF_ALG,
+
+       .connect        =       sock_no_connect,
+       .socketpair     =       sock_no_socketpair,
+       .getname        =       sock_no_getname,
+       .ioctl          =       sock_no_ioctl,
+       .listen         =       sock_no_listen,
+       .shutdown       =       sock_no_shutdown,
+       .getsockopt     =       sock_no_getsockopt,
+       .mmap           =       sock_no_mmap,
+       .bind           =       sock_no_bind,
+       .setsockopt     =       sock_no_setsockopt,
+       .poll           =       sock_no_poll,
+
+       .release        =       af_alg_release,
+       .sendmsg        =       hash_sendmsg_nokey,
+       .sendpage       =       hash_sendpage_nokey,
+       .recvmsg        =       hash_recvmsg_nokey,
+       .accept         =       hash_accept_nokey,
+};
+
+
+/*
+ * Bind callback for the "hash" type.  Allocates a zeroed algif_hash_tfm
+ * (so has_key starts out false) and the ahash transform named by @name,
+ * @type and @mask.  Returns the wrapper on success, ERR_PTR(-ENOMEM) if the
+ * wrapper cannot be allocated, or the error from crypto_alloc_ahash()
+ * (after freeing the wrapper) if the transform cannot be allocated.
+ */
 static void *hash_bind(const char *name, u32 type, u32 mask)
 {
-       return crypto_alloc_ahash(name, type, mask);
+       struct algif_hash_tfm *tfm;
+       struct crypto_ahash *hash;
+
+       tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
+       if (!tfm)
+               return ERR_PTR(-ENOMEM);
+
+       hash = crypto_alloc_ahash(name, type, mask);
+       if (IS_ERR(hash)) {
+               kfree(tfm);
+               return ERR_CAST(hash);
+       }
+
+       tfm->hash = hash;
+
+       return tfm;
 }
 
+/*
+ * Release callback for the "hash" type: frees the ahash transform held in
+ * the algif_hash_tfm passed as @private, then the wrapper itself.
+ */
 static void hash_release(void *private)
 {
-       crypto_free_ahash(private);
+       struct algif_hash_tfm *tfm = private;
+
+       crypto_free_ahash(tfm->hash);
+       kfree(tfm);
 }
 
+/*
+ * Setkey callback for the "hash" type.  Forwards @key/@keylen to
+ * crypto_ahash_setkey() and records the outcome in tfm->has_key: true when
+ * the call returned 0, false otherwise (so a failed setkey also clears a
+ * previously set flag).  Returns the crypto_ahash_setkey() result.
+ */
 static int hash_setkey(void *private, const u8 *key, unsigned int keylen)
 {
-       return crypto_ahash_setkey(private, key, keylen);
+       struct algif_hash_tfm *tfm = private;
+       int err;
+
+       err = crypto_ahash_setkey(tfm->hash, key, keylen);
+       tfm->has_key = !err;
+
+       return err;
 }
 
 static void hash_sock_destruct(struct sock *sk)
@@ -261,12 +400,14 @@ static void hash_sock_destruct(struct sock *sk)
        af_alg_release_parent(sk);
 }
 
-static int hash_accept_parent(void *private, struct sock *sk)
+static int hash_accept_parent_nokey(void *private, struct sock *sk)
 {
        struct hash_ctx *ctx;
        struct alg_sock *ask = alg_sk(sk);
-       unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(private);
-       unsigned ds = crypto_ahash_digestsize(private);
+       struct algif_hash_tfm *tfm = private;
+       struct crypto_ahash *hash = tfm->hash;
+       unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(hash);
+       unsigned ds = crypto_ahash_digestsize(hash);
 
        ctx = sock_kmalloc(sk, len, GFP_KERNEL);
        if (!ctx)
@@ -286,7 +427,7 @@ static int hash_accept_parent(void *private, struct sock *sk)
 
        ask->private = ctx;
 
-       ahash_request_set_tfm(&ctx->req, private);
+       ahash_request_set_tfm(&ctx->req, hash);
        ahash_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
                                   af_alg_complete, &ctx->completion);
 
@@ -295,12 +436,24 @@ static int hash_accept_parent(void *private, struct sock *sk)
        return 0;
 }
 
+/*
+ * Accept callback for the "hash" type.  Returns -ENOKEY when no key has
+ * been recorded for the transform and crypto_ahash_has_setkey() reports
+ * that the algorithm has a setkey operation; otherwise sets up the child
+ * via hash_accept_parent_nokey().
+ */
+static int hash_accept_parent(void *private, struct sock *sk)
+{
+       struct algif_hash_tfm *tfm = private;
+
+       if (!tfm->has_key && crypto_ahash_has_setkey(tfm->hash))
+               return -ENOKEY;
+
+       return hash_accept_parent_nokey(private, sk);
+}
+
+
+/*
+ * Descriptor for the "hash" socket type.  .accept may fail with -ENOKEY;
+ * .accept_nokey and .ops_nokey supply the alternative child setup and
+ * socket operations for that keyless case.
+ */
 static const struct af_alg_type algif_type_hash = {
        .bind           =       hash_bind,
        .release        =       hash_release,
        .setkey         =       hash_setkey,
        .accept         =       hash_accept_parent,
+       .accept_nokey   =       hash_accept_parent_nokey,
        .ops            =       &algif_hash_ops,
+       .ops_nokey      =       &algif_hash_ops_nokey,
        .name           =       "hash",
        .owner          =       THIS_MODULE
 };
index eaa9f9b..28556fc 100644 (file)
@@ -31,6 +31,11 @@ struct skcipher_sg_list {
        struct scatterlist sg[0];
 };
 
+/*
+ * Private data of a bound skcipher parent socket: the skcipher transform
+ * and a has_key flag, which skcipher_check_key() tests before letting a
+ * keyless child proceed.
+ */
+struct skcipher_tfm {
+       struct crypto_skcipher *skcipher;
+       bool has_key;
+};
+
 struct skcipher_ctx {
        struct list_head tsgl;
        struct af_alg_sgl rsgl;
@@ -60,18 +65,10 @@ struct skcipher_async_req {
        struct skcipher_async_rsgl first_sgl;
        struct list_head list;
        struct scatterlist *tsg;
-       char iv[];
+       atomic_t *inflight;
+       struct skcipher_request req;
 };
 
-#define GET_SREQ(areq, ctx) (struct skcipher_async_req *)((char *)areq + \
-       crypto_skcipher_reqsize(crypto_skcipher_reqtfm(&ctx->req)))
-
-#define GET_REQ_SIZE(ctx) \
-       crypto_skcipher_reqsize(crypto_skcipher_reqtfm(&ctx->req))
-
-#define GET_IV_SIZE(ctx) \
-       crypto_skcipher_ivsize(crypto_skcipher_reqtfm(&ctx->req))
-
 #define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_sg_list)) / \
                      sizeof(struct scatterlist) - 1)
 
@@ -97,15 +94,12 @@ static void skcipher_free_async_sgls(struct skcipher_async_req *sreq)
 
+/*
+ * Callback installed on asynchronous skcipher requests.  req->data is the
+ * skcipher_async_req itself, so no socket or context lookup is needed: the
+ * in-flight counter it points to is decremented, its scatterlists are
+ * freed, the request is released with kzfree(), and the saved kiocb is
+ * completed with @err.  The iocb pointer is read into a local before the
+ * request is freed.
+ */
 static void skcipher_async_cb(struct crypto_async_request *req, int err)
 {
-       struct sock *sk = req->data;
-       struct alg_sock *ask = alg_sk(sk);
-       struct skcipher_ctx *ctx = ask->private;
-       struct skcipher_async_req *sreq = GET_SREQ(req, ctx);
+       struct skcipher_async_req *sreq = req->data;
        struct kiocb *iocb = sreq->iocb;
 
-       atomic_dec(&ctx->inflight);
+       atomic_dec(sreq->inflight);
        skcipher_free_async_sgls(sreq);
-       kfree(req);
+       kzfree(sreq);
        iocb->ki_complete(iocb, err, err);
 }
 
@@ -301,8 +295,11 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 {
        struct sock *sk = sock->sk;
        struct alg_sock *ask = alg_sk(sk);
+       struct sock *psk = ask->parent;
+       struct alg_sock *pask = alg_sk(psk);
        struct skcipher_ctx *ctx = ask->private;
-       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(&ctx->req);
+       struct skcipher_tfm *skc = pask->private;
+       struct crypto_skcipher *tfm = skc->skcipher;
        unsigned ivsize = crypto_skcipher_ivsize(tfm);
        struct skcipher_sg_list *sgl;
        struct af_alg_control con = {};
@@ -387,7 +384,8 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 
                sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
                sg = sgl->sg;
-               sg_unmark_end(sg + sgl->cur);
+               if (sgl->cur)
+                       sg_unmark_end(sg + sgl->cur - 1);
                do {
                        i = sgl->cur;
                        plen = min_t(size_t, len, PAGE_SIZE);
@@ -503,37 +501,43 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg,
 {
        struct sock *sk = sock->sk;
        struct alg_sock *ask = alg_sk(sk);
+       struct sock *psk = ask->parent;
+       struct alg_sock *pask = alg_sk(psk);
        struct skcipher_ctx *ctx = ask->private;
+       struct skcipher_tfm *skc = pask->private;
+       struct crypto_skcipher *tfm = skc->skcipher;
        struct skcipher_sg_list *sgl;
        struct scatterlist *sg;
        struct skcipher_async_req *sreq;
        struct skcipher_request *req;
        struct skcipher_async_rsgl *last_rsgl = NULL;
-       unsigned int txbufs = 0, len = 0, tx_nents = skcipher_all_sg_nents(ctx);
-       unsigned int reqlen = sizeof(struct skcipher_async_req) +
-                               GET_REQ_SIZE(ctx) + GET_IV_SIZE(ctx);
+       unsigned int txbufs = 0, len = 0, tx_nents;
+       unsigned int reqsize = crypto_skcipher_reqsize(tfm);
+       unsigned int ivsize = crypto_skcipher_ivsize(tfm);
        int err = -ENOMEM;
        bool mark = false;
+       char *iv;
 
-       lock_sock(sk);
-       req = kmalloc(reqlen, GFP_KERNEL);
-       if (unlikely(!req))
-               goto unlock;
+       sreq = kzalloc(sizeof(*sreq) + reqsize + ivsize, GFP_KERNEL);
+       if (unlikely(!sreq))
+               goto out;
 
-       sreq = GET_SREQ(req, ctx);
+       req = &sreq->req;
+       iv = (char *)(req + 1) + reqsize;
        sreq->iocb = msg->msg_iocb;
-       memset(&sreq->first_sgl, '\0', sizeof(struct skcipher_async_rsgl));
        INIT_LIST_HEAD(&sreq->list);
+       sreq->inflight = &ctx->inflight;
+
+       lock_sock(sk);
+       tx_nents = skcipher_all_sg_nents(ctx);
        sreq->tsg = kcalloc(tx_nents, sizeof(*sg), GFP_KERNEL);
-       if (unlikely(!sreq->tsg)) {
-               kfree(req);
+       if (unlikely(!sreq->tsg))
                goto unlock;
-       }
        sg_init_table(sreq->tsg, tx_nents);
-       memcpy(sreq->iv, ctx->iv, GET_IV_SIZE(ctx));
-       skcipher_request_set_tfm(req, crypto_skcipher_reqtfm(&ctx->req));
-       skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-                                     skcipher_async_cb, sk);
+       memcpy(iv, ctx->iv, ivsize);
+       skcipher_request_set_tfm(req, tfm);
+       skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
+                                     skcipher_async_cb, sreq);
 
        while (iov_iter_count(&msg->msg_iter)) {
                struct skcipher_async_rsgl *rsgl;
@@ -609,20 +613,22 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg,
                sg_mark_end(sreq->tsg + txbufs - 1);
 
        skcipher_request_set_crypt(req, sreq->tsg, sreq->first_sgl.sgl.sg,
-                                  len, sreq->iv);
+                                  len, iv);
        err = ctx->enc ? crypto_skcipher_encrypt(req) :
                         crypto_skcipher_decrypt(req);
        if (err == -EINPROGRESS) {
                atomic_inc(&ctx->inflight);
                err = -EIOCBQUEUED;
+               sreq = NULL;
                goto unlock;
        }
 free:
        skcipher_free_async_sgls(sreq);
-       kfree(req);
 unlock:
        skcipher_wmem_wakeup(sk);
        release_sock(sk);
+       kzfree(sreq);
+out:
        return err;
 }
 
@@ -631,9 +637,12 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
 {
        struct sock *sk = sock->sk;
        struct alg_sock *ask = alg_sk(sk);
+       struct sock *psk = ask->parent;
+       struct alg_sock *pask = alg_sk(psk);
        struct skcipher_ctx *ctx = ask->private;
-       unsigned bs = crypto_skcipher_blocksize(crypto_skcipher_reqtfm(
-               &ctx->req));
+       struct skcipher_tfm *skc = pask->private;
+       struct crypto_skcipher *tfm = skc->skcipher;
+       unsigned bs = crypto_skcipher_blocksize(tfm);
        struct skcipher_sg_list *sgl;
        struct scatterlist *sg;
        int err = -EAGAIN;
@@ -642,13 +651,6 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
 
        lock_sock(sk);
        while (msg_data_left(msg)) {
-               sgl = list_first_entry(&ctx->tsgl,
-                                      struct skcipher_sg_list, list);
-               sg = sgl->sg;
-
-               while (!sg->length)
-                       sg++;
-
                if (!ctx->used) {
                        err = skcipher_wait_for_data(sk, flags);
                        if (err)
@@ -669,6 +671,13 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
                if (!used)
                        goto free;
 
+               sgl = list_first_entry(&ctx->tsgl,
+                                      struct skcipher_sg_list, list);
+               sg = sgl->sg;
+
+               while (!sg->length)
+                       sg++;
+
                skcipher_request_set_crypt(&ctx->req, sg, ctx->rsgl.sg, used,
                                           ctx->iv);
 
@@ -748,19 +757,139 @@ static struct proto_ops algif_skcipher_ops = {
        .poll           =       skcipher_poll,
 };
 
+static int skcipher_check_key(struct socket *sock)
+{
+       int err = 0;
+       struct sock *psk;
+       struct alg_sock *pask;
+       struct skcipher_tfm *tfm;
+       struct sock *sk = sock->sk;
+       struct alg_sock *ask = alg_sk(sk);
+
+       lock_sock(sk);
+       if (ask->refcnt)
+               goto unlock_child;
+
+       psk = ask->parent;
+       pask = alg_sk(ask->parent);
+       tfm = pask->private;
+
+       err = -ENOKEY;
+       lock_sock_nested(psk, SINGLE_DEPTH_NESTING);
+       if (!tfm->has_key)
+               goto unlock;
+
+       if (!pask->refcnt++)
+               sock_hold(psk);
+
+       ask->refcnt = 1;
+       sock_put(psk);
+
+       err = 0;
+
+unlock:
+       release_sock(psk);
+unlock_child:
+       release_sock(sk);
+
+       return err;
+}
+
+static int skcipher_sendmsg_nokey(struct socket *sock, struct msghdr *msg,
+                                 size_t size)
+{
+       int err;
+
+       err = skcipher_check_key(sock);
+       if (err)
+               return err;
+
+       return skcipher_sendmsg(sock, msg, size);
+}
+
+static ssize_t skcipher_sendpage_nokey(struct socket *sock, struct page *page,
+                                      int offset, size_t size, int flags)
+{
+       int err;
+
+       err = skcipher_check_key(sock);
+       if (err)
+               return err;
+
+       return skcipher_sendpage(sock, page, offset, size, flags);
+}
+
+static int skcipher_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
+                                 size_t ignored, int flags)
+{
+       int err;
+
+       err = skcipher_check_key(sock);
+       if (err)
+               return err;
+
+       return skcipher_recvmsg(sock, msg, ignored, flags);
+}
+
+static struct proto_ops algif_skcipher_ops_nokey = {
+       .family         =       PF_ALG,
+
+       .connect        =       sock_no_connect,
+       .socketpair     =       sock_no_socketpair,
+       .getname        =       sock_no_getname,
+       .ioctl          =       sock_no_ioctl,
+       .listen         =       sock_no_listen,
+       .shutdown       =       sock_no_shutdown,
+       .getsockopt     =       sock_no_getsockopt,
+       .mmap           =       sock_no_mmap,
+       .bind           =       sock_no_bind,
+       .accept         =       sock_no_accept,
+       .setsockopt     =       sock_no_setsockopt,
+
+       .release        =       af_alg_release,
+       .sendmsg        =       skcipher_sendmsg_nokey,
+       .sendpage       =       skcipher_sendpage_nokey,
+       .recvmsg        =       skcipher_recvmsg_nokey,
+       .poll           =       skcipher_poll,
+};
+
 static void *skcipher_bind(const char *name, u32 type, u32 mask)
 {
-       return crypto_alloc_skcipher(name, type, mask);
+       struct skcipher_tfm *tfm;
+       struct crypto_skcipher *skcipher;
+
+       tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
+       if (!tfm)
+               return ERR_PTR(-ENOMEM);
+
+       skcipher = crypto_alloc_skcipher(name, type, mask);
+       if (IS_ERR(skcipher)) {
+               kfree(tfm);
+               return ERR_CAST(skcipher);
+       }
+
+       tfm->skcipher = skcipher;
+
+       return tfm;
 }
 
 static void skcipher_release(void *private)
 {
-       crypto_free_skcipher(private);
+       struct skcipher_tfm *tfm = private;
+
+       crypto_free_skcipher(tfm->skcipher);
+       kfree(tfm);
 }
 
 static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen)
 {
-       return crypto_skcipher_setkey(private, key, keylen);
+       struct skcipher_tfm *tfm = private;
+       int err;
+
+       err = crypto_skcipher_setkey(tfm->skcipher, key, keylen);
+       tfm->has_key = !err;
+
+       return err;
 }
 
 static void skcipher_wait(struct sock *sk)
@@ -788,24 +917,26 @@ static void skcipher_sock_destruct(struct sock *sk)
        af_alg_release_parent(sk);
 }
 
-static int skcipher_accept_parent(void *private, struct sock *sk)
+static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
 {
        struct skcipher_ctx *ctx;
        struct alg_sock *ask = alg_sk(sk);
-       unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(private);
+       struct skcipher_tfm *tfm = private;
+       struct crypto_skcipher *skcipher = tfm->skcipher;
+       unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(skcipher);
 
        ctx = sock_kmalloc(sk, len, GFP_KERNEL);
        if (!ctx)
                return -ENOMEM;
 
-       ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(private),
+       ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(skcipher),
                               GFP_KERNEL);
        if (!ctx->iv) {
                sock_kfree_s(sk, ctx, len);
                return -ENOMEM;
        }
 
-       memset(ctx->iv, 0, crypto_skcipher_ivsize(private));
+       memset(ctx->iv, 0, crypto_skcipher_ivsize(skcipher));
 
        INIT_LIST_HEAD(&ctx->tsgl);
        ctx->len = len;
@@ -818,8 +949,9 @@ static int skcipher_accept_parent(void *private, struct sock *sk)
 
        ask->private = ctx;
 
-       skcipher_request_set_tfm(&ctx->req, private);
-       skcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+       skcipher_request_set_tfm(&ctx->req, skcipher);
+       skcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_SLEEP |
+                                                CRYPTO_TFM_REQ_MAY_BACKLOG,
                                      af_alg_complete, &ctx->completion);
 
        sk->sk_destruct = skcipher_sock_destruct;
@@ -827,12 +959,24 @@ static int skcipher_accept_parent(void *private, struct sock *sk)
        return 0;
 }
 
+static int skcipher_accept_parent(void *private, struct sock *sk)
+{
+       struct skcipher_tfm *tfm = private;
+
+       if (!tfm->has_key && crypto_skcipher_has_setkey(tfm->skcipher))
+               return -ENOKEY;
+
+       return skcipher_accept_parent_nokey(private, sk);
+}
+
 static const struct af_alg_type algif_type_skcipher = {
        .bind           =       skcipher_bind,
        .release        =       skcipher_release,
        .setkey         =       skcipher_setkey,
        .accept         =       skcipher_accept_parent,
+       .accept_nokey   =       skcipher_accept_parent_nokey,
        .ops            =       &algif_skcipher_ops,
+       .ops_nokey      =       &algif_skcipher_ops_nokey,
        .name           =       "skcipher",
        .owner          =       THIS_MODULE
 };
index 758acab..8f3056c 100644 (file)
@@ -547,9 +547,7 @@ int pkcs7_sig_note_set_of_authattrs(void *context, size_t hdrlen,
        struct pkcs7_signed_info *sinfo = ctx->sinfo;
 
        if (!test_bit(sinfo_has_content_type, &sinfo->aa_set) ||
-           !test_bit(sinfo_has_message_digest, &sinfo->aa_set) ||
-           (ctx->msg->data_type == OID_msIndirectData &&
-            !test_bit(sinfo_has_ms_opus_info, &sinfo->aa_set))) {
+           !test_bit(sinfo_has_message_digest, &sinfo->aa_set)) {
                pr_warn("Missing required AuthAttr\n");
                return -EBADMSG;
        }
index 06f1b60..4c0a0e2 100644 (file)
@@ -172,4 +172,3 @@ MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations wrapper for lib/crc32c");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_CRYPTO("crc32c");
 MODULE_ALIAS_CRYPTO("crc32c-generic");
-MODULE_SOFTDEP("pre: crc32c");
index 237f379..43fe85f 100644 (file)
@@ -499,6 +499,7 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                if (link->dump == NULL)
                        return -EINVAL;
 
+               down_read(&crypto_alg_sem);
                list_for_each_entry(alg, &crypto_alg_list, cra_list)
                        dump_alloc += CRYPTO_REPORT_MAXSIZE;
 
@@ -508,8 +509,11 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                                .done = link->done,
                                .min_dump_alloc = dump_alloc,
                        };
-                       return netlink_dump_start(crypto_nlsk, skb, nlh, &c);
+                       err = netlink_dump_start(crypto_nlsk, skb, nlh, &c);
                }
+               up_read(&crypto_alg_sem);
+
+               return err;
        }
 
        err = nlmsg_parse(nlh, crypto_msg_min[type], attrs, CRYPTOCFGA_MAX,
index ecb1e3d..3597545 100644 (file)
@@ -354,9 +354,10 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm)
        crt->final = shash_async_final;
        crt->finup = shash_async_finup;
        crt->digest = shash_async_digest;
+       crt->setkey = shash_async_setkey;
+
+       crt->has_setkey = alg->setkey != shash_no_setkey;
 
-       if (alg->setkey)
-               crt->setkey = shash_async_setkey;
        if (alg->export)
                crt->export = shash_async_export;
        if (alg->import)
index 7591928..d199c0b 100644 (file)
@@ -118,6 +118,7 @@ static int crypto_init_skcipher_ops_blkcipher(struct crypto_tfm *tfm)
        skcipher->decrypt = skcipher_decrypt_blkcipher;
 
        skcipher->ivsize = crypto_blkcipher_ivsize(blkcipher);
+       skcipher->has_setkey = calg->cra_blkcipher.max_keysize;
 
        return 0;
 }
@@ -210,6 +211,7 @@ static int crypto_init_skcipher_ops_ablkcipher(struct crypto_tfm *tfm)
        skcipher->ivsize = crypto_ablkcipher_ivsize(ablkcipher);
        skcipher->reqsize = crypto_ablkcipher_reqsize(ablkcipher) +
                            sizeof(struct ablkcipher_request);
+       skcipher->has_setkey = calg->cra_ablkcipher.max_keysize;
 
        return 0;
 }
index c570b1d..0872d5f 100644 (file)
@@ -880,7 +880,7 @@ static int acpi_lpss_platform_notify(struct notifier_block *nb,
                break;
        case BUS_NOTIFY_DRIVER_NOT_BOUND:
        case BUS_NOTIFY_UNBOUND_DRIVER:
-               pdev->dev.pm_domain = NULL;
+               dev_pm_domain_set(&pdev->dev, NULL);
                break;
        case BUS_NOTIFY_ADD_DEVICE:
                dev_pm_domain_set(&pdev->dev, &acpi_lpss_pm_domain);
index 6682c5d..6e6bc10 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/hardirq.h>
 #include <linux/pstore.h>
 #include <linux/vmalloc.h>
+#include <linux/mm.h> /* kvfree() */
 #include <acpi/apei.h>
 
 #include "apei-internal.h"
@@ -532,10 +533,7 @@ retry:
                        return -ENOMEM;
                memcpy(new_entries, entries,
                       erst_record_id_cache.len * sizeof(entries[0]));
-               if (erst_record_id_cache.size < PAGE_SIZE)
-                       kfree(entries);
-               else
-                       vfree(entries);
+               kvfree(entries);
                erst_record_id_cache.entries = entries = new_entries;
                erst_record_id_cache.size = new_size;
        }
index 90e2d54..1316ddd 100644 (file)
@@ -135,14 +135,6 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
                DMI_MATCH(DMI_PRODUCT_NAME, "UL30A"),
                },
        },
-       {
-       .callback = video_detect_force_vendor,
-       .ident = "Dell Inspiron 5737",
-       .matches = {
-               DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-               DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 5737"),
-               },
-       },
 
        /*
         * These models have a working acpi_video backlight control, and using
index 4a5c9d2..294ba6f 100644 (file)
@@ -4,7 +4,7 @@ config ARM_AMBA
 if ARM_AMBA
 
 config TEGRA_AHB
-       bool "Enable AHB driver for NVIDIA Tegra SoCs"
+       bool
        default y if ARCH_TEGRA
        help
          Adds AHB configuration functionality for NVIDIA Tegra SoCs,
index 68f0314..44a74cf 100644 (file)
@@ -215,9 +215,9 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
                newattrs.ia_uid = uid;
                newattrs.ia_gid = gid;
                newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID;
-               mutex_lock(&d_inode(dentry)->i_mutex);
+               inode_lock(d_inode(dentry));
                notify_change(dentry, &newattrs, NULL);
-               mutex_unlock(&d_inode(dentry)->i_mutex);
+               inode_unlock(d_inode(dentry));
 
                /* mark as kernel-created inode */
                d_inode(dentry)->i_private = &thread;
@@ -244,7 +244,7 @@ static int dev_rmdir(const char *name)
                err = -ENOENT;
        }
        dput(dentry);
-       mutex_unlock(&d_inode(parent.dentry)->i_mutex);
+       inode_unlock(d_inode(parent.dentry));
        path_put(&parent);
        return err;
 }
@@ -321,9 +321,9 @@ static int handle_remove(const char *nodename, struct device *dev)
                        newattrs.ia_mode = stat.mode & ~0777;
                        newattrs.ia_valid =
                                ATTR_UID|ATTR_GID|ATTR_MODE;
-                       mutex_lock(&d_inode(dentry)->i_mutex);
+                       inode_lock(d_inode(dentry));
                        notify_change(dentry, &newattrs, NULL);
-                       mutex_unlock(&d_inode(dentry)->i_mutex);
+                       inode_unlock(d_inode(dentry));
                        err = vfs_unlink(d_inode(parent.dentry), dentry, NULL);
                        if (!err || err == -ENOENT)
                                deleted = 1;
@@ -332,7 +332,7 @@ static int handle_remove(const char *nodename, struct device *dev)
                err = -ENOENT;
        }
        dput(dentry);
-       mutex_unlock(&d_inode(parent.dentry)->i_mutex);
+       inode_unlock(d_inode(parent.dentry));
 
        path_put(&parent);
        if (deleted && strchr(nodename, '/'))
index 47c4338..279e539 100644 (file)
@@ -284,6 +284,7 @@ out_free_priv_data:
 
        return err;
 }
+EXPORT_SYMBOL_GPL(platform_msi_domain_alloc_irqs);
 
 /**
  * platform_msi_domain_free_irqs - Free MSI interrupts for @dev
@@ -301,6 +302,7 @@ void platform_msi_domain_free_irqs(struct device *dev)
        msi_domain_free_irqs(dev->msi_domain, dev);
        platform_msi_free_descs(dev, 0, MAX_DEV_MSIS);
 }
+EXPORT_SYMBOL_GPL(platform_msi_domain_free_irqs);
 
 /**
  * platform_msi_get_host_data - Query the private data associated with
index 73d6e5d..f437afa 100644 (file)
@@ -558,10 +558,15 @@ static int platform_drv_probe(struct device *_dev)
                return ret;
 
        ret = dev_pm_domain_attach(_dev, true);
-       if (ret != -EPROBE_DEFER && drv->probe) {
-               ret = drv->probe(dev);
-               if (ret)
-                       dev_pm_domain_detach(_dev, true);
+       if (ret != -EPROBE_DEFER) {
+               if (drv->probe) {
+                       ret = drv->probe(dev);
+                       if (ret)
+                               dev_pm_domain_detach(_dev, true);
+               } else {
+                       /* don't fail if just dev_pm_domain_attach failed */
+                       ret = 0;
+               }
        }
 
        if (drv->prevent_deferred_probe && ret == -EPROBE_DEFER) {
index 93ed14c..f6a9ad5 100644 (file)
@@ -146,7 +146,7 @@ void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd)
        if (dev->pm_domain == pd)
                return;
 
-       WARN(device_is_bound(dev),
+       WARN(pd && device_is_bound(dev),
             "PM domains can only be changed for unbound devices\n");
        dev->pm_domain = pd;
        device_pm_check_callbacks(dev);
index 6ac9a7f..301b785 100644 (file)
@@ -162,7 +162,7 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool timed)
 
 /**
  * genpd_queue_power_off_work - Queue up the execution of genpd_poweroff().
- * @genpd: PM domait to power off.
+ * @genpd: PM domain to power off.
  *
  * Queue up the execution of genpd_poweroff() unless it's already been done
  * before.
@@ -172,16 +172,15 @@ static void genpd_queue_power_off_work(struct generic_pm_domain *genpd)
        queue_work(pm_wq, &genpd->power_off_work);
 }
 
-static int genpd_poweron(struct generic_pm_domain *genpd);
-
 /**
- * __genpd_poweron - Restore power to a given PM domain and its masters.
+ * genpd_poweron - Restore power to a given PM domain and its masters.
  * @genpd: PM domain to power up.
+ * @depth: nesting count for lockdep.
  *
  * Restore power to @genpd and all of its masters so that it is possible to
  * resume a device belonging to it.
  */
-static int __genpd_poweron(struct generic_pm_domain *genpd)
+static int genpd_poweron(struct generic_pm_domain *genpd, unsigned int depth)
 {
        struct gpd_link *link;
        int ret = 0;
@@ -196,11 +195,16 @@ static int __genpd_poweron(struct generic_pm_domain *genpd)
         * with it.
         */
        list_for_each_entry(link, &genpd->slave_links, slave_node) {
-               genpd_sd_counter_inc(link->master);
+               struct generic_pm_domain *master = link->master;
+
+               genpd_sd_counter_inc(master);
+
+               mutex_lock_nested(&master->lock, depth + 1);
+               ret = genpd_poweron(master, depth + 1);
+               mutex_unlock(&master->lock);
 
-               ret = genpd_poweron(link->master);
                if (ret) {
-                       genpd_sd_counter_dec(link->master);
+                       genpd_sd_counter_dec(master);
                        goto err;
                }
        }
@@ -223,20 +227,6 @@ static int __genpd_poweron(struct generic_pm_domain *genpd)
        return ret;
 }
 
-/**
- * genpd_poweron - Restore power to a given PM domain and its masters.
- * @genpd: PM domain to power up.
- */
-static int genpd_poweron(struct generic_pm_domain *genpd)
-{
-       int ret;
-
-       mutex_lock(&genpd->lock);
-       ret = __genpd_poweron(genpd);
-       mutex_unlock(&genpd->lock);
-       return ret;
-}
-
 static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev)
 {
        return GENPD_DEV_CALLBACK(genpd, int, save_state, dev);
@@ -484,7 +474,7 @@ static int pm_genpd_runtime_resume(struct device *dev)
        }
 
        mutex_lock(&genpd->lock);
-       ret = __genpd_poweron(genpd);
+       ret = genpd_poweron(genpd, 0);
        mutex_unlock(&genpd->lock);
 
        if (ret)
@@ -1339,8 +1329,8 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
        if (!link)
                return -ENOMEM;
 
-       mutex_lock(&genpd->lock);
-       mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING);
+       mutex_lock(&subdomain->lock);
+       mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING);
 
        if (genpd->status == GPD_STATE_POWER_OFF
            &&  subdomain->status != GPD_STATE_POWER_OFF) {
@@ -1363,8 +1353,8 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
                genpd_sd_counter_inc(genpd);
 
  out:
-       mutex_unlock(&subdomain->lock);
        mutex_unlock(&genpd->lock);
+       mutex_unlock(&subdomain->lock);
        if (ret)
                kfree(link);
        return ret;
@@ -1385,7 +1375,8 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
        if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain))
                return -EINVAL;
 
-       mutex_lock(&genpd->lock);
+       mutex_lock(&subdomain->lock);
+       mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING);
 
        if (!list_empty(&subdomain->slave_links) || subdomain->device_count) {
                pr_warn("%s: unable to remove subdomain %s\n", genpd->name,
@@ -1398,22 +1389,19 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
                if (link->slave != subdomain)
                        continue;
 
-               mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING);
-
                list_del(&link->master_node);
                list_del(&link->slave_node);
                kfree(link);
                if (subdomain->status != GPD_STATE_POWER_OFF)
                        genpd_sd_counter_dec(genpd);
 
-               mutex_unlock(&subdomain->lock);
-
                ret = 0;
                break;
        }
 
 out:
        mutex_unlock(&genpd->lock);
+       mutex_unlock(&subdomain->lock);
 
        return ret;
 }
@@ -1818,8 +1806,10 @@ int genpd_dev_pm_attach(struct device *dev)
 
        dev->pm_domain->detach = genpd_dev_pm_detach;
        dev->pm_domain->sync = genpd_dev_pm_sync;
-       ret = genpd_poweron(pd);
 
+       mutex_lock(&pd->lock);
+       ret = genpd_poweron(pd, 0);
+       mutex_unlock(&pd->lock);
 out:
        return ret ? -EPROBE_DEFER : 0;
 }
index 8812bfb..eea5156 100644 (file)
@@ -133,17 +133,17 @@ static int regmap_mmio_gather_write(void *context,
        while (val_size) {
                switch (ctx->val_bytes) {
                case 1:
-                       __raw_writeb(*(u8 *)val, ctx->regs + offset);
+                       writeb(*(u8 *)val, ctx->regs + offset);
                        break;
                case 2:
-                       __raw_writew(*(u16 *)val, ctx->regs + offset);
+                       writew(*(u16 *)val, ctx->regs + offset);
                        break;
                case 4:
-                       __raw_writel(*(u32 *)val, ctx->regs + offset);
+                       writel(*(u32 *)val, ctx->regs + offset);
                        break;
 #ifdef CONFIG_64BIT
                case 8:
-                       __raw_writeq(*(u64 *)val, ctx->regs + offset);
+                       writeq(*(u64 *)val, ctx->regs + offset);
                        break;
 #endif
                default:
@@ -193,17 +193,17 @@ static int regmap_mmio_read(void *context,
        while (val_size) {
                switch (ctx->val_bytes) {
                case 1:
-                       *(u8 *)val = __raw_readb(ctx->regs + offset);
+                       *(u8 *)val = readb(ctx->regs + offset);
                        break;
                case 2:
-                       *(u16 *)val = __raw_readw(ctx->regs + offset);
+                       *(u16 *)val = readw(ctx->regs + offset);
                        break;
                case 4:
-                       *(u32 *)val = __raw_readl(ctx->regs + offset);
+                       *(u32 *)val = readl(ctx->regs + offset);
                        break;
 #ifdef CONFIG_64BIT
                case 8:
-                       *(u64 *)val = __raw_readq(ctx->regs + offset);
+                       *(u64 *)val = readq(ctx->regs + offset);
                        break;
 #endif
                default:
index ad80c85..d048d20 100644 (file)
@@ -964,9 +964,9 @@ aoecmd_sleepwork(struct work_struct *work)
                ssize = get_capacity(d->gd);
                bd = bdget_disk(d->gd, 0);
                if (bd) {
-                       mutex_lock(&bd->bd_inode->i_mutex);
+                       inode_lock(bd->bd_inode);
                        i_size_write(bd->bd_inode, (loff_t)ssize<<9);
-                       mutex_unlock(&bd->bd_inode->i_mutex);
+                       inode_unlock(bd->bd_inode);
                        bdput(bd);
                }
                spin_lock_irq(&d->lock);
index 0dabc9b..92d6fc0 100644 (file)
@@ -364,12 +364,9 @@ static void bm_free_pages(struct page **pages, unsigned long number)
        }
 }
 
-static void bm_vk_free(void *ptr, int v)
+static inline void bm_vk_free(void *ptr)
 {
-       if (v)
-               vfree(ptr);
-       else
-               kfree(ptr);
+       kvfree(ptr);
 }
 
 /*
@@ -379,7 +376,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
 {
        struct page **old_pages = b->bm_pages;
        struct page **new_pages, *page;
-       unsigned int i, bytes, vmalloced = 0;
+       unsigned int i, bytes;
        unsigned long have = b->bm_number_of_pages;
 
        BUG_ON(have == 0 && old_pages != NULL);
@@ -401,7 +398,6 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
                                PAGE_KERNEL);
                if (!new_pages)
                        return NULL;
-               vmalloced = 1;
        }
 
        if (want >= have) {
@@ -411,7 +407,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
                        page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
                        if (!page) {
                                bm_free_pages(new_pages + have, i - have);
-                               bm_vk_free(new_pages, vmalloced);
+                               bm_vk_free(new_pages);
                                return NULL;
                        }
                        /* we want to know which page it is
@@ -427,11 +423,6 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
                */
        }
 
-       if (vmalloced)
-               b->bm_flags |= BM_P_VMALLOCED;
-       else
-               b->bm_flags &= ~BM_P_VMALLOCED;
-
        return new_pages;
 }
 
@@ -469,7 +460,7 @@ void drbd_bm_cleanup(struct drbd_device *device)
        if (!expect(device->bitmap))
                return;
        bm_free_pages(device->bitmap->bm_pages, device->bitmap->bm_number_of_pages);
-       bm_vk_free(device->bitmap->bm_pages, (BM_P_VMALLOCED & device->bitmap->bm_flags));
+       bm_vk_free(device->bitmap->bm_pages);
        kfree(device->bitmap);
        device->bitmap = NULL;
 }
@@ -643,7 +634,6 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
        unsigned long want, have, onpages; /* number of pages */
        struct page **npages, **opages = NULL;
        int err = 0, growing;
-       int opages_vmalloced;
 
        if (!expect(b))
                return -ENOMEM;
@@ -656,8 +646,6 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
        if (capacity == b->bm_dev_capacity)
                goto out;
 
-       opages_vmalloced = (BM_P_VMALLOCED & b->bm_flags);
-
        if (capacity == 0) {
                spin_lock_irq(&b->bm_lock);
                opages = b->bm_pages;
@@ -671,7 +659,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
                b->bm_dev_capacity = 0;
                spin_unlock_irq(&b->bm_lock);
                bm_free_pages(opages, onpages);
-               bm_vk_free(opages, opages_vmalloced);
+               bm_vk_free(opages);
                goto out;
        }
        bits  = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));
@@ -744,7 +732,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
 
        spin_unlock_irq(&b->bm_lock);
        if (opages != npages)
-               bm_vk_free(opages, opages_vmalloced);
+               bm_vk_free(opages);
        if (!growing)
                b->bm_set = bm_count_bits(b);
        drbd_info(device, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);
index 96a0107..4de95bb 100644 (file)
@@ -434,12 +434,12 @@ static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, vo
        if (!parent || d_really_is_negative(parent))
                goto out;
        /* serialize with d_delete() */
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        /* Make sure the object is still alive */
        if (simple_positive(file->f_path.dentry)
        && kref_get_unless_zero(kref))
                ret = 0;
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        if (!ret) {
                ret = single_open(file, show, data);
                if (ret)
index b6844fe..34bc84e 100644 (file)
@@ -536,9 +536,6 @@ struct drbd_bitmap; /* opaque for drbd_device */
 /* definition of bits in bm_flags to be used in drbd_bm_lock
  * and drbd_bitmap_io and friends. */
 enum bm_flag {
-       /* do we need to kfree, or vfree bm_pages? */
-       BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */
-
        /* currently locked for bulk operation */
        BM_LOCKED_MASK = 0xf,
 
index 81ea69f..4a87678 100644 (file)
@@ -5185,8 +5185,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
 
 out_err:
        rbd_dev_unparent(rbd_dev);
-       if (parent)
-               rbd_dev_destroy(parent);
+       rbd_dev_destroy(parent);
        return ret;
 }
 
index 129d47b..9a92c07 100644 (file)
@@ -132,7 +132,7 @@ config SUNXI_RSB
          and AC100/AC200 ICs.
 
 config UNIPHIER_SYSTEM_BUS
-       bool "UniPhier System Bus driver"
+       tristate "UniPhier System Bus driver"
        depends on ARCH_UNIPHIER && OF
        default y
        help
index 6575c0f..c3cb76b 100644 (file)
@@ -192,8 +192,10 @@ static int __init vexpress_config_init(void)
        /* Need the config devices early, before the "normal" devices... */
        for_each_compatible_node(node, NULL, "arm,vexpress,config-bus") {
                err = vexpress_config_populate(node);
-               if (err)
+               if (err) {
+                       of_node_put(node);
                        break;
+               }
        }
 
        return err;
index dbf2271..ff00331 100644 (file)
@@ -372,6 +372,7 @@ config HW_RANDOM_XGENE
 config HW_RANDOM_STM32
        tristate "STMicroelectronics STM32 random number generator"
        depends on HW_RANDOM && (ARCH_STM32 || COMPILE_TEST)
+       depends on HAS_IOMEM
        help
          This driver provides kernel-side support for the Random Number
          Generator hardware found on STM32 microcontrollers.
index 9fda22e..7fddd86 100644 (file)
@@ -68,6 +68,7 @@
 #include <linux/of_platform.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/acpi.h>
 
 #ifdef CONFIG_PARISC
 #include <asm/hardware.h>      /* for register_parisc_driver() stuff */
@@ -2054,8 +2055,6 @@ static int hardcode_find_bmc(void)
 
 #ifdef CONFIG_ACPI
 
-#include <linux/acpi.h>
-
 /*
  * Once we get an ACPI failure, we don't try any more, because we go
  * through the tables sequentially.  Once we don't find a table, there
index 6b1721f..4f6f94c 100644 (file)
@@ -689,7 +689,7 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
 {
        loff_t ret;
 
-       mutex_lock(&file_inode(file)->i_mutex);
+       inode_lock(file_inode(file));
        switch (orig) {
        case SEEK_CUR:
                offset += file->f_pos;
@@ -706,7 +706,7 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
        default:
                ret = -EINVAL;
        }
-       mutex_unlock(&file_inode(file)->i_mutex);
+       inode_unlock(file_inode(file));
        return ret;
 }
 
index f1d7fa4..f3f92d5 100644 (file)
@@ -93,14 +93,11 @@ struct vma_data {
        spinlock_t lock;        /* Serialize access to this structure. */
        int count;              /* Number of pages allocated. */
        enum mspec_page_type type; /* Type of pages allocated. */
-       int flags;              /* See VMD_xxx below. */
        unsigned long vm_start; /* Original (unsplit) base. */
        unsigned long vm_end;   /* Original (unsplit) end. */
        unsigned long maddr[0]; /* Array of MSPEC addresses. */
 };
 
-#define VMD_VMALLOCED 0x1      /* vmalloc'd rather than kmalloc'd */
-
 /* used on shub2 to clear FOP cache in the HUB */
 static unsigned long scratch_page[MAX_NUMNODES];
 #define SH2_AMO_CACHE_ENTRIES  4
@@ -185,10 +182,7 @@ mspec_close(struct vm_area_struct *vma)
                               "failed to zero page %ld\n", my_page);
        }
 
-       if (vdata->flags & VMD_VMALLOCED)
-               vfree(vdata);
-       else
-               kfree(vdata);
+       kvfree(vdata);
 }
 
 /*
@@ -256,7 +250,7 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma,
                                        enum mspec_page_type type)
 {
        struct vma_data *vdata;
-       int pages, vdata_size, flags = 0;
+       int pages, vdata_size;
 
        if (vma->vm_pgoff != 0)
                return -EINVAL;
@@ -271,16 +265,13 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma,
        vdata_size = sizeof(struct vma_data) + pages * sizeof(long);
        if (vdata_size <= PAGE_SIZE)
                vdata = kzalloc(vdata_size, GFP_KERNEL);
-       else {
+       else
                vdata = vzalloc(vdata_size);
-               flags = VMD_VMALLOCED;
-       }
        if (!vdata)
                return -ENOMEM;
 
        vdata->vm_start = vma->vm_start;
        vdata->vm_end = vma->vm_end;
-       vdata->flags = flags;
        vdata->type = type;
        spin_lock_init(&vdata->lock);
        atomic_set(&vdata->refcnt, 1);
index 0b311fa..b526dc1 100644 (file)
@@ -290,9 +290,9 @@ static int ps3flash_fsync(struct file *file, loff_t start, loff_t end, int datas
 {
        struct inode *inode = file_inode(file);
        int err;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        err = ps3flash_writeback(ps3flash_dev);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
index 56777f0..33db740 100644 (file)
@@ -30,6 +30,8 @@ config CLKSRC_MMIO
 config DIGICOLOR_TIMER
        bool "Digicolor timer driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       select CLKSRC_MMIO
+       depends on HAS_IOMEM
        help
          Enables the support for the digicolor timer driver.
 
@@ -55,6 +57,7 @@ config ARMADA_370_XP_TIMER
        bool "Armada 370 and XP timer driver" if COMPILE_TEST
        depends on ARM
        select CLKSRC_OF
+       select CLKSRC_MMIO
        help
          Enables the support for the Armada 370 and XP timer driver.
 
@@ -76,6 +79,7 @@ config ORION_TIMER
 config SUN4I_TIMER
        bool "Sun4i timer driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        select CLKSRC_MMIO
        help
          Enables support for the Sun4i timer.
@@ -89,6 +93,7 @@ config SUN5I_HSTIMER
 
 config TEGRA_TIMER
        bool "Tegra timer driver" if COMPILE_TEST
+       select CLKSRC_MMIO
        depends on ARM
        help
          Enables support for the Tegra driver.
@@ -96,6 +101,7 @@ config TEGRA_TIMER
 config VT8500_TIMER
        bool "VT8500 timer driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        help
          Enables support for the VT8500 driver.
 
@@ -131,6 +137,7 @@ config CLKSRC_NOMADIK_MTU_SCHED_CLOCK
 config CLKSRC_DBX500_PRCMU
        bool "Clocksource PRCMU Timer" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        help
          Use the always on PRCMU Timer as clocksource
 
@@ -248,6 +255,7 @@ config CLKSRC_EXYNOS_MCT
 config CLKSRC_SAMSUNG_PWM
        bool "PWM timer drvier for Samsung S3C, S5P" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        help
          This is a new clocksource driver for the PWM timer found in
          Samsung S3C, S5P and Exynos SoCs, replacing an earlier driver
@@ -257,12 +265,14 @@ config CLKSRC_SAMSUNG_PWM
 config FSL_FTM_TIMER
        bool "Freescale FlexTimer Module driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        select CLKSRC_MMIO
        help
          Support for Freescale FlexTimer Module (FTM) timer.
 
 config VF_PIT_TIMER
        bool
+       select CLKSRC_MMIO
        help
          Support for Period Interrupt Timer on Freescale Vybrid Family SoCs.
 
@@ -360,6 +370,7 @@ config CLKSRC_TANGO_XTAL
 config CLKSRC_PXA
        bool "Clocksource for PXA or SA-11x0 platform" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        select CLKSRC_MMIO
        help
          This enables OST0 support available on PXA and SA-11x0
@@ -394,6 +405,7 @@ config CLKSRC_ST_LPC
        bool "Low power clocksource found in the LPC" if COMPILE_TEST
        select CLKSRC_OF if OF
        depends on HAS_IOMEM
+       select CLKSRC_MMIO
        help
          Enable this option to use the Low Power controller timer
          as clocksource.
index 6ee9140..4da2af9 100644 (file)
@@ -98,7 +98,8 @@ static int tc_shutdown(struct clock_event_device *d)
 
        __raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR));
        __raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR));
-       clk_disable(tcd->clk);
+       if (!clockevent_state_detached(d))
+               clk_disable(tcd->clk);
 
        return 0;
 }
index 9bc37c4..0ca74d0 100644 (file)
@@ -142,15 +142,16 @@ static int allocate_resources(int cpu, struct device **cdev,
 
 try_again:
        cpu_reg = regulator_get_optional(cpu_dev, reg);
-       if (IS_ERR(cpu_reg)) {
+       ret = PTR_ERR_OR_ZERO(cpu_reg);
+       if (ret) {
                /*
                 * If cpu's regulator supply node is present, but regulator is
                 * not yet registered, we should try defering probe.
                 */
-               if (PTR_ERR(cpu_reg) == -EPROBE_DEFER) {
+               if (ret == -EPROBE_DEFER) {
                        dev_dbg(cpu_dev, "cpu%d regulator not ready, retry\n",
                                cpu);
-                       return -EPROBE_DEFER;
+                       return ret;
                }
 
                /* Try with "cpu-supply" */
@@ -159,18 +160,16 @@ try_again:
                        goto try_again;
                }
 
-               dev_dbg(cpu_dev, "no regulator for cpu%d: %ld\n",
-                       cpu, PTR_ERR(cpu_reg));
+               dev_dbg(cpu_dev, "no regulator for cpu%d: %d\n", cpu, ret);
        }
 
        cpu_clk = clk_get(cpu_dev, NULL);
-       if (IS_ERR(cpu_clk)) {
+       ret = PTR_ERR_OR_ZERO(cpu_clk);
+       if (ret) {
                /* put regulator */
                if (!IS_ERR(cpu_reg))
                        regulator_put(cpu_reg);
 
-               ret = PTR_ERR(cpu_clk);
-
                /*
                 * If cpu's clk node is present, but clock is not yet
                 * registered, we should try defering probe.
index c35e7da..e979ec7 100644 (file)
@@ -48,11 +48,11 @@ static struct cpufreq_policy *next_policy(struct cpufreq_policy *policy,
                                          bool active)
 {
        do {
-               policy = list_next_entry(policy, policy_list);
-
                /* No more policies in the list */
-               if (&policy->policy_list == &cpufreq_policy_list)
+               if (list_is_last(&policy->policy_list, &cpufreq_policy_list))
                        return NULL;
+
+               policy = list_next_entry(policy, policy_list);
        } while (!suitable_policy(policy, active));
 
        return policy;
index bab3a51..e0d1110 100644 (file)
@@ -387,16 +387,18 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy,
        if (!have_governor_per_policy())
                cdata->gdbs_data = dbs_data;
 
+       policy->governor_data = dbs_data;
+
        ret = sysfs_create_group(get_governor_parent_kobj(policy),
                                 get_sysfs_attr(dbs_data));
        if (ret)
                goto reset_gdbs_data;
 
-       policy->governor_data = dbs_data;
-
        return 0;
 
 reset_gdbs_data:
+       policy->governor_data = NULL;
+
        if (!have_governor_per_policy())
                cdata->gdbs_data = NULL;
        cdata->exit(dbs_data, !policy->governor->initialized);
@@ -417,16 +419,19 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy,
        if (!cdbs->shared || cdbs->shared->policy)
                return -EBUSY;
 
-       policy->governor_data = NULL;
        if (!--dbs_data->usage_count) {
                sysfs_remove_group(get_governor_parent_kobj(policy),
                                   get_sysfs_attr(dbs_data));
 
+               policy->governor_data = NULL;
+
                if (!have_governor_per_policy())
                        cdata->gdbs_data = NULL;
 
                cdata->exit(dbs_data, policy->governor->initialized == 1);
                kfree(dbs_data);
+       } else {
+               policy->governor_data = NULL;
        }
 
        free_common_dbs_info(policy, cdata);
index 1d99c97..0963772 100644 (file)
@@ -202,7 +202,7 @@ static void __init pxa_cpufreq_init_voltages(void)
        }
 }
 #else
-static int pxa_cpufreq_change_voltage(struct pxa_freqs *pxa_freq)
+static int pxa_cpufreq_change_voltage(const struct pxa_freqs *pxa_freq)
 {
        return 0;
 }
index 344058f..d5657d5 100644 (file)
@@ -119,7 +119,6 @@ struct cpuidle_coupled {
 
 #define CPUIDLE_COUPLED_NOT_IDLE       (-1)
 
-static DEFINE_MUTEX(cpuidle_coupled_lock);
 static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);
 
 /*
index 046423b..f996efc 100644 (file)
@@ -153,7 +153,7 @@ int cpuidle_enter_freeze(struct cpuidle_driver *drv, struct cpuidle_device *dev)
         * be frozen safely.
         */
        index = find_deepest_state(drv, dev, UINT_MAX, 0, true);
-       if (index >= 0)
+       if (index > 0)
                enter_freeze_proper(drv, dev, index);
 
        return index;
index 3dd69df..07d4942 100644 (file)
@@ -381,6 +381,7 @@ config CRYPTO_DEV_BFIN_CRC
 
 config CRYPTO_DEV_ATMEL_AES
        tristate "Support for Atmel AES hw accelerator"
+       depends on HAS_DMA
        depends on AT_XDMAC || AT_HDMAC || COMPILE_TEST
        select CRYPTO_AES
        select CRYPTO_AEAD
index 5621612..3eb3f12 100644 (file)
@@ -280,6 +280,7 @@ static const char *atmel_aes_reg_name(u32 offset, char *tmp, size_t sz)
        case AES_GCMHR(2):
        case AES_GCMHR(3):
                snprintf(tmp, sz, "GCMHR[%u]", (offset - AES_GCMHR(0)) >> 2);
+               break;
 
        default:
                snprintf(tmp, sz, "0x%02x", offset);
@@ -399,7 +400,7 @@ static int atmel_aes_hw_init(struct atmel_aes_dev *dd)
 {
        int err;
 
-       err = clk_prepare_enable(dd->iclk);
+       err = clk_enable(dd->iclk);
        if (err)
                return err;
 
@@ -429,7 +430,7 @@ static int atmel_aes_hw_version_init(struct atmel_aes_dev *dd)
 
        dev_info(dd->dev, "version: 0x%x\n", dd->hw_version);
 
-       clk_disable_unprepare(dd->iclk);
+       clk_disable(dd->iclk);
        return 0;
 }
 
@@ -447,7 +448,7 @@ static inline bool atmel_aes_is_encrypt(const struct atmel_aes_dev *dd)
 
 static inline int atmel_aes_complete(struct atmel_aes_dev *dd, int err)
 {
-       clk_disable_unprepare(dd->iclk);
+       clk_disable(dd->iclk);
        dd->flags &= ~AES_FLAGS_BUSY;
 
        if (dd->is_async)
@@ -2090,10 +2091,14 @@ static int atmel_aes_probe(struct platform_device *pdev)
                goto res_err;
        }
 
-       err = atmel_aes_hw_version_init(aes_dd);
+       err = clk_prepare(aes_dd->iclk);
        if (err)
                goto res_err;
 
+       err = atmel_aes_hw_version_init(aes_dd);
+       if (err)
+               goto iclk_unprepare;
+
        atmel_aes_get_cap(aes_dd);
 
        err = atmel_aes_buff_init(aes_dd);
@@ -2126,6 +2131,8 @@ err_algs:
 err_aes_dma:
        atmel_aes_buff_cleanup(aes_dd);
 err_aes_buff:
+iclk_unprepare:
+       clk_unprepare(aes_dd->iclk);
 res_err:
        tasklet_kill(&aes_dd->done_task);
        tasklet_kill(&aes_dd->queue_task);
@@ -2154,6 +2161,8 @@ static int atmel_aes_remove(struct platform_device *pdev)
        atmel_aes_dma_cleanup(aes_dd);
        atmel_aes_buff_cleanup(aes_dd);
 
+       clk_unprepare(aes_dd->iclk);
+
        return 0;
 }
 
index 20de861..8bf9914 100644 (file)
@@ -782,7 +782,7 @@ static void atmel_sha_finish_req(struct ahash_request *req, int err)
        dd->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL | SHA_FLAGS_CPU |
                        SHA_FLAGS_DMA_READY | SHA_FLAGS_OUTPUT_READY);
 
-       clk_disable_unprepare(dd->iclk);
+       clk_disable(dd->iclk);
 
        if (req->base.complete)
                req->base.complete(&req->base, err);
@@ -795,7 +795,7 @@ static int atmel_sha_hw_init(struct atmel_sha_dev *dd)
 {
        int err;
 
-       err = clk_prepare_enable(dd->iclk);
+       err = clk_enable(dd->iclk);
        if (err)
                return err;
 
@@ -822,7 +822,7 @@ static void atmel_sha_hw_version_init(struct atmel_sha_dev *dd)
        dev_info(dd->dev,
                        "version: 0x%x\n", dd->hw_version);
 
-       clk_disable_unprepare(dd->iclk);
+       clk_disable(dd->iclk);
 }
 
 static int atmel_sha_handle_queue(struct atmel_sha_dev *dd,
@@ -1410,6 +1410,10 @@ static int atmel_sha_probe(struct platform_device *pdev)
                goto res_err;
        }
 
+       err = clk_prepare(sha_dd->iclk);
+       if (err)
+               goto res_err;
+
        atmel_sha_hw_version_init(sha_dd);
 
        atmel_sha_get_cap(sha_dd);
@@ -1421,12 +1425,12 @@ static int atmel_sha_probe(struct platform_device *pdev)
                        if (IS_ERR(pdata)) {
                                dev_err(&pdev->dev, "platform data not available\n");
                                err = PTR_ERR(pdata);
-                               goto res_err;
+                               goto iclk_unprepare;
                        }
                }
                if (!pdata->dma_slave) {
                        err = -ENXIO;
-                       goto res_err;
+                       goto iclk_unprepare;
                }
                err = atmel_sha_dma_init(sha_dd, pdata);
                if (err)
@@ -1457,6 +1461,8 @@ err_algs:
        if (sha_dd->caps.has_dma)
                atmel_sha_dma_cleanup(sha_dd);
 err_sha_dma:
+iclk_unprepare:
+       clk_unprepare(sha_dd->iclk);
 res_err:
        tasklet_kill(&sha_dd->done_task);
 sha_dd_err:
@@ -1483,12 +1489,7 @@ static int atmel_sha_remove(struct platform_device *pdev)
        if (sha_dd->caps.has_dma)
                atmel_sha_dma_cleanup(sha_dd);
 
-       iounmap(sha_dd->io_base);
-
-       clk_put(sha_dd->iclk);
-
-       if (sha_dd->irq >= 0)
-               free_irq(sha_dd->irq, sha_dd);
+       clk_unprepare(sha_dd->iclk);
 
        return 0;
 }
index 8abb4bc..69d4a13 100644 (file)
@@ -534,8 +534,8 @@ static int caam_probe(struct platform_device *pdev)
         * long pointers in master configuration register
         */
        clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK, MCFGR_AWCACHE_CACH |
-                     MCFGR_WDENABLE | (sizeof(dma_addr_t) == sizeof(u64) ?
-                                       MCFGR_LONG_PTR : 0));
+                     MCFGR_AWCACHE_BUFF | MCFGR_WDENABLE |
+                     (sizeof(dma_addr_t) == sizeof(u64) ? MCFGR_LONG_PTR : 0));
 
        /*
         *  Read the Compile Time paramters and SCFGR to determine
index 0643e33..c0656e7 100644 (file)
@@ -306,7 +306,7 @@ static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa)
                return -ENOMEM;
 
        dma->padding_pool = dmam_pool_create("cesa_padding", dev, 72, 1, 0);
-       if (!dma->cache_pool)
+       if (!dma->padding_pool)
                return -ENOMEM;
 
        cesa->dma = dma;
index 0ac0ba8..1e480f1 100644 (file)
@@ -389,7 +389,7 @@ static int qat_hal_check_ae_alive(struct icp_qat_fw_loader_handle *handle)
 {
        unsigned int base_cnt, cur_cnt;
        unsigned char ae;
-       unsigned int times = MAX_RETRY_TIMES;
+       int times = MAX_RETRY_TIMES;
 
        for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) {
                qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT,
@@ -402,7 +402,7 @@ static int qat_hal_check_ae_alive(struct icp_qat_fw_loader_handle *handle)
                        cur_cnt &= 0xffff;
                } while (times-- && (cur_cnt == base_cnt));
 
-               if (!times) {
+               if (times < 0) {
                        pr_err("QAT: AE%d is inactive!!\n", ae);
                        return -EFAULT;
                }
@@ -453,7 +453,11 @@ static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle)
        void __iomem *csr_addr =
                        (void __iomem *)((uintptr_t)handle->hal_ep_csr_addr_v +
                        ESRAM_AUTO_INIT_CSR_OFFSET);
-       unsigned int csr_val, times = 30;
+       unsigned int csr_val;
+       int times = 30;
+
+       if (handle->pci_dev->device == ADF_C3XXX_PCI_DEVICE_ID)
+               return 0;
 
        csr_val = ADF_CSR_RD(csr_addr, 0);
        if ((csr_val & ESRAM_AUTO_TINIT) && (csr_val & ESRAM_AUTO_TINIT_DONE))
@@ -467,7 +471,7 @@ static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle)
                qat_hal_wait_cycles(handle, 0, ESRAM_AUTO_INIT_USED_CYCLES, 0);
                csr_val = ADF_CSR_RD(csr_addr, 0);
        } while (!(csr_val & ESRAM_AUTO_TINIT_DONE) && times--);
-       if ((!times)) {
+       if ((times < 0)) {
                pr_err("QAT: Fail to init eSram!\n");
                return -EFAULT;
        }
@@ -658,7 +662,7 @@ static int qat_hal_clear_gpr(struct icp_qat_fw_loader_handle *handle)
                        ret = qat_hal_wait_cycles(handle, ae, 20, 1);
                } while (ret && times--);
 
-               if (!times) {
+               if (times < 0) {
                        pr_err("QAT: clear GPR of AE %d failed", ae);
                        return -EINVAL;
                }
@@ -693,14 +697,12 @@ int qat_hal_init(struct adf_accel_dev *accel_dev)
        struct adf_hw_device_data *hw_data = accel_dev->hw_device;
        struct adf_bar *misc_bar =
                        &pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)];
-       struct adf_bar *sram_bar =
-                       &pci_info->pci_bars[hw_data->get_sram_bar_id(hw_data)];
+       struct adf_bar *sram_bar;
 
        handle = kzalloc(sizeof(*handle), GFP_KERNEL);
        if (!handle)
                return -ENOMEM;
 
-       handle->hal_sram_addr_v = sram_bar->virt_addr;
        handle->hal_cap_g_ctl_csr_addr_v =
                (void __iomem *)((uintptr_t)misc_bar->virt_addr +
                                 ICP_QAT_CAP_OFFSET);
@@ -714,6 +716,11 @@ int qat_hal_init(struct adf_accel_dev *accel_dev)
                (void __iomem *)((uintptr_t)handle->hal_cap_ae_xfer_csr_addr_v +
                                 LOCAL_TO_XFER_REG_OFFSET);
        handle->pci_dev = pci_info->pci_dev;
+       if (handle->pci_dev->device != ADF_C3XXX_PCI_DEVICE_ID) {
+               sram_bar =
+                       &pci_info->pci_bars[hw_data->get_sram_bar_id(hw_data)];
+               handle->hal_sram_addr_v = sram_bar->virt_addr;
+       }
        handle->fw_auth = (handle->pci_dev->device ==
                           ADF_DH895XCC_PCI_DEVICE_ID) ? false : true;
        handle->hal_handle = kzalloc(sizeof(*handle->hal_handle), GFP_KERNEL);
index 66f729e..20c9539 100644 (file)
@@ -25,7 +25,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
        amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o
 
 # add asic specific block
-amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o gmc_v7_0.o cik_ih.o kv_smc.o kv_dpm.o \
+amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
        ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \
        amdgpu_amdkfd_gfx_v7.o
 
@@ -34,6 +34,7 @@ amdgpu-y += \
 
 # add GMC block
 amdgpu-y += \
+       gmc_v7_0.o \
        gmc_v8_0.o
 
 # add IH block
index 313b0cc..82edf95 100644 (file)
@@ -2278,60 +2278,60 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e))
 
 #define amdgpu_dpm_get_temperature(adev) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle) : \
-             (adev)->pm.funcs->get_temperature((adev))
+             (adev)->pm.funcs->get_temperature((adev)))
 
 #define amdgpu_dpm_set_fan_control_mode(adev, m) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m)) : \
-             (adev)->pm.funcs->set_fan_control_mode((adev), (m))
+             (adev)->pm.funcs->set_fan_control_mode((adev), (m)))
 
 #define amdgpu_dpm_get_fan_control_mode(adev) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_fan_control_mode((adev)->powerplay.pp_handle) : \
-             (adev)->pm.funcs->get_fan_control_mode((adev))
+             (adev)->pm.funcs->get_fan_control_mode((adev)))
 
 #define amdgpu_dpm_set_fan_speed_percent(adev, s) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->set_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
-             (adev)->pm.funcs->set_fan_speed_percent((adev), (s))
+             (adev)->pm.funcs->set_fan_speed_percent((adev), (s)))
 
 #define amdgpu_dpm_get_fan_speed_percent(adev, s) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
-             (adev)->pm.funcs->get_fan_speed_percent((adev), (s))
+             (adev)->pm.funcs->get_fan_speed_percent((adev), (s)))
 
 #define amdgpu_dpm_get_sclk(adev, l) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l)) : \
-               (adev)->pm.funcs->get_sclk((adev), (l))
+               (adev)->pm.funcs->get_sclk((adev), (l)))
 
 #define amdgpu_dpm_get_mclk(adev, l)  \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (l)) : \
-             (adev)->pm.funcs->get_mclk((adev), (l))
+             (adev)->pm.funcs->get_mclk((adev), (l)))
 
 
 #define amdgpu_dpm_force_performance_level(adev, l) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l)) : \
-             (adev)->pm.funcs->force_performance_level((adev), (l))
+             (adev)->pm.funcs->force_performance_level((adev), (l)))
 
 #define amdgpu_dpm_powergate_uvd(adev, g) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g)) : \
-             (adev)->pm.funcs->powergate_uvd((adev), (g))
+             (adev)->pm.funcs->powergate_uvd((adev), (g)))
 
 #define amdgpu_dpm_powergate_vce(adev, g) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)) : \
-             (adev)->pm.funcs->powergate_vce((adev), (g))
+             (adev)->pm.funcs->powergate_vce((adev), (g)))
 
 #define amdgpu_dpm_debugfs_print_current_performance_level(adev, m) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->print_current_performance_level((adev)->powerplay.pp_handle, (m)) : \
-             (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m))
+             (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m)))
 
 #define amdgpu_dpm_get_current_power_state(adev) \
        (adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle)
index 0e13763..362bedc 100644 (file)
@@ -154,7 +154,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
        .get_fw_version = get_fw_version
 };
 
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions()
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
 {
        return (struct kfd2kgd_calls *)&kfd2kgd;
 }
index 79fa5c7..04b744d 100644 (file)
@@ -115,7 +115,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
        .get_fw_version = get_fw_version
 };
 
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions()
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
 {
        return (struct kfd2kgd_calls *)&kfd2kgd;
 }
index 6f89f8e..b882e81 100644 (file)
@@ -478,9 +478,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
        struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
        unsigned i;
 
-       amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
-
        if (!error) {
+               amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
+
                /* Sort the buffer list from the smallest to largest buffer,
                 * which affects the order of buffers in the LRU list.
                 * This assures that the smallest buffers are added first
index b5dbbb5..9c1af89 100644 (file)
@@ -256,11 +256,11 @@ static struct pci_device_id pciidlist[] = {
        {0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
 #endif
        /* topaz */
-       {0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
-       {0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
-       {0x1002, 0x6902, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
-       {0x1002, 0x6903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
-       {0x1002, 0x6907, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
+       {0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+       {0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+       {0x1002, 0x6902, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+       {0x1002, 0x6903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+       {0x1002, 0x6907, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
        /* tonga */
        {0x1002, 0x6920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
        {0x1002, 0x6921, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
index cfb6caa..9191467 100644 (file)
@@ -333,6 +333,10 @@ int amdgpu_fbdev_init(struct amdgpu_device *adev)
        if (!adev->mode_info.mode_config_initialized)
                return 0;
 
+       /* don't init fbdev if there are no connectors */
+       if (list_empty(&adev->ddev->mode_config.connector_list))
+               return 0;
+
        /* select 8 bpp console on low vram cards */
        if (adev->mc.real_vram_size <= (32*1024*1024))
                bpp_sel = 8;
index c3ce103..b8fbbd7 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/slab.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
+#include <drm/drm_cache.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
@@ -261,6 +262,13 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
                                       AMDGPU_GEM_DOMAIN_OA);
 
        bo->flags = flags;
+
+       /* For architectures that don't support WC memory,
+        * mask out the WC flag from the BO
+        */
+       if (!drm_arch_can_wc_memory())
+               bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+
        amdgpu_fill_placement_to_bo(bo, placement);
        /* Kernel allocation are uninterruptible */
        r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type,
@@ -399,7 +407,8 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
                }
                if (fpfn > bo->placements[i].fpfn)
                        bo->placements[i].fpfn = fpfn;
-               if (lpfn && lpfn < bo->placements[i].lpfn)
+               if (!bo->placements[i].lpfn ||
+                   (lpfn && lpfn < bo->placements[i].lpfn))
                        bo->placements[i].lpfn = lpfn;
                bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
        }
index 5ee9a06..b9d0d55 100644 (file)
@@ -99,13 +99,24 @@ static int amdgpu_pp_early_init(void *handle)
 
 #ifdef CONFIG_DRM_AMD_POWERPLAY
        switch (adev->asic_type) {
-               case CHIP_TONGA:
-               case CHIP_FIJI:
-                       adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
-                       break;
-               default:
-                       adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
-                       break;
+       case CHIP_TONGA:
+       case CHIP_FIJI:
+               adev->pp_enabled = (amdgpu_powerplay == 0) ? false : true;
+               break;
+       case CHIP_CARRIZO:
+       case CHIP_STONEY:
+               adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
+               break;
+       /* These chips don't have powerplay implemenations */
+       case CHIP_BONAIRE:
+       case CHIP_HAWAII:
+       case CHIP_KABINI:
+       case CHIP_MULLINS:
+       case CHIP_KAVERI:
+       case CHIP_TOPAZ:
+       default:
+               adev->pp_enabled = false;
+               break;
        }
 #else
        adev->pp_enabled = false;
index 78e9b0f..d1f234d 100644 (file)
@@ -487,7 +487,7 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
        seq_printf(m, "rptr: 0x%08x [%5d]\n",
                   rptr, rptr);
 
-       rptr_next = ~0;
+       rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
 
        seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
                   ring->wptr, ring->wptr);
index 8a1752f..55cf05e 100644 (file)
@@ -808,7 +808,7 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
                        flags |= AMDGPU_PTE_SNOOPED;
        }
 
-       if (adev->asic_type >= CHIP_TOPAZ)
+       if (adev->asic_type >= CHIP_TONGA)
                flags |= AMDGPU_PTE_EXECUTABLE;
 
        flags |= AMDGPU_PTE_READABLE;
index aefc668..9599f75 100644 (file)
@@ -1282,7 +1282,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
        const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
                AMDGPU_VM_PTE_COUNT * 8);
-       unsigned pd_size, pd_entries, pts_size;
+       unsigned pd_size, pd_entries;
        int i, r;
 
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -1300,8 +1300,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        pd_entries = amdgpu_vm_num_pdes(adev);
 
        /* allocate page table array */
-       pts_size = pd_entries * sizeof(struct amdgpu_vm_pt);
-       vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
+       vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));
        if (vm->page_tables == NULL) {
                DRM_ERROR("Cannot allocate memory for page table array\n");
                return -ENOMEM;
@@ -1361,7 +1360,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
        for (i = 0; i < amdgpu_vm_num_pdes(adev); i++)
                amdgpu_bo_unref(&vm->page_tables[i].entry.robj);
-       kfree(vm->page_tables);
+       drm_free_large(vm->page_tables);
 
        amdgpu_bo_unref(&vm->page_directory);
        fence_put(vm->page_directory_fence);
index 72793f9..6c76139 100644 (file)
@@ -4738,6 +4738,22 @@ static int gfx_v7_0_early_init(void *handle)
        return 0;
 }
 
+static int gfx_v7_0_late_init(void *handle)
+{
+       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       int r;
+
+       r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+       if (r)
+               return r;
+
+       r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+       if (r)
+               return r;
+
+       return 0;
+}
+
 static int gfx_v7_0_sw_init(void *handle)
 {
        struct amdgpu_ring *ring;
@@ -4890,6 +4906,8 @@ static int gfx_v7_0_hw_fini(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+       amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
        gfx_v7_0_cp_enable(adev, false);
        gfx_v7_0_rlc_stop(adev);
        gfx_v7_0_fini_pg(adev);
@@ -5527,7 +5545,7 @@ static int gfx_v7_0_set_powergating_state(void *handle,
 
 const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
        .early_init = gfx_v7_0_early_init,
-       .late_init = NULL,
+       .late_init = gfx_v7_0_late_init,
        .sw_init = gfx_v7_0_sw_init,
        .sw_fini = gfx_v7_0_sw_fini,
        .hw_init = gfx_v7_0_hw_init,
index 13235d8..8f8ec37 100644 (file)
@@ -111,7 +111,6 @@ MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
-MODULE_FIRMWARE("amdgpu/topaz_mec2.bin");
 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
 
 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
@@ -828,7 +827,8 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 
-       if (adev->asic_type != CHIP_STONEY) {
+       if ((adev->asic_type != CHIP_STONEY) &&
+           (adev->asic_type != CHIP_TOPAZ)) {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
                err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
                if (!err) {
@@ -3851,10 +3851,16 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
                        if (r)
                                return -EINVAL;
 
-                       r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-                                                       AMDGPU_UCODE_ID_CP_MEC1);
-                       if (r)
-                               return -EINVAL;
+                       if (adev->asic_type == CHIP_TOPAZ) {
+                               r = gfx_v8_0_cp_compute_load_microcode(adev);
+                               if (r)
+                                       return r;
+                       } else {
+                               r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
+                                                                                AMDGPU_UCODE_ID_CP_MEC1);
+                               if (r)
+                                       return -EINVAL;
+                       }
                }
        }
 
@@ -3901,6 +3907,8 @@ static int gfx_v8_0_hw_fini(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+       amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
        gfx_v8_0_cp_enable(adev, false);
        gfx_v8_0_rlc_stop(adev);
        gfx_v8_0_cp_compute_fini(adev);
@@ -4186,7 +4194,18 @@ static int gfx_v8_0_soft_reset(void *handle)
                gfx_v8_0_cp_gfx_enable(adev, false);
 
                /* Disable MEC parsing/prefetching */
-               /* XXX todo */
+               gfx_v8_0_cp_compute_enable(adev, false);
+
+               if (grbm_soft_reset || srbm_soft_reset) {
+                       tmp = RREG32(mmGMCON_DEBUG);
+                       tmp = REG_SET_FIELD(tmp,
+                                           GMCON_DEBUG, GFX_STALL, 1);
+                       tmp = REG_SET_FIELD(tmp,
+                                           GMCON_DEBUG, GFX_CLEAR, 1);
+                       WREG32(mmGMCON_DEBUG, tmp);
+
+                       udelay(50);
+               }
 
                if (grbm_soft_reset) {
                        tmp = RREG32(mmGRBM_SOFT_RESET);
@@ -4215,6 +4234,16 @@ static int gfx_v8_0_soft_reset(void *handle)
                        WREG32(mmSRBM_SOFT_RESET, tmp);
                        tmp = RREG32(mmSRBM_SOFT_RESET);
                }
+
+               if (grbm_soft_reset || srbm_soft_reset) {
+                       tmp = RREG32(mmGMCON_DEBUG);
+                       tmp = REG_SET_FIELD(tmp,
+                                           GMCON_DEBUG, GFX_STALL, 0);
+                       tmp = REG_SET_FIELD(tmp,
+                                           GMCON_DEBUG, GFX_CLEAR, 0);
+                       WREG32(mmGMCON_DEBUG, tmp);
+               }
+
                /* Wait a little for things to settle down */
                udelay(50);
                gfx_v8_0_print_status((void *)adev);
@@ -4308,6 +4337,14 @@ static int gfx_v8_0_late_init(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;
 
+       r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+       if (r)
+               return r;
+
+       r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+       if (r)
+               return r;
+
        /* requires IBs so do in late init after IB pool is initialized */
        r = gfx_v8_0_do_edc_gpr_workarounds(adev);
        if (r)
index 3f95606..8aa2991 100644 (file)
@@ -42,9 +42,39 @@ static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
 
 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
+MODULE_FIRMWARE("amdgpu/topaz_mc.bin");
+
+static const u32 golden_settings_iceland_a11[] =
+{
+       mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+       mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+       mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+       mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff
+};
+
+static const u32 iceland_mgcg_cgcg_init[] =
+{
+       mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
+};
+
+static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev)
+{
+       switch (adev->asic_type) {
+       case CHIP_TOPAZ:
+               amdgpu_program_register_sequence(adev,
+                                                iceland_mgcg_cgcg_init,
+                                                (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
+               amdgpu_program_register_sequence(adev,
+                                                golden_settings_iceland_a11,
+                                                (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
+               break;
+       default:
+               break;
+       }
+}
 
 /**
- * gmc8_mc_wait_for_idle - wait for MC idle callback.
+ * gmc7_mc_wait_for_idle - wait for MC idle callback.
  *
  * @adev: amdgpu_device pointer
  *
@@ -132,13 +162,20 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
        case CHIP_HAWAII:
                chip_name = "hawaii";
                break;
+       case CHIP_TOPAZ:
+               chip_name = "topaz";
+               break;
        case CHIP_KAVERI:
        case CHIP_KABINI:
                return 0;
        default: BUG();
        }
 
-       snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
+       if (adev->asic_type == CHIP_TOPAZ)
+               snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
+       else
+               snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
+
        err = request_firmware(&adev->mc.fw, fw_name, adev->dev);
        if (err)
                goto out;
@@ -984,6 +1021,8 @@ static int gmc_v7_0_hw_init(void *handle)
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       gmc_v7_0_init_golden_registers(adev);
+
        gmc_v7_0_mc_program(adev);
 
        if (!(adev->flags & AMD_IS_APU)) {
index c0c9a01..3efd455 100644 (file)
@@ -42,9 +42,7 @@
 static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev);
 static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 
-MODULE_FIRMWARE("amdgpu/topaz_mc.bin");
 MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
-MODULE_FIRMWARE("amdgpu/fiji_mc.bin");
 
 static const u32 golden_settings_tonga_a11[] =
 {
@@ -75,19 +73,6 @@ static const u32 fiji_mgcg_cgcg_init[] =
        mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
 };
 
-static const u32 golden_settings_iceland_a11[] =
-{
-       mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
-       mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
-       mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
-       mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff
-};
-
-static const u32 iceland_mgcg_cgcg_init[] =
-{
-       mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
-};
-
 static const u32 cz_mgcg_cgcg_init[] =
 {
        mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
@@ -102,14 +87,6 @@ static const u32 stoney_mgcg_cgcg_init[] =
 static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
 {
        switch (adev->asic_type) {
-       case CHIP_TOPAZ:
-               amdgpu_program_register_sequence(adev,
-                                                iceland_mgcg_cgcg_init,
-                                                (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
-               amdgpu_program_register_sequence(adev,
-                                                golden_settings_iceland_a11,
-                                                (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
-               break;
        case CHIP_FIJI:
                amdgpu_program_register_sequence(adev,
                                                 fiji_mgcg_cgcg_init,
@@ -229,15 +206,10 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
        DRM_DEBUG("\n");
 
        switch (adev->asic_type) {
-       case CHIP_TOPAZ:
-               chip_name = "topaz";
-               break;
        case CHIP_TONGA:
                chip_name = "tonga";
                break;
        case CHIP_FIJI:
-               chip_name = "fiji";
-               break;
        case CHIP_CARRIZO:
        case CHIP_STONEY:
                return 0;
@@ -1007,7 +979,7 @@ static int gmc_v8_0_hw_init(void *handle)
 
        gmc_v8_0_mc_program(adev);
 
-       if (!(adev->flags & AMD_IS_APU)) {
+       if (adev->asic_type == CHIP_TONGA) {
                r = gmc_v8_0_mc_load_microcode(adev);
                if (r) {
                        DRM_ERROR("Failed to load MC firmware!\n");
index 966d4b2..090486c 100644 (file)
@@ -432,7 +432,7 @@ static uint32_t iceland_smu_get_mask_for_fw_type(uint32_t fw_type)
                case AMDGPU_UCODE_ID_CP_ME:
                        return UCODE_ID_CP_ME_MASK;
                case AMDGPU_UCODE_ID_CP_MEC1:
-                       return UCODE_ID_CP_MEC_MASK | UCODE_ID_CP_MEC_JT1_MASK | UCODE_ID_CP_MEC_JT2_MASK;
+                       return UCODE_ID_CP_MEC_MASK | UCODE_ID_CP_MEC_JT1_MASK;
                case AMDGPU_UCODE_ID_CP_MEC2:
                        return UCODE_ID_CP_MEC_MASK;
                case AMDGPU_UCODE_ID_RLC_G:
@@ -522,12 +522,6 @@ static int iceland_smu_request_load_fw(struct amdgpu_device *adev)
                return -EINVAL;
        }
 
-       if (iceland_smu_populate_single_firmware_entry(adev, UCODE_ID_CP_MEC_JT2,
-                       &toc->entry[toc->num_entries++])) {
-               DRM_ERROR("Failed to get firmware entry for MEC_JT2\n");
-               return -EINVAL;
-       }
-
        if (iceland_smu_populate_single_firmware_entry(adev, UCODE_ID_SDMA0,
                        &toc->entry[toc->num_entries++])) {
                DRM_ERROR("Failed to get firmware entry for SDMA0\n");
@@ -550,8 +544,8 @@ static int iceland_smu_request_load_fw(struct amdgpu_device *adev)
                        UCODE_ID_CP_ME_MASK |
                        UCODE_ID_CP_PFP_MASK |
                        UCODE_ID_CP_MEC_MASK |
-                       UCODE_ID_CP_MEC_JT1_MASK |
-                       UCODE_ID_CP_MEC_JT2_MASK;
+                       UCODE_ID_CP_MEC_JT1_MASK;
+
 
        if (iceland_send_msg_to_smc_with_parameter_without_waiting(adev, PPSMC_MSG_LoadUcodes, fw_to_load)) {
                DRM_ERROR("Fail to request SMU load ucode\n");
index f4a1346..0497784 100644 (file)
@@ -122,25 +122,12 @@ static int tonga_dpm_hw_fini(void *handle)
 
 static int tonga_dpm_suspend(void *handle)
 {
-       return 0;
+       return tonga_dpm_hw_fini(handle);
 }
 
 static int tonga_dpm_resume(void *handle)
 {
-       int ret;
-       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-       mutex_lock(&adev->pm.mutex);
-
-       ret = tonga_smu_start(adev);
-       if (ret) {
-               DRM_ERROR("SMU start failed\n");
-               goto fail;
-       }
-
-fail:
-       mutex_unlock(&adev->pm.mutex);
-       return ret;
+       return tonga_dpm_hw_init(handle);
 }
 
 static int tonga_dpm_set_clockgating_state(void *handle,
index 652e766..89f5a1f 100644 (file)
@@ -61,6 +61,7 @@
 #include "vi.h"
 #include "vi_dpm.h"
 #include "gmc_v8_0.h"
+#include "gmc_v7_0.h"
 #include "gfx_v8_0.h"
 #include "sdma_v2_4.h"
 #include "sdma_v3_0.h"
@@ -1109,10 +1110,10 @@ static const struct amdgpu_ip_block_version topaz_ip_blocks[] =
        },
        {
                .type = AMD_IP_BLOCK_TYPE_GMC,
-               .major = 8,
-               .minor = 0,
+               .major = 7,
+               .minor = 4,
                .rev = 0,
-               .funcs = &gmc_v8_0_ip_funcs,
+               .funcs = &gmc_v7_0_ip_funcs,
        },
        {
                .type = AMD_IP_BLOCK_TYPE_IH,
@@ -1442,8 +1443,7 @@ static int vi_common_early_init(void *handle)
                break;
        case CHIP_FIJI:
                adev->has_uvd = true;
-               adev->cg_flags = AMDGPU_CG_SUPPORT_UVD_MGCG |
-                               AMDGPU_CG_SUPPORT_VCE_MGCG;
+               adev->cg_flags = 0;
                adev->pg_flags = 0;
                adev->external_rev_id = adev->rev_id + 0x3c;
                break;
index 9be0070..a902ae0 100644 (file)
@@ -194,7 +194,7 @@ static void kfd_process_wq_release(struct work_struct *work)
 
        kfree(p);
 
-       kfree((void *)work);
+       kfree(work);
 }
 
 static void kfd_process_destroy_delayed(struct rcu_head *rcu)
index 8f5d5ed..aa67244 100644 (file)
@@ -64,6 +64,11 @@ static int pp_sw_init(void *handle)
        if (ret == 0)
                ret = hwmgr->hwmgr_func->backend_init(hwmgr);
 
+       if (ret)
+               printk("amdgpu: powerplay initialization failed\n");
+       else
+               printk("amdgpu: powerplay initialized\n");
+
        return ret;
 }
 
index 873a8d2..ec222c6 100644 (file)
@@ -272,6 +272,9 @@ static int cz_start_smu(struct pp_smumgr *smumgr)
                                UCODE_ID_CP_MEC_JT1_MASK |
                                UCODE_ID_CP_MEC_JT2_MASK;
 
+       if (smumgr->chip_id == CHIP_STONEY)
+               fw_to_check &= ~(UCODE_ID_SDMA1_MASK | UCODE_ID_CP_MEC_JT2_MASK);
+
        cz_request_smu_load_fw(smumgr);
        cz_check_fw_load_finish(smumgr, fw_to_check);
 
@@ -282,7 +285,7 @@ static int cz_start_smu(struct pp_smumgr *smumgr)
        return ret;
 }
 
-static uint8_t cz_translate_firmware_enum_to_arg(
+static uint8_t cz_translate_firmware_enum_to_arg(struct pp_smumgr *smumgr,
                        enum cz_scratch_entry firmware_enum)
 {
        uint8_t ret = 0;
@@ -292,7 +295,10 @@ static uint8_t cz_translate_firmware_enum_to_arg(
                ret = UCODE_ID_SDMA0;
                break;
        case CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1:
-               ret = UCODE_ID_SDMA1;
+               if (smumgr->chip_id == CHIP_STONEY)
+                       ret = UCODE_ID_SDMA0;
+               else
+                       ret = UCODE_ID_SDMA1;
                break;
        case CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE:
                ret = UCODE_ID_CP_CE;
@@ -307,7 +313,10 @@ static uint8_t cz_translate_firmware_enum_to_arg(
                ret = UCODE_ID_CP_MEC_JT1;
                break;
        case CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2:
-               ret = UCODE_ID_CP_MEC_JT2;
+               if (smumgr->chip_id == CHIP_STONEY)
+                       ret = UCODE_ID_CP_MEC_JT1;
+               else
+                       ret = UCODE_ID_CP_MEC_JT2;
                break;
        case CZ_SCRATCH_ENTRY_UCODE_ID_GMCON_RENG:
                ret = UCODE_ID_GMCON_RENG;
@@ -396,7 +405,7 @@ static int cz_smu_populate_single_scratch_task(
        struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++];
 
        task->type = type;
-       task->arg = cz_translate_firmware_enum_to_arg(fw_enum);
+       task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum);
        task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count;
 
        for (i = 0; i < cz_smu->scratch_buffer_length; i++)
@@ -433,7 +442,7 @@ static int cz_smu_populate_single_ucode_load_task(
        struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++];
 
        task->type = TASK_TYPE_UCODE_LOAD;
-       task->arg = cz_translate_firmware_enum_to_arg(fw_enum);
+       task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum);
        task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count;
 
        for (i = 0; i < cz_smu->driver_buffer_length; i++)
@@ -509,8 +518,14 @@ static int cz_smu_construct_toc_for_vddgfx_exit(struct pp_smumgr *smumgr)
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
-       cz_smu_populate_single_ucode_load_task(smumgr,
+
+       if (smumgr->chip_id == CHIP_STONEY)
+               cz_smu_populate_single_ucode_load_task(smumgr,
+                               CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
+       else
+               cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
+
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, false);
 
@@ -551,7 +566,11 @@ static int cz_smu_construct_toc_for_bootup(struct pp_smumgr *smumgr)
 
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
-       cz_smu_populate_single_ucode_load_task(smumgr,
+       if (smumgr->chip_id == CHIP_STONEY)
+               cz_smu_populate_single_ucode_load_task(smumgr,
+                               CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
+       else
+               cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1, false);
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE, false);
@@ -561,7 +580,11 @@ static int cz_smu_construct_toc_for_bootup(struct pp_smumgr *smumgr)
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
-       cz_smu_populate_single_ucode_load_task(smumgr,
+       if (smumgr->chip_id == CHIP_STONEY)
+               cz_smu_populate_single_ucode_load_task(smumgr,
+                               CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
+       else
+               cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, true);
@@ -618,7 +641,7 @@ static int cz_smu_populate_firmware_entries(struct pp_smumgr *smumgr)
 
        for (i = 0; i < sizeof(firmware_list)/sizeof(*firmware_list); i++) {
 
-               firmware_type = cz_translate_firmware_enum_to_arg(
+               firmware_type = cz_translate_firmware_enum_to_arg(smumgr,
                                        firmware_list[i]);
 
                ucode_id = cz_convert_fw_type_to_cgs(firmware_type);
index 57cccd6..7c52306 100644 (file)
@@ -946,9 +946,23 @@ static void wait_for_fences(struct drm_device *dev,
        }
 }
 
-static bool framebuffer_changed(struct drm_device *dev,
-                               struct drm_atomic_state *old_state,
-                               struct drm_crtc *crtc)
+/**
+ * drm_atomic_helper_framebuffer_changed - check if framebuffer has changed
+ * @dev: DRM device
+ * @old_state: atomic state object with old state structures
+ * @crtc: DRM crtc
+ *
+ * Checks whether the framebuffer used for this CRTC changes as a result of
+ * the atomic update.  This is useful for drivers which cannot use
+ * drm_atomic_helper_wait_for_vblanks() and need to reimplement its
+ * functionality.
+ *
+ * Returns:
+ * true if the framebuffer changed.
+ */
+bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev,
+                                          struct drm_atomic_state *old_state,
+                                          struct drm_crtc *crtc)
 {
        struct drm_plane *plane;
        struct drm_plane_state *old_plane_state;
@@ -965,6 +979,7 @@ static bool framebuffer_changed(struct drm_device *dev,
 
        return false;
 }
+EXPORT_SYMBOL(drm_atomic_helper_framebuffer_changed);
 
 /**
  * drm_atomic_helper_wait_for_vblanks - wait for vblank on crtcs
@@ -999,7 +1014,8 @@ drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
                if (old_state->legacy_cursor_update)
                        continue;
 
-               if (!framebuffer_changed(dev, old_state, crtc))
+               if (!drm_atomic_helper_framebuffer_changed(dev,
+                               old_state, crtc))
                        continue;
 
                ret = drm_crtc_vblank_get(crtc);
index 6ed90a2..8ae13de 100644 (file)
@@ -803,12 +803,33 @@ static struct drm_dp_mst_branch *drm_dp_add_mst_branch_device(u8 lct, u8 *rad)
        return mstb;
 }
 
+static void drm_dp_free_mst_port(struct kref *kref);
+
+static void drm_dp_free_mst_branch_device(struct kref *kref)
+{
+       struct drm_dp_mst_branch *mstb = container_of(kref, struct drm_dp_mst_branch, kref);
+       if (mstb->port_parent) {
+               if (list_empty(&mstb->port_parent->next))
+                       kref_put(&mstb->port_parent->kref, drm_dp_free_mst_port);
+       }
+       kfree(mstb);
+}
+
 static void drm_dp_destroy_mst_branch_device(struct kref *kref)
 {
        struct drm_dp_mst_branch *mstb = container_of(kref, struct drm_dp_mst_branch, kref);
        struct drm_dp_mst_port *port, *tmp;
        bool wake_tx = false;
 
+       /*
+        * init kref again to be used by ports to remove mst branch when it is
+        * not needed anymore
+        */
+       kref_init(kref);
+
+       if (mstb->port_parent && list_empty(&mstb->port_parent->next))
+               kref_get(&mstb->port_parent->kref);
+
        /*
         * destroy all ports - don't need lock
         * as there are no more references to the mst branch
@@ -835,7 +856,8 @@ static void drm_dp_destroy_mst_branch_device(struct kref *kref)
 
        if (wake_tx)
                wake_up(&mstb->mgr->tx_waitq);
-       kfree(mstb);
+
+       kref_put(kref, drm_dp_free_mst_branch_device);
 }
 
 static void drm_dp_put_mst_branch_device(struct drm_dp_mst_branch *mstb)
@@ -883,6 +905,7 @@ static void drm_dp_destroy_port(struct kref *kref)
                         * from an EDID retrieval */
 
                        mutex_lock(&mgr->destroy_connector_lock);
+                       kref_get(&port->parent->kref);
                        list_add(&port->next, &mgr->destroy_connector_list);
                        mutex_unlock(&mgr->destroy_connector_lock);
                        schedule_work(&mgr->destroy_connector_work);
@@ -1018,18 +1041,27 @@ static bool drm_dp_port_setup_pdt(struct drm_dp_mst_port *port)
        return send_link;
 }
 
-static void drm_dp_check_port_guid(struct drm_dp_mst_branch *mstb,
-                                  struct drm_dp_mst_port *port)
+static void drm_dp_check_mstb_guid(struct drm_dp_mst_branch *mstb, u8 *guid)
 {
        int ret;
-       if (port->dpcd_rev >= 0x12) {
-               port->guid_valid = drm_dp_validate_guid(mstb->mgr, port->guid);
-               if (!port->guid_valid) {
-                       ret = drm_dp_send_dpcd_write(mstb->mgr,
-                                                    port,
-                                                    DP_GUID,
-                                                    16, port->guid);
-                       port->guid_valid = true;
+
+       memcpy(mstb->guid, guid, 16);
+
+       if (!drm_dp_validate_guid(mstb->mgr, mstb->guid)) {
+               if (mstb->port_parent) {
+                       ret = drm_dp_send_dpcd_write(
+                                       mstb->mgr,
+                                       mstb->port_parent,
+                                       DP_GUID,
+                                       16,
+                                       mstb->guid);
+               } else {
+
+                       ret = drm_dp_dpcd_write(
+                                       mstb->mgr->aux,
+                                       DP_GUID,
+                                       mstb->guid,
+                                       16);
                }
        }
 }
@@ -1086,7 +1118,6 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb,
        port->dpcd_rev = port_msg->dpcd_revision;
        port->num_sdp_streams = port_msg->num_sdp_streams;
        port->num_sdp_stream_sinks = port_msg->num_sdp_stream_sinks;
-       memcpy(port->guid, port_msg->peer_guid, 16);
 
        /* manage mstb port lists with mgr lock - take a reference
           for this list */
@@ -1099,11 +1130,9 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb,
 
        if (old_ddps != port->ddps) {
                if (port->ddps) {
-                       drm_dp_check_port_guid(mstb, port);
                        if (!port->input)
                                drm_dp_send_enum_path_resources(mstb->mgr, mstb, port);
                } else {
-                       port->guid_valid = false;
                        port->available_pbn = 0;
                        }
        }
@@ -1130,13 +1159,11 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb,
                        drm_dp_put_port(port);
                        goto out;
                }
-               if (port->port_num >= DP_MST_LOGICAL_PORT_0) {
-                       port->cached_edid = drm_get_edid(port->connector, &port->aux.ddc);
-                       drm_mode_connector_set_tile_property(port->connector);
-               }
+
+               drm_mode_connector_set_tile_property(port->connector);
+
                (*mstb->mgr->cbs->register_connector)(port->connector);
        }
-
 out:
        /* put reference to this port */
        drm_dp_put_port(port);
@@ -1161,11 +1188,9 @@ static void drm_dp_update_port(struct drm_dp_mst_branch *mstb,
        port->ddps = conn_stat->displayport_device_plug_status;
 
        if (old_ddps != port->ddps) {
+               dowork = true;
                if (port->ddps) {
-                       drm_dp_check_port_guid(mstb, port);
-                       dowork = true;
                } else {
-                       port->guid_valid = false;
                        port->available_pbn = 0;
                }
        }
@@ -1222,13 +1247,14 @@ static struct drm_dp_mst_branch *get_mst_branch_device_by_guid_helper(
        struct drm_dp_mst_branch *found_mstb;
        struct drm_dp_mst_port *port;
 
+       if (memcmp(mstb->guid, guid, 16) == 0)
+               return mstb;
+
+
        list_for_each_entry(port, &mstb->ports, next) {
                if (!port->mstb)
                        continue;
 
-               if (port->guid_valid && memcmp(port->guid, guid, 16) == 0)
-                       return port->mstb;
-
                found_mstb = get_mst_branch_device_by_guid_helper(port->mstb, guid);
 
                if (found_mstb)
@@ -1247,10 +1273,7 @@ static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device_by_guid(
        /* find the port by iterating down */
        mutex_lock(&mgr->lock);
 
-       if (mgr->guid_valid && memcmp(mgr->guid, guid, 16) == 0)
-               mstb = mgr->mst_primary;
-       else
-               mstb = get_mst_branch_device_by_guid_helper(mgr->mst_primary, guid);
+       mstb = get_mst_branch_device_by_guid_helper(mgr->mst_primary, guid);
 
        if (mstb)
                kref_get(&mstb->kref);
@@ -1271,8 +1294,13 @@ static void drm_dp_check_and_send_link_address(struct drm_dp_mst_topology_mgr *m
                if (port->input)
                        continue;
 
-               if (!port->ddps)
+               if (!port->ddps) {
+                       if (port->cached_edid) {
+                               kfree(port->cached_edid);
+                               port->cached_edid = NULL;
+                       }
                        continue;
+               }
 
                if (!port->available_pbn)
                        drm_dp_send_enum_path_resources(mgr, mstb, port);
@@ -1283,6 +1311,12 @@ static void drm_dp_check_and_send_link_address(struct drm_dp_mst_topology_mgr *m
                                drm_dp_check_and_send_link_address(mgr, mstb_child);
                                drm_dp_put_mst_branch_device(mstb_child);
                        }
+               } else if (port->pdt == DP_PEER_DEVICE_SST_SINK ||
+                       port->pdt == DP_PEER_DEVICE_DP_LEGACY_CONV) {
+                       if (!port->cached_edid) {
+                               port->cached_edid =
+                                       drm_get_edid(port->connector, &port->aux.ddc);
+                       }
                }
        }
 }
@@ -1302,6 +1336,8 @@ static void drm_dp_mst_link_probe_work(struct work_struct *work)
                drm_dp_check_and_send_link_address(mgr, mstb);
                drm_dp_put_mst_branch_device(mstb);
        }
+
+       (*mgr->cbs->hotplug)(mgr);
 }
 
 static bool drm_dp_validate_guid(struct drm_dp_mst_topology_mgr *mgr,
@@ -1555,10 +1591,12 @@ static void drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr,
                                       txmsg->reply.u.link_addr.ports[i].num_sdp_streams,
                                       txmsg->reply.u.link_addr.ports[i].num_sdp_stream_sinks);
                        }
+
+                       drm_dp_check_mstb_guid(mstb, txmsg->reply.u.link_addr.guid);
+
                        for (i = 0; i < txmsg->reply.u.link_addr.nports; i++) {
                                drm_dp_add_port(mstb, mgr->dev, &txmsg->reply.u.link_addr.ports[i]);
                        }
-                       (*mgr->cbs->hotplug)(mgr);
                }
        } else {
                mstb->link_address_sent = false;
@@ -1602,6 +1640,37 @@ static int drm_dp_send_enum_path_resources(struct drm_dp_mst_topology_mgr *mgr,
        return 0;
 }
 
+static struct drm_dp_mst_port *drm_dp_get_last_connected_port_to_mstb(struct drm_dp_mst_branch *mstb)
+{
+       if (!mstb->port_parent)
+               return NULL;
+
+       if (mstb->port_parent->mstb != mstb)
+               return mstb->port_parent;
+
+       return drm_dp_get_last_connected_port_to_mstb(mstb->port_parent->parent);
+}
+
+static struct drm_dp_mst_branch *drm_dp_get_last_connected_port_and_mstb(struct drm_dp_mst_topology_mgr *mgr,
+                                                                        struct drm_dp_mst_branch *mstb,
+                                                                        int *port_num)
+{
+       struct drm_dp_mst_branch *rmstb = NULL;
+       struct drm_dp_mst_port *found_port;
+       mutex_lock(&mgr->lock);
+       if (mgr->mst_primary) {
+               found_port = drm_dp_get_last_connected_port_to_mstb(mstb);
+
+               if (found_port) {
+                       rmstb = found_port->parent;
+                       kref_get(&rmstb->kref);
+                       *port_num = found_port->port_num;
+               }
+       }
+       mutex_unlock(&mgr->lock);
+       return rmstb;
+}
+
 static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr,
                                   struct drm_dp_mst_port *port,
                                   int id,
@@ -1609,13 +1678,18 @@ static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr,
 {
        struct drm_dp_sideband_msg_tx *txmsg;
        struct drm_dp_mst_branch *mstb;
-       int len, ret;
+       int len, ret, port_num;
        u8 sinks[DRM_DP_MAX_SDP_STREAMS];
        int i;
 
+       port_num = port->port_num;
        mstb = drm_dp_get_validated_mstb_ref(mgr, port->parent);
-       if (!mstb)
-               return -EINVAL;
+       if (!mstb) {
+               mstb = drm_dp_get_last_connected_port_and_mstb(mgr, port->parent, &port_num);
+
+               if (!mstb)
+                       return -EINVAL;
+       }
 
        txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL);
        if (!txmsg) {
@@ -1627,7 +1701,7 @@ static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr,
                sinks[i] = i;
 
        txmsg->dst = mstb;
-       len = build_allocate_payload(txmsg, port->port_num,
+       len = build_allocate_payload(txmsg, port_num,
                                     id,
                                     pbn, port->num_sdp_streams, sinks);
 
@@ -1983,31 +2057,17 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms
                mgr->mst_primary = mstb;
                kref_get(&mgr->mst_primary->kref);
 
-               {
-                       struct drm_dp_payload reset_pay;
-                       reset_pay.start_slot = 0;
-                       reset_pay.num_slots = 0x3f;
-                       drm_dp_dpcd_write_payload(mgr, 0, &reset_pay);
-               }
-
                ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL,
-                                        DP_MST_EN | DP_UP_REQ_EN | DP_UPSTREAM_IS_SRC);
+                                                        DP_MST_EN | DP_UP_REQ_EN | DP_UPSTREAM_IS_SRC);
                if (ret < 0) {
                        goto out_unlock;
                }
 
-
-               /* sort out guid */
-               ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, mgr->guid, 16);
-               if (ret != 16) {
-                       DRM_DEBUG_KMS("failed to read DP GUID %d\n", ret);
-                       goto out_unlock;
-               }
-
-               mgr->guid_valid = drm_dp_validate_guid(mgr, mgr->guid);
-               if (!mgr->guid_valid) {
-                       ret = drm_dp_dpcd_write(mgr->aux, DP_GUID, mgr->guid, 16);
-                       mgr->guid_valid = true;
+               {
+                       struct drm_dp_payload reset_pay;
+                       reset_pay.start_slot = 0;
+                       reset_pay.num_slots = 0x3f;
+                       drm_dp_dpcd_write_payload(mgr, 0, &reset_pay);
                }
 
                queue_work(system_long_wq, &mgr->work);
@@ -2231,9 +2291,8 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
                        }
 
                        drm_dp_update_port(mstb, &msg.u.conn_stat);
-                       DRM_DEBUG_KMS("Got CSN: pn: %d ldps:%d ddps: %d mcs: %d ip: %d pdt: %d\n", msg.u.conn_stat.port_number, msg.u.conn_stat.legacy_device_plug_status, msg.u.conn_stat.displayport_device_plug_status, msg.u.conn_stat.message_capability_status, msg.u.conn_stat.input_port, msg.u.conn_stat.peer_device_type);
-                       (*mgr->cbs->hotplug)(mgr);
 
+                       DRM_DEBUG_KMS("Got CSN: pn: %d ldps:%d ddps: %d mcs: %d ip: %d pdt: %d\n", msg.u.conn_stat.port_number, msg.u.conn_stat.legacy_device_plug_status, msg.u.conn_stat.displayport_device_plug_status, msg.u.conn_stat.message_capability_status, msg.u.conn_stat.input_port, msg.u.conn_stat.peer_device_type);
                } else if (msg.req_type == DP_RESOURCE_STATUS_NOTIFY) {
                        drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, msg.req_type, seqno, false);
                        if (!mstb)
@@ -2320,10 +2379,6 @@ enum drm_connector_status drm_dp_mst_detect_port(struct drm_connector *connector
 
        case DP_PEER_DEVICE_SST_SINK:
                status = connector_status_connected;
-               /* for logical ports - cache the EDID */
-               if (port->port_num >= 8 && !port->cached_edid) {
-                       port->cached_edid = drm_get_edid(connector, &port->aux.ddc);
-               }
                break;
        case DP_PEER_DEVICE_DP_LEGACY_CONV:
                if (port->ldps)
@@ -2378,10 +2433,7 @@ struct edid *drm_dp_mst_get_edid(struct drm_connector *connector, struct drm_dp_
 
        if (port->cached_edid)
                edid = drm_edid_duplicate(port->cached_edid);
-       else {
-               edid = drm_get_edid(connector, &port->aux.ddc);
-               drm_mode_connector_set_tile_property(connector);
-       }
+
        port->has_audio = drm_detect_monitor_audio(edid);
        drm_dp_put_port(port);
        return edid;
@@ -2446,6 +2498,7 @@ bool drm_dp_mst_allocate_vcpi(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp
                DRM_DEBUG_KMS("payload: vcpi %d already allocated for pbn %d - requested pbn %d\n", port->vcpi.vcpi, port->vcpi.pbn, pbn);
                if (pbn == port->vcpi.pbn) {
                        *slots = port->vcpi.num_slots;
+                       drm_dp_put_port(port);
                        return true;
                }
        }
@@ -2605,32 +2658,31 @@ EXPORT_SYMBOL(drm_dp_check_act_status);
  */
 int drm_dp_calc_pbn_mode(int clock, int bpp)
 {
-       fixed20_12 pix_bw;
-       fixed20_12 fbpp;
-       fixed20_12 result;
-       fixed20_12 margin, tmp;
-       u32 res;
-
-       pix_bw.full = dfixed_const(clock);
-       fbpp.full = dfixed_const(bpp);
-       tmp.full = dfixed_const(8);
-       fbpp.full = dfixed_div(fbpp, tmp);
-
-       result.full = dfixed_mul(pix_bw, fbpp);
-       margin.full = dfixed_const(54);
-       tmp.full = dfixed_const(64);
-       margin.full = dfixed_div(margin, tmp);
-       result.full = dfixed_div(result, margin);
-
-       margin.full = dfixed_const(1006);
-       tmp.full = dfixed_const(1000);
-       margin.full = dfixed_div(margin, tmp);
-       result.full = dfixed_mul(result, margin);
-
-       result.full = dfixed_div(result, tmp);
-       result.full = dfixed_ceil(result);
-       res = dfixed_trunc(result);
-       return res;
+       u64 kbps;
+       s64 peak_kbps;
+       u32 numerator;
+       u32 denominator;
+
+       kbps = clock * bpp;
+
+       /*
+        * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006
+        * The unit of 54/64Mbytes/sec is an arbitrary unit chosen based on
+        * common multiplier to render an integer PBN for all link rate/lane
+        * counts combinations
+        * calculate
+        * peak_kbps *= (1006/1000)
+        * peak_kbps *= (64/54)
+        * peak_kbps /= (8 * 1000)    convert kbits to Mbytes
+        */
+
+       numerator = 64 * 1006;
+       denominator = 54 * 8 * 1000 * 1000;
+
+       kbps *= numerator;
+       peak_kbps = drm_fixp_from_fraction(kbps, denominator);
+
+       return drm_fixp2int_ceil(peak_kbps);
 }
 EXPORT_SYMBOL(drm_dp_calc_pbn_mode);
 
@@ -2638,11 +2690,23 @@ static int test_calc_pbn_mode(void)
 {
        int ret;
        ret = drm_dp_calc_pbn_mode(154000, 30);
-       if (ret != 689)
+       if (ret != 689) {
+               DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n",
+                               154000, 30, 689, ret);
                return -EINVAL;
+       }
        ret = drm_dp_calc_pbn_mode(234000, 30);
-       if (ret != 1047)
+       if (ret != 1047) {
+               DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n",
+                               234000, 30, 1047, ret);
                return -EINVAL;
+       }
+       ret = drm_dp_calc_pbn_mode(297000, 24);
+       if (ret != 1063) {
+               DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n",
+                               297000, 24, 1063, ret);
+               return -EINVAL;
+       }
        return 0;
 }
 
@@ -2783,6 +2847,13 @@ static void drm_dp_tx_work(struct work_struct *work)
        mutex_unlock(&mgr->qlock);
 }
 
+static void drm_dp_free_mst_port(struct kref *kref)
+{
+       struct drm_dp_mst_port *port = container_of(kref, struct drm_dp_mst_port, kref);
+       kref_put(&port->parent->kref, drm_dp_free_mst_branch_device);
+       kfree(port);
+}
+
 static void drm_dp_destroy_connector_work(struct work_struct *work)
 {
        struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, destroy_connector_work);
@@ -2803,13 +2874,22 @@ static void drm_dp_destroy_connector_work(struct work_struct *work)
                list_del(&port->next);
                mutex_unlock(&mgr->destroy_connector_lock);
 
+               kref_init(&port->kref);
+               INIT_LIST_HEAD(&port->next);
+
                mgr->cbs->destroy_connector(mgr, port->connector);
 
                drm_dp_port_teardown_pdt(port, port->pdt);
 
-               if (!port->input && port->vcpi.vcpi > 0)
-                       drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
-               kfree(port);
+               if (!port->input && port->vcpi.vcpi > 0) {
+                       if (mgr->mst_state) {
+                               drm_dp_mst_reset_vcpi_slots(mgr, port);
+                               drm_dp_update_payload_part1(mgr);
+                               drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
+                       }
+               }
+
+               kref_put(&port->kref, drm_dp_free_mst_port);
                send_hotplug = true;
        }
        if (send_hotplug)
@@ -2847,6 +2927,9 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
        mgr->max_dpcd_transaction_bytes = max_dpcd_transaction_bytes;
        mgr->max_payloads = max_payloads;
        mgr->conn_base_id = conn_base_id;
+       if (max_payloads + 1 > sizeof(mgr->payload_mask) * 8 ||
+           max_payloads + 1 > sizeof(mgr->vcpi_mask) * 8)
+               return -EINVAL;
        mgr->payloads = kcalloc(max_payloads, sizeof(struct drm_dp_payload), GFP_KERNEL);
        if (!mgr->payloads)
                return -ENOMEM;
@@ -2854,7 +2937,9 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
        if (!mgr->proposed_vcpis)
                return -ENOMEM;
        set_bit(0, &mgr->payload_mask);
-       test_calc_pbn_mode();
+       if (test_calc_pbn_mode() < 0)
+               DRM_ERROR("MST PBN self-test failed\n");
+
        return 0;
 }
 EXPORT_SYMBOL(drm_dp_mst_topology_mgr_init);
index c3b80fd..7b30b30 100644 (file)
@@ -198,10 +198,7 @@ EXPORT_SYMBOL(drm_ht_remove_item);
 void drm_ht_remove(struct drm_open_hash *ht)
 {
        if (ht->table) {
-               if ((PAGE_SIZE / sizeof(*ht->table)) >> ht->order)
-                       kfree(ht->table);
-               else
-                       vfree(ht->table);
+               kvfree(ht->table);
                ht->table = NULL;
        }
 }
index 9e585d5..e881482 100644 (file)
@@ -8,8 +8,8 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
 git clone git://0x04.net/rules-ng-ng
 
 The rules-ng-ng source files this header was generated from are:
-- state_vg.xml (   5973 bytes, from 2015-03-25 11:26:01)
-- common.xml   (  18437 bytes, from 2015-03-25 11:27:41)
+- state_hi.xml (  24309 bytes, from 2015-12-12 09:02:53)
+- common.xml   (  18379 bytes, from 2015-12-12 09:02:53)
 
 Copyright (C) 2015
 */
@@ -30,15 +30,19 @@ Copyright (C) 2015
 #define ENDIAN_MODE_NO_SWAP                                    0x00000000
 #define ENDIAN_MODE_SWAP_16                                    0x00000001
 #define ENDIAN_MODE_SWAP_32                                    0x00000002
+#define chipModel_GC200                                                0x00000200
 #define chipModel_GC300                                                0x00000300
 #define chipModel_GC320                                                0x00000320
+#define chipModel_GC328                                                0x00000328
 #define chipModel_GC350                                                0x00000350
 #define chipModel_GC355                                                0x00000355
 #define chipModel_GC400                                                0x00000400
 #define chipModel_GC410                                                0x00000410
 #define chipModel_GC420                                                0x00000420
+#define chipModel_GC428                                                0x00000428
 #define chipModel_GC450                                                0x00000450
 #define chipModel_GC500                                                0x00000500
+#define chipModel_GC520                                                0x00000520
 #define chipModel_GC530                                                0x00000530
 #define chipModel_GC600                                                0x00000600
 #define chipModel_GC700                                                0x00000700
@@ -46,9 +50,16 @@ Copyright (C) 2015
 #define chipModel_GC860                                                0x00000860
 #define chipModel_GC880                                                0x00000880
 #define chipModel_GC1000                                       0x00001000
+#define chipModel_GC1500                                       0x00001500
 #define chipModel_GC2000                                       0x00002000
 #define chipModel_GC2100                                       0x00002100
+#define chipModel_GC2200                                       0x00002200
+#define chipModel_GC2500                                       0x00002500
+#define chipModel_GC3000                                       0x00003000
 #define chipModel_GC4000                                       0x00004000
+#define chipModel_GC5000                                       0x00005000
+#define chipModel_GC5200                                       0x00005200
+#define chipModel_GC6400                                       0x00006400
 #define RGBA_BITS_R                                            0x00000001
 #define RGBA_BITS_G                                            0x00000002
 #define RGBA_BITS_B                                            0x00000004
@@ -160,7 +171,7 @@ Copyright (C) 2015
 #define chipMinorFeatures2_UNK8                                        0x00000100
 #define chipMinorFeatures2_UNK9                                        0x00000200
 #define chipMinorFeatures2_UNK10                               0x00000400
-#define chipMinorFeatures2_SAMPLERBASE_16                      0x00000800
+#define chipMinorFeatures2_HALTI1                              0x00000800
 #define chipMinorFeatures2_UNK12                               0x00001000
 #define chipMinorFeatures2_UNK13                               0x00002000
 #define chipMinorFeatures2_UNK14                               0x00004000
@@ -189,7 +200,7 @@ Copyright (C) 2015
 #define chipMinorFeatures3_UNK5                                        0x00000020
 #define chipMinorFeatures3_UNK6                                        0x00000040
 #define chipMinorFeatures3_UNK7                                        0x00000080
-#define chipMinorFeatures3_UNK8                                        0x00000100
+#define chipMinorFeatures3_FAST_MSAA                           0x00000100
 #define chipMinorFeatures3_UNK9                                        0x00000200
 #define chipMinorFeatures3_BUG_FIXES10                         0x00000400
 #define chipMinorFeatures3_UNK11                               0x00000800
@@ -199,7 +210,7 @@ Copyright (C) 2015
 #define chipMinorFeatures3_UNK15                               0x00008000
 #define chipMinorFeatures3_UNK16                               0x00010000
 #define chipMinorFeatures3_UNK17                               0x00020000
-#define chipMinorFeatures3_UNK18                               0x00040000
+#define chipMinorFeatures3_ACE                                 0x00040000
 #define chipMinorFeatures3_UNK19                               0x00080000
 #define chipMinorFeatures3_UNK20                               0x00100000
 #define chipMinorFeatures3_UNK21                               0x00200000
@@ -207,7 +218,7 @@ Copyright (C) 2015
 #define chipMinorFeatures3_UNK23                               0x00800000
 #define chipMinorFeatures3_UNK24                               0x01000000
 #define chipMinorFeatures3_UNK25                               0x02000000
-#define chipMinorFeatures3_UNK26                               0x04000000
+#define chipMinorFeatures3_NEW_HZ                              0x04000000
 #define chipMinorFeatures3_UNK27                               0x08000000
 #define chipMinorFeatures3_UNK28                               0x10000000
 #define chipMinorFeatures3_UNK29                               0x20000000
@@ -229,9 +240,9 @@ Copyright (C) 2015
 #define chipMinorFeatures4_UNK13                               0x00002000
 #define chipMinorFeatures4_UNK14                               0x00004000
 #define chipMinorFeatures4_UNK15                               0x00008000
-#define chipMinorFeatures4_UNK16                               0x00010000
+#define chipMinorFeatures4_HALTI2                              0x00010000
 #define chipMinorFeatures4_UNK17                               0x00020000
-#define chipMinorFeatures4_UNK18                               0x00040000
+#define chipMinorFeatures4_SMALL_MSAA                          0x00040000
 #define chipMinorFeatures4_UNK19                               0x00080000
 #define chipMinorFeatures4_UNK20                               0x00100000
 #define chipMinorFeatures4_UNK21                               0x00200000
@@ -245,5 +256,37 @@ Copyright (C) 2015
 #define chipMinorFeatures4_UNK29                               0x20000000
 #define chipMinorFeatures4_UNK30                               0x40000000
 #define chipMinorFeatures4_UNK31                               0x80000000
+#define chipMinorFeatures5_UNK0                                        0x00000001
+#define chipMinorFeatures5_UNK1                                        0x00000002
+#define chipMinorFeatures5_UNK2                                        0x00000004
+#define chipMinorFeatures5_UNK3                                        0x00000008
+#define chipMinorFeatures5_UNK4                                        0x00000010
+#define chipMinorFeatures5_UNK5                                        0x00000020
+#define chipMinorFeatures5_UNK6                                        0x00000040
+#define chipMinorFeatures5_UNK7                                        0x00000080
+#define chipMinorFeatures5_UNK8                                        0x00000100
+#define chipMinorFeatures5_HALTI3                              0x00000200
+#define chipMinorFeatures5_UNK10                               0x00000400
+#define chipMinorFeatures5_UNK11                               0x00000800
+#define chipMinorFeatures5_UNK12                               0x00001000
+#define chipMinorFeatures5_UNK13                               0x00002000
+#define chipMinorFeatures5_UNK14                               0x00004000
+#define chipMinorFeatures5_UNK15                               0x00008000
+#define chipMinorFeatures5_UNK16                               0x00010000
+#define chipMinorFeatures5_UNK17                               0x00020000
+#define chipMinorFeatures5_UNK18                               0x00040000
+#define chipMinorFeatures5_UNK19                               0x00080000
+#define chipMinorFeatures5_UNK20                               0x00100000
+#define chipMinorFeatures5_UNK21                               0x00200000
+#define chipMinorFeatures5_UNK22                               0x00400000
+#define chipMinorFeatures5_UNK23                               0x00800000
+#define chipMinorFeatures5_UNK24                               0x01000000
+#define chipMinorFeatures5_UNK25                               0x02000000
+#define chipMinorFeatures5_UNK26                               0x04000000
+#define chipMinorFeatures5_UNK27                               0x08000000
+#define chipMinorFeatures5_UNK28                               0x10000000
+#define chipMinorFeatures5_UNK29                               0x20000000
+#define chipMinorFeatures5_UNK30                               0x40000000
+#define chipMinorFeatures5_UNK31                               0x80000000
 
 #endif /* COMMON_XML */
index 5c89ebb..e885898 100644 (file)
@@ -668,7 +668,6 @@ static struct platform_driver etnaviv_platform_driver = {
        .probe      = etnaviv_pdev_probe,
        .remove     = etnaviv_pdev_remove,
        .driver     = {
-               .owner  = THIS_MODULE,
                .name   = "etnaviv",
                .of_match_table = dt_match,
        },
index d6bd438..1cd6046 100644 (file)
@@ -85,7 +85,7 @@ struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
        struct dma_buf_attachment *attach, struct sg_table *sg);
 int etnaviv_gem_prime_pin(struct drm_gem_object *obj);
 void etnaviv_gem_prime_unpin(struct drm_gem_object *obj);
-void *etnaviv_gem_vaddr(struct drm_gem_object *obj);
+void *etnaviv_gem_vmap(struct drm_gem_object *obj);
 int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op,
                struct timespec *timeout);
 int etnaviv_gem_cpu_fini(struct drm_gem_object *obj);
index bf8fa85..4a29eea 100644 (file)
@@ -201,7 +201,9 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 
                obj = vram->object;
 
+               mutex_lock(&obj->lock);
                pages = etnaviv_gem_get_pages(obj);
+               mutex_unlock(&obj->lock);
                if (pages) {
                        int j;
 
@@ -213,8 +215,8 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 
                iter.hdr->iova = cpu_to_le64(vram->iova);
 
-               vaddr = etnaviv_gem_vaddr(&obj->base);
-               if (vaddr && !IS_ERR(vaddr))
+               vaddr = etnaviv_gem_vmap(&obj->base);
+               if (vaddr)
                        memcpy(iter.data, vaddr, obj->base.size);
 
                etnaviv_core_dump_header(&iter, ETDUMP_BUF_BO, iter.data +
index 9f77c3b..4b519e4 100644 (file)
@@ -353,25 +353,39 @@ void etnaviv_gem_put_iova(struct etnaviv_gpu *gpu, struct drm_gem_object *obj)
        drm_gem_object_unreference_unlocked(obj);
 }
 
-void *etnaviv_gem_vaddr(struct drm_gem_object *obj)
+void *etnaviv_gem_vmap(struct drm_gem_object *obj)
 {
        struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 
-       mutex_lock(&etnaviv_obj->lock);
-       if (!etnaviv_obj->vaddr) {
-               struct page **pages = etnaviv_gem_get_pages(etnaviv_obj);
-
-               if (IS_ERR(pages))
-                       return ERR_CAST(pages);
+       if (etnaviv_obj->vaddr)
+               return etnaviv_obj->vaddr;
 
-               etnaviv_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT,
-                               VM_MAP, pgprot_writecombine(PAGE_KERNEL));
-       }
+       mutex_lock(&etnaviv_obj->lock);
+       /*
+        * Need to check again, as we might have raced with another thread
+        * while waiting for the mutex.
+        */
+       if (!etnaviv_obj->vaddr)
+               etnaviv_obj->vaddr = etnaviv_obj->ops->vmap(etnaviv_obj);
        mutex_unlock(&etnaviv_obj->lock);
 
        return etnaviv_obj->vaddr;
 }
 
+static void *etnaviv_gem_vmap_impl(struct etnaviv_gem_object *obj)
+{
+       struct page **pages;
+
+       lockdep_assert_held(&obj->lock);
+
+       pages = etnaviv_gem_get_pages(obj);
+       if (IS_ERR(pages))
+               return NULL;
+
+       return vmap(pages, obj->base.size >> PAGE_SHIFT,
+                       VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+}
+
 static inline enum dma_data_direction etnaviv_op_to_dma_dir(u32 op)
 {
        if (op & ETNA_PREP_READ)
@@ -522,6 +536,7 @@ static void etnaviv_gem_shmem_release(struct etnaviv_gem_object *etnaviv_obj)
 static const struct etnaviv_gem_ops etnaviv_gem_shmem_ops = {
        .get_pages = etnaviv_gem_shmem_get_pages,
        .release = etnaviv_gem_shmem_release,
+       .vmap = etnaviv_gem_vmap_impl,
 };
 
 void etnaviv_gem_free_object(struct drm_gem_object *obj)
@@ -866,6 +881,7 @@ static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj)
 static const struct etnaviv_gem_ops etnaviv_gem_userptr_ops = {
        .get_pages = etnaviv_gem_userptr_get_pages,
        .release = etnaviv_gem_userptr_release,
+       .vmap = etnaviv_gem_vmap_impl,
 };
 
 int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
index a300b4b..ab5df81 100644 (file)
@@ -78,6 +78,7 @@ struct etnaviv_gem_object *to_etnaviv_bo(struct drm_gem_object *obj)
 struct etnaviv_gem_ops {
        int (*get_pages)(struct etnaviv_gem_object *);
        void (*release)(struct etnaviv_gem_object *);
+       void *(*vmap)(struct etnaviv_gem_object *);
 };
 
 static inline bool is_active(struct etnaviv_gem_object *etnaviv_obj)
index e94db4f..4e67395 100644 (file)
@@ -31,7 +31,7 @@ struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj)
 
 void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj)
 {
-       return etnaviv_gem_vaddr(obj);
+       return etnaviv_gem_vmap(obj);
 }
 
 void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
@@ -77,9 +77,17 @@ static void etnaviv_gem_prime_release(struct etnaviv_gem_object *etnaviv_obj)
        drm_prime_gem_destroy(&etnaviv_obj->base, etnaviv_obj->sgt);
 }
 
+static void *etnaviv_gem_prime_vmap_impl(struct etnaviv_gem_object *etnaviv_obj)
+{
+       lockdep_assert_held(&etnaviv_obj->lock);
+
+       return dma_buf_vmap(etnaviv_obj->base.import_attach->dmabuf);
+}
+
 static const struct etnaviv_gem_ops etnaviv_gem_prime_ops = {
        /* .get_pages should never be called */
        .release = etnaviv_gem_prime_release,
+       .vmap = etnaviv_gem_prime_vmap_impl,
 };
 
 struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
index 056a72e..a33162c 100644 (file)
@@ -72,6 +72,14 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
                *value = gpu->identity.minor_features3;
                break;
 
+       case ETNAVIV_PARAM_GPU_FEATURES_5:
+               *value = gpu->identity.minor_features4;
+               break;
+
+       case ETNAVIV_PARAM_GPU_FEATURES_6:
+               *value = gpu->identity.minor_features5;
+               break;
+
        case ETNAVIV_PARAM_GPU_STREAM_COUNT:
                *value = gpu->identity.stream_count;
                break;
@@ -112,6 +120,10 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
                *value = gpu->identity.num_constants;
                break;
 
+       case ETNAVIV_PARAM_GPU_NUM_VARYINGS:
+               *value = gpu->identity.varyings_count;
+               break;
+
        default:
                DBG("%s: invalid param: %u", dev_name(gpu->dev), param);
                return -EINVAL;
@@ -120,46 +132,56 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
        return 0;
 }
 
+
+#define etnaviv_is_model_rev(gpu, mod, rev) \
+       ((gpu)->identity.model == chipModel_##mod && \
+        (gpu)->identity.revision == rev)
+#define etnaviv_field(val, field) \
+       (((val) & field##__MASK) >> field##__SHIFT)
+
 static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 {
        if (gpu->identity.minor_features0 &
            chipMinorFeatures0_MORE_MINOR_FEATURES) {
-               u32 specs[2];
+               u32 specs[4];
+               unsigned int streams;
 
                specs[0] = gpu_read(gpu, VIVS_HI_CHIP_SPECS);
                specs[1] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_2);
-
-               gpu->identity.stream_count =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_STREAM_COUNT__MASK)
-                               >> VIVS_HI_CHIP_SPECS_STREAM_COUNT__SHIFT;
-               gpu->identity.register_max =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_REGISTER_MAX__MASK)
-                               >> VIVS_HI_CHIP_SPECS_REGISTER_MAX__SHIFT;
-               gpu->identity.thread_count =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_THREAD_COUNT__MASK)
-                               >> VIVS_HI_CHIP_SPECS_THREAD_COUNT__SHIFT;
-               gpu->identity.vertex_cache_size =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__MASK)
-                               >> VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__SHIFT;
-               gpu->identity.shader_core_count =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__MASK)
-                               >> VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__SHIFT;
-               gpu->identity.pixel_pipes =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_PIXEL_PIPES__MASK)
-                               >> VIVS_HI_CHIP_SPECS_PIXEL_PIPES__SHIFT;
+               specs[2] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_3);
+               specs[3] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_4);
+
+               gpu->identity.stream_count = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_STREAM_COUNT);
+               gpu->identity.register_max = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_REGISTER_MAX);
+               gpu->identity.thread_count = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_THREAD_COUNT);
+               gpu->identity.vertex_cache_size = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE);
+               gpu->identity.shader_core_count = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT);
+               gpu->identity.pixel_pipes = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_PIXEL_PIPES);
                gpu->identity.vertex_output_buffer_size =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__MASK)
-                               >> VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__SHIFT;
-
-               gpu->identity.buffer_size =
-                       (specs[1] & VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__MASK)
-                               >> VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__SHIFT;
-               gpu->identity.instruction_count =
-                       (specs[1] & VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__MASK)
-                               >> VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__SHIFT;
-               gpu->identity.num_constants =
-                       (specs[1] & VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__MASK)
-                               >> VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__SHIFT;
+                       etnaviv_field(specs[0],
+                               VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE);
+
+               gpu->identity.buffer_size = etnaviv_field(specs[1],
+                                       VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE);
+               gpu->identity.instruction_count = etnaviv_field(specs[1],
+                                       VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT);
+               gpu->identity.num_constants = etnaviv_field(specs[1],
+                                       VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS);
+
+               gpu->identity.varyings_count = etnaviv_field(specs[2],
+                                       VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT);
+
+               /* This overrides the value from older register if non-zero */
+               streams = etnaviv_field(specs[3],
+                                       VIVS_HI_CHIP_SPECS_4_STREAM_COUNT);
+               if (streams)
+                       gpu->identity.stream_count = streams;
        }
 
        /* Fill in the stream count if not specified */
@@ -173,7 +195,7 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
        /* Convert the register max value */
        if (gpu->identity.register_max)
                gpu->identity.register_max = 1 << gpu->identity.register_max;
-       else if (gpu->identity.model == 0x0400)
+       else if (gpu->identity.model == chipModel_GC400)
                gpu->identity.register_max = 32;
        else
                gpu->identity.register_max = 64;
@@ -181,10 +203,10 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
        /* Convert thread count */
        if (gpu->identity.thread_count)
                gpu->identity.thread_count = 1 << gpu->identity.thread_count;
-       else if (gpu->identity.model == 0x0400)
+       else if (gpu->identity.model == chipModel_GC400)
                gpu->identity.thread_count = 64;
-       else if (gpu->identity.model == 0x0500 ||
-                gpu->identity.model == 0x0530)
+       else if (gpu->identity.model == chipModel_GC500 ||
+                gpu->identity.model == chipModel_GC530)
                gpu->identity.thread_count = 128;
        else
                gpu->identity.thread_count = 256;
@@ -206,7 +228,7 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
        if (gpu->identity.vertex_output_buffer_size) {
                gpu->identity.vertex_output_buffer_size =
                        1 << gpu->identity.vertex_output_buffer_size;
-       } else if (gpu->identity.model == 0x0400) {
+       } else if (gpu->identity.model == chipModel_GC400) {
                if (gpu->identity.revision < 0x4000)
                        gpu->identity.vertex_output_buffer_size = 512;
                else if (gpu->identity.revision < 0x4200)
@@ -219,9 +241,8 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 
        switch (gpu->identity.instruction_count) {
        case 0:
-               if ((gpu->identity.model == 0x2000 &&
-                    gpu->identity.revision == 0x5108) ||
-                   gpu->identity.model == 0x880)
+               if (etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
+                   gpu->identity.model == chipModel_GC880)
                        gpu->identity.instruction_count = 512;
                else
                        gpu->identity.instruction_count = 256;
@@ -242,6 +263,30 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 
        if (gpu->identity.num_constants == 0)
                gpu->identity.num_constants = 168;
+
+       if (gpu->identity.varyings_count == 0) {
+               if (gpu->identity.minor_features1 & chipMinorFeatures1_HALTI0)
+                       gpu->identity.varyings_count = 12;
+               else
+                       gpu->identity.varyings_count = 8;
+       }
+
+       /*
+        * For some cores, two varyings are consumed for position, so the
+        * maximum varying count needs to be reduced by one.
+        */
+       if (etnaviv_is_model_rev(gpu, GC5000, 0x5434) ||
+           etnaviv_is_model_rev(gpu, GC4000, 0x5222) ||
+           etnaviv_is_model_rev(gpu, GC4000, 0x5245) ||
+           etnaviv_is_model_rev(gpu, GC4000, 0x5208) ||
+           etnaviv_is_model_rev(gpu, GC3000, 0x5435) ||
+           etnaviv_is_model_rev(gpu, GC2200, 0x5244) ||
+           etnaviv_is_model_rev(gpu, GC2100, 0x5108) ||
+           etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
+           etnaviv_is_model_rev(gpu, GC1500, 0x5246) ||
+           etnaviv_is_model_rev(gpu, GC880, 0x5107) ||
+           etnaviv_is_model_rev(gpu, GC880, 0x5106))
+               gpu->identity.varyings_count -= 1;
 }
 
 static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
@@ -251,12 +296,10 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
        chipIdentity = gpu_read(gpu, VIVS_HI_CHIP_IDENTITY);
 
        /* Special case for older graphic cores. */
-       if (((chipIdentity & VIVS_HI_CHIP_IDENTITY_FAMILY__MASK)
-            >> VIVS_HI_CHIP_IDENTITY_FAMILY__SHIFT) ==  0x01) {
-               gpu->identity.model    = 0x500; /* gc500 */
-               gpu->identity.revision =
-                       (chipIdentity & VIVS_HI_CHIP_IDENTITY_REVISION__MASK)
-                       >> VIVS_HI_CHIP_IDENTITY_REVISION__SHIFT;
+       if (etnaviv_field(chipIdentity, VIVS_HI_CHIP_IDENTITY_FAMILY) == 0x01) {
+               gpu->identity.model    = chipModel_GC500;
+               gpu->identity.revision = etnaviv_field(chipIdentity,
+                                        VIVS_HI_CHIP_IDENTITY_REVISION);
        } else {
 
                gpu->identity.model = gpu_read(gpu, VIVS_HI_CHIP_MODEL);
@@ -269,13 +312,12 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
                 * same.  Only for GC400 family.
                 */
                if ((gpu->identity.model & 0xff00) == 0x0400 &&
-                   gpu->identity.model != 0x0420) {
+                   gpu->identity.model != chipModel_GC420) {
                        gpu->identity.model = gpu->identity.model & 0x0400;
                }
 
                /* Another special case */
-               if (gpu->identity.model == 0x300 &&
-                   gpu->identity.revision == 0x2201) {
+               if (etnaviv_is_model_rev(gpu, GC300, 0x2201)) {
                        u32 chipDate = gpu_read(gpu, VIVS_HI_CHIP_DATE);
                        u32 chipTime = gpu_read(gpu, VIVS_HI_CHIP_TIME);
 
@@ -295,11 +337,13 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
        gpu->identity.features = gpu_read(gpu, VIVS_HI_CHIP_FEATURE);
 
        /* Disable fast clear on GC700. */
-       if (gpu->identity.model == 0x700)
+       if (gpu->identity.model == chipModel_GC700)
                gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
 
-       if ((gpu->identity.model == 0x500 && gpu->identity.revision < 2) ||
-           (gpu->identity.model == 0x300 && gpu->identity.revision < 0x2000)) {
+       if ((gpu->identity.model == chipModel_GC500 &&
+            gpu->identity.revision < 2) ||
+           (gpu->identity.model == chipModel_GC300 &&
+            gpu->identity.revision < 0x2000)) {
 
                /*
                 * GC500 rev 1.x and GC300 rev < 2.0 doesn't have these
@@ -309,6 +353,8 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
                gpu->identity.minor_features1 = 0;
                gpu->identity.minor_features2 = 0;
                gpu->identity.minor_features3 = 0;
+               gpu->identity.minor_features4 = 0;
+               gpu->identity.minor_features5 = 0;
        } else
                gpu->identity.minor_features0 =
                                gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_0);
@@ -321,6 +367,10 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
                                gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_2);
                gpu->identity.minor_features3 =
                                gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_3);
+               gpu->identity.minor_features4 =
+                               gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_4);
+               gpu->identity.minor_features5 =
+                               gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_5);
        }
 
        /* GC600 idle register reports zero bits where modules aren't present */
@@ -441,10 +491,9 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 {
        u16 prefetch;
 
-       if (gpu->identity.model == chipModel_GC320 &&
-           gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400 &&
-           (gpu->identity.revision == 0x5007 ||
-            gpu->identity.revision == 0x5220)) {
+       if ((etnaviv_is_model_rev(gpu, GC320, 0x5007) ||
+            etnaviv_is_model_rev(gpu, GC320, 0x5220)) &&
+           gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400) {
                u32 mc_memory_debug;
 
                mc_memory_debug = gpu_read(gpu, VIVS_MC_DEBUG_MEMORY) & ~0xff;
@@ -466,7 +515,7 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
                  VIVS_HI_AXI_CONFIG_ARCACHE(2));
 
        /* GC2000 rev 5108 needs a special bus config */
-       if (gpu->identity.model == 0x2000 && gpu->identity.revision == 0x5108) {
+       if (etnaviv_is_model_rev(gpu, GC2000, 0x5108)) {
                u32 bus_config = gpu_read(gpu, VIVS_MC_BUS_CONFIG);
                bus_config &= ~(VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__MASK |
                                VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__MASK);
@@ -511,8 +560,16 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 
        if (gpu->identity.model == 0) {
                dev_err(gpu->dev, "Unknown GPU model\n");
-               pm_runtime_put_autosuspend(gpu->dev);
-               return -ENXIO;
+               ret = -ENXIO;
+               goto fail;
+       }
+
+       /* Exclude VG cores with FE2.0 */
+       if (gpu->identity.features & chipFeatures_PIPE_VG &&
+           gpu->identity.features & chipFeatures_FE20) {
+               dev_info(gpu->dev, "Ignoring GPU with VG and FE2.0\n");
+               ret = -ENXIO;
+               goto fail;
        }
 
        ret = etnaviv_hw_reset(gpu);
@@ -539,10 +596,9 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
                goto fail;
        }
 
-       /* TODO: we will leak here memory - fix it! */
-
        gpu->mmu = etnaviv_iommu_new(gpu, iommu, version);
        if (!gpu->mmu) {
+               iommu_domain_free(iommu);
                ret = -ENOMEM;
                goto fail;
        }
@@ -552,7 +608,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
        if (!gpu->buffer) {
                ret = -ENOMEM;
                dev_err(gpu->dev, "could not create command buffer\n");
-               goto fail;
+               goto destroy_iommu;
        }
        if (gpu->buffer->paddr - gpu->memory_base > 0x80000000) {
                ret = -EINVAL;
@@ -582,6 +638,9 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 free_buffer:
        etnaviv_gpu_cmdbuf_free(gpu->buffer);
        gpu->buffer = NULL;
+destroy_iommu:
+       etnaviv_iommu_destroy(gpu->mmu);
+       gpu->mmu = NULL;
 fail:
        pm_runtime_mark_last_busy(gpu->dev);
        pm_runtime_put_autosuspend(gpu->dev);
@@ -642,6 +701,10 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
                   gpu->identity.minor_features2);
        seq_printf(m, "\t minor_features3: 0x%08x\n",
                   gpu->identity.minor_features3);
+       seq_printf(m, "\t minor_features4: 0x%08x\n",
+                  gpu->identity.minor_features4);
+       seq_printf(m, "\t minor_features5: 0x%08x\n",
+                  gpu->identity.minor_features5);
 
        seq_puts(m, "\tspecs\n");
        seq_printf(m, "\t stream_count:  %d\n",
@@ -664,6 +727,8 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
                        gpu->identity.instruction_count);
        seq_printf(m, "\t num_constants: %d\n",
                        gpu->identity.num_constants);
+       seq_printf(m, "\t varyings_count: %d\n",
+                       gpu->identity.varyings_count);
 
        seq_printf(m, "\taxi: 0x%08x\n", axi);
        seq_printf(m, "\tidle: 0x%08x\n", idle);
index c75d503..f233ac4 100644 (file)
@@ -46,6 +46,12 @@ struct etnaviv_chip_identity {
        /* Supported minor feature 3 fields. */
        u32 minor_features3;
 
+       /* Supported minor feature 4 fields. */
+       u32 minor_features4;
+
+       /* Supported minor feature 5 fields. */
+       u32 minor_features5;
+
        /* Number of streams supported. */
        u32 stream_count;
 
@@ -75,6 +81,9 @@ struct etnaviv_chip_identity {
 
        /* Buffer size */
        u32 buffer_size;
+
+       /* Number of varyings */
+       u8 varyings_count;
 };
 
 struct etnaviv_event {
index 0064f26..6a7de5f 100644 (file)
@@ -8,8 +8,8 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
 git clone git://0x04.net/rules-ng-ng
 
 The rules-ng-ng source files this header was generated from are:
-- state_hi.xml (  23420 bytes, from 2015-03-25 11:47:21)
-- common.xml   (  18437 bytes, from 2015-03-25 11:27:41)
+- state_hi.xml (  24309 bytes, from 2015-12-12 09:02:53)
+- common.xml   (  18437 bytes, from 2015-12-12 09:02:53)
 
 Copyright (C) 2015
 */
@@ -182,8 +182,25 @@ Copyright (C) 2015
 
 #define VIVS_HI_CHIP_MINOR_FEATURE_3                           0x00000088
 
+#define VIVS_HI_CHIP_SPECS_3                                   0x0000008c
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK              0x000001f0
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT             4
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT(x)                 (((x) << VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK)
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK              0x00000007
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT             0
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT(x)                 (((x) << VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK)
+
 #define VIVS_HI_CHIP_MINOR_FEATURE_4                           0x00000094
 
+#define VIVS_HI_CHIP_SPECS_4                                   0x0000009c
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK                        0x0001f000
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT               12
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT(x)                   (((x) << VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK)
+
+#define VIVS_HI_CHIP_MINOR_FEATURE_5                           0x000000a0
+
+#define VIVS_HI_CHIP_PRODUCT_ID                                        0x000000a8
+
 #define VIVS_PM                                                        0x00000000
 
 #define VIVS_PM_POWER_CONTROLS                                 0x00000100
@@ -206,6 +223,11 @@ Copyright (C) 2015
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_FE            0x00000001
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_DE            0x00000002
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PE            0x00000004
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SH            0x00000008
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PA            0x00000010
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SE            0x00000020
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_RA            0x00000040
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_TX            0x00000080
 
 #define VIVS_PM_PULSE_EATER                                    0x0000010c
 
index b79c316..673164b 100644 (file)
@@ -1392,7 +1392,7 @@ static const struct component_ops exynos_dp_ops = {
 static int exynos_dp_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
-       struct device_node *panel_node = NULL, *bridge_node, *endpoint = NULL;
+       struct device_node *np = NULL, *endpoint = NULL;
        struct exynos_dp_device *dp;
        int ret;
 
@@ -1404,41 +1404,36 @@ static int exynos_dp_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, dp);
 
        /* This is for the backward compatibility. */
-       panel_node = of_parse_phandle(dev->of_node, "panel", 0);
-       if (panel_node) {
-               dp->panel = of_drm_find_panel(panel_node);
-               of_node_put(panel_node);
+       np = of_parse_phandle(dev->of_node, "panel", 0);
+       if (np) {
+               dp->panel = of_drm_find_panel(np);
+               of_node_put(np);
                if (!dp->panel)
                        return -EPROBE_DEFER;
-       } else {
-               endpoint = of_graph_get_next_endpoint(dev->of_node, NULL);
-               if (endpoint) {
-                       panel_node = of_graph_get_remote_port_parent(endpoint);
-                       if (panel_node) {
-                               dp->panel = of_drm_find_panel(panel_node);
-                               of_node_put(panel_node);
-                               if (!dp->panel)
-                                       return -EPROBE_DEFER;
-                       } else {
-                               DRM_ERROR("no port node for panel device.\n");
-                               return -EINVAL;
-                       }
-               }
-       }
-
-       if (endpoint)
                goto out;
+       }
 
        endpoint = of_graph_get_next_endpoint(dev->of_node, NULL);
        if (endpoint) {
-               bridge_node = of_graph_get_remote_port_parent(endpoint);
-               if (bridge_node) {
-                       dp->ptn_bridge = of_drm_find_bridge(bridge_node);
-                       of_node_put(bridge_node);
-                       if (!dp->ptn_bridge)
-                               return -EPROBE_DEFER;
-               } else
-                       return -EPROBE_DEFER;
+               np = of_graph_get_remote_port_parent(endpoint);
+               if (np) {
+                       /* The remote port can be either a panel or a bridge */
+                       dp->panel = of_drm_find_panel(np);
+                       if (!dp->panel) {
+                               dp->ptn_bridge = of_drm_find_bridge(np);
+                               if (!dp->ptn_bridge) {
+                                       of_node_put(np);
+                                       return -EPROBE_DEFER;
+                               }
+                       }
+                       of_node_put(np);
+               } else {
+                       DRM_ERROR("no remote endpoint device node found.\n");
+                       return -EINVAL;
+               }
+       } else {
+               DRM_ERROR("no port endpoint subnode found.\n");
+               return -EINVAL;
        }
 
 out:
index d84a498..e977a81 100644 (file)
@@ -1906,8 +1906,7 @@ static int exynos_dsi_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_PM
-static int exynos_dsi_suspend(struct device *dev)
+static int __maybe_unused exynos_dsi_suspend(struct device *dev)
 {
        struct drm_encoder *encoder = dev_get_drvdata(dev);
        struct exynos_dsi *dsi = encoder_to_dsi(encoder);
@@ -1938,7 +1937,7 @@ static int exynos_dsi_suspend(struct device *dev)
        return 0;
 }
 
-static int exynos_dsi_resume(struct device *dev)
+static int __maybe_unused exynos_dsi_resume(struct device *dev)
 {
        struct drm_encoder *encoder = dev_get_drvdata(dev);
        struct exynos_dsi *dsi = encoder_to_dsi(encoder);
@@ -1972,7 +1971,6 @@ err_clk:
 
        return ret;
 }
-#endif
 
 static const struct dev_pm_ops exynos_dsi_pm_ops = {
        SET_RUNTIME_PM_OPS(exynos_dsi_suspend, exynos_dsi_resume, NULL)
index b5fbc1c..0a5a600 100644 (file)
@@ -1289,8 +1289,7 @@ static int mixer_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int exynos_mixer_suspend(struct device *dev)
+static int __maybe_unused exynos_mixer_suspend(struct device *dev)
 {
        struct mixer_context *ctx = dev_get_drvdata(dev);
        struct mixer_resources *res = &ctx->mixer_res;
@@ -1306,7 +1305,7 @@ static int exynos_mixer_suspend(struct device *dev)
        return 0;
 }
 
-static int exynos_mixer_resume(struct device *dev)
+static int __maybe_unused exynos_mixer_resume(struct device *dev)
 {
        struct mixer_context *ctx = dev_get_drvdata(dev);
        struct mixer_resources *res = &ctx->mixer_res;
@@ -1342,7 +1341,6 @@ static int exynos_mixer_resume(struct device *dev)
 
        return 0;
 }
-#endif
 
 static const struct dev_pm_ops exynos_mixer_pm_ops = {
        SET_RUNTIME_PM_OPS(exynos_mixer_suspend, exynos_mixer_resume, NULL)
index 533d1e3..a02112b 100644 (file)
@@ -136,6 +136,7 @@ static bool adv7511_register_volatile(struct device *dev, unsigned int reg)
        case ADV7511_REG_BKSV(3):
        case ADV7511_REG_BKSV(4):
        case ADV7511_REG_DDC_STATUS:
+       case ADV7511_REG_EDID_READ_CTRL:
        case ADV7511_REG_BSTATUS(0):
        case ADV7511_REG_BSTATUS(1):
        case ADV7511_REG_CHIP_ID_HIGH:
@@ -362,24 +363,31 @@ static void adv7511_power_on(struct adv7511 *adv7511)
 {
        adv7511->current_edid_segment = -1;
 
-       regmap_write(adv7511->regmap, ADV7511_REG_INT(0),
-                    ADV7511_INT0_EDID_READY);
-       regmap_write(adv7511->regmap, ADV7511_REG_INT(1),
-                    ADV7511_INT1_DDC_ERROR);
        regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER,
                           ADV7511_POWER_POWER_DOWN, 0);
+       if (adv7511->i2c_main->irq) {
+               /*
+                * Documentation says the INT_ENABLE registers are reset in
+                * POWER_DOWN mode. My 7511w preserved the bits, however.
+                * Still, let's be safe and stick to the documentation.
+                */
+               regmap_write(adv7511->regmap, ADV7511_REG_INT_ENABLE(0),
+                            ADV7511_INT0_EDID_READY);
+               regmap_write(adv7511->regmap, ADV7511_REG_INT_ENABLE(1),
+                            ADV7511_INT1_DDC_ERROR);
+       }
 
        /*
-        * Per spec it is allowed to pulse the HDP signal to indicate that the
+        * Per spec it is allowed to pulse the HPD signal to indicate that the
         * EDID information has changed. Some monitors do this when they wakeup
-        * from standby or are enabled. When the HDP goes low the adv7511 is
+        * from standby or are enabled. When the HPD goes low the adv7511 is
         * reset and the outputs are disabled which might cause the monitor to
-        * go to standby again. To avoid this we ignore the HDP pin for the
+        * go to standby again. To avoid this we ignore the HPD pin for the
         * first few seconds after enabling the output.
         */
        regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2,
-                          ADV7511_REG_POWER2_HDP_SRC_MASK,
-                          ADV7511_REG_POWER2_HDP_SRC_NONE);
+                          ADV7511_REG_POWER2_HPD_SRC_MASK,
+                          ADV7511_REG_POWER2_HPD_SRC_NONE);
 
        /*
         * Most of the registers are reset during power down or when HPD is low.
@@ -413,9 +421,9 @@ static bool adv7511_hpd(struct adv7511 *adv7511)
        if (ret < 0)
                return false;
 
-       if (irq0 & ADV7511_INT0_HDP) {
+       if (irq0 & ADV7511_INT0_HPD) {
                regmap_write(adv7511->regmap, ADV7511_REG_INT(0),
-                            ADV7511_INT0_HDP);
+                            ADV7511_INT0_HPD);
                return true;
        }
 
@@ -438,7 +446,7 @@ static int adv7511_irq_process(struct adv7511 *adv7511)
        regmap_write(adv7511->regmap, ADV7511_REG_INT(0), irq0);
        regmap_write(adv7511->regmap, ADV7511_REG_INT(1), irq1);
 
-       if (irq0 & ADV7511_INT0_HDP && adv7511->encoder)
+       if (irq0 & ADV7511_INT0_HPD && adv7511->encoder)
                drm_helper_hpd_irq_event(adv7511->encoder->dev);
 
        if (irq0 & ADV7511_INT0_EDID_READY || irq1 & ADV7511_INT1_DDC_ERROR) {
@@ -567,12 +575,14 @@ static int adv7511_get_modes(struct drm_encoder *encoder,
 
        /* Reading the EDID only works if the device is powered */
        if (!adv7511->powered) {
-               regmap_write(adv7511->regmap, ADV7511_REG_INT(0),
-                            ADV7511_INT0_EDID_READY);
-               regmap_write(adv7511->regmap, ADV7511_REG_INT(1),
-                            ADV7511_INT1_DDC_ERROR);
                regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER,
                                   ADV7511_POWER_POWER_DOWN, 0);
+               if (adv7511->i2c_main->irq) {
+                       regmap_write(adv7511->regmap, ADV7511_REG_INT_ENABLE(0),
+                                    ADV7511_INT0_EDID_READY);
+                       regmap_write(adv7511->regmap, ADV7511_REG_INT_ENABLE(1),
+                                    ADV7511_INT1_DDC_ERROR);
+               }
                adv7511->current_edid_segment = -1;
        }
 
@@ -638,10 +648,10 @@ adv7511_encoder_detect(struct drm_encoder *encoder,
                if (adv7511->status == connector_status_connected)
                        status = connector_status_disconnected;
        } else {
-               /* Renable HDP sensing */
+               /* Renable HPD sensing */
                regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2,
-                                  ADV7511_REG_POWER2_HDP_SRC_MASK,
-                                  ADV7511_REG_POWER2_HDP_SRC_BOTH);
+                                  ADV7511_REG_POWER2_HPD_SRC_MASK,
+                                  ADV7511_REG_POWER2_HPD_SRC_BOTH);
        }
 
        adv7511->status = status;
index 6599ed5..38515b3 100644 (file)
@@ -90,7 +90,7 @@
 #define ADV7511_CSC_ENABLE                     BIT(7)
 #define ADV7511_CSC_UPDATE_MODE                        BIT(5)
 
-#define ADV7511_INT0_HDP                       BIT(7)
+#define ADV7511_INT0_HPD                       BIT(7)
 #define ADV7511_INT0_VSYNC                     BIT(5)
 #define ADV7511_INT0_AUDIO_FIFO_FULL           BIT(4)
 #define ADV7511_INT0_EDID_READY                        BIT(2)
 #define ADV7511_PACKET_ENABLE_SPARE2           BIT(1)
 #define ADV7511_PACKET_ENABLE_SPARE1           BIT(0)
 
-#define ADV7511_REG_POWER2_HDP_SRC_MASK                0xc0
-#define ADV7511_REG_POWER2_HDP_SRC_BOTH                0x00
-#define ADV7511_REG_POWER2_HDP_SRC_HDP         0x40
-#define ADV7511_REG_POWER2_HDP_SRC_CEC         0x80
-#define ADV7511_REG_POWER2_HDP_SRC_NONE                0xc0
+#define ADV7511_REG_POWER2_HPD_SRC_MASK                0xc0
+#define ADV7511_REG_POWER2_HPD_SRC_BOTH                0x00
+#define ADV7511_REG_POWER2_HPD_SRC_HPD         0x40
+#define ADV7511_REG_POWER2_HPD_SRC_CEC         0x80
+#define ADV7511_REG_POWER2_HPD_SRC_NONE                0xc0
 #define ADV7511_REG_POWER2_TDMS_ENABLE         BIT(4)
 #define ADV7511_REG_POWER2_GATE_INPUT_CLK      BIT(0)
 
index fcd77b2..051eab3 100644 (file)
@@ -10,7 +10,6 @@ config DRM_I915
        # the shmem_readpage() which depends upon tmpfs
        select SHMEM
        select TMPFS
-       select STOP_MACHINE
        select DRM_KMS_HELPER
        select DRM_PANEL
        select DRM_MIPI_DSI
index 3ac616d..f357058 100644 (file)
@@ -501,7 +501,9 @@ void intel_detect_pch(struct drm_device *dev)
                                WARN_ON(!IS_SKYLAKE(dev) &&
                                        !IS_KABYLAKE(dev));
                        } else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) ||
-                                  (id == INTEL_PCH_QEMU_DEVICE_ID_TYPE)) {
+                                  ((id == INTEL_PCH_QEMU_DEVICE_ID_TYPE) &&
+                                   pch->subsystem_vendor == 0x1af4 &&
+                                   pch->subsystem_device == 0x1100)) {
                                dev_priv->pch_type = intel_virt_detect_pch(dev);
                        } else
                                continue;
index 2f00828..5feb657 100644 (file)
@@ -2946,7 +2946,7 @@ u32 intel_plane_obj_offset(struct intel_plane *intel_plane,
        struct i915_vma *vma;
        u64 offset;
 
-       intel_fill_fb_ggtt_view(&view, intel_plane->base.fb,
+       intel_fill_fb_ggtt_view(&view, intel_plane->base.state->fb,
                                intel_plane->base.state);
 
        vma = i915_gem_obj_to_ggtt_view(obj, &view);
@@ -12075,11 +12075,21 @@ connected_sink_compute_bpp(struct intel_connector *connector,
                pipe_config->pipe_bpp = connector->base.display_info.bpc*3;
        }
 
-       /* Clamp bpp to 8 on screens without EDID 1.4 */
-       if (connector->base.display_info.bpc == 0 && bpp > 24) {
-               DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of 24\n",
-                             bpp);
-               pipe_config->pipe_bpp = 24;
+       /* Clamp bpp to default limit on screens without EDID 1.4 */
+       if (connector->base.display_info.bpc == 0) {
+               int type = connector->base.connector_type;
+               int clamp_bpp = 24;
+
+               /* Fall back to 18 bpp when DP sink capability is unknown. */
+               if (type == DRM_MODE_CONNECTOR_DisplayPort ||
+                   type == DRM_MODE_CONNECTOR_eDP)
+                       clamp_bpp = 18;
+
+               if (bpp > clamp_bpp) {
+                       DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of %d\n",
+                                     bpp, clamp_bpp);
+                       pipe_config->pipe_bpp = clamp_bpp;
+               }
        }
 }
 
@@ -13883,11 +13893,12 @@ intel_check_primary_plane(struct drm_plane *plane,
        int max_scale = DRM_PLANE_HELPER_NO_SCALING;
        bool can_position = false;
 
-       /* use scaler when colorkey is not required */
-       if (INTEL_INFO(plane->dev)->gen >= 9 &&
-           state->ckey.flags == I915_SET_COLORKEY_NONE) {
-               min_scale = 1;
-               max_scale = skl_max_scale(to_intel_crtc(crtc), crtc_state);
+       if (INTEL_INFO(plane->dev)->gen >= 9) {
+               /* use scaler when colorkey is not required */
+               if (state->ckey.flags == I915_SET_COLORKEY_NONE) {
+                       min_scale = 1;
+                       max_scale = skl_max_scale(to_intel_crtc(crtc), crtc_state);
+               }
                can_position = true;
        }
 
index 3aa6147..f1fa756 100644 (file)
@@ -1707,6 +1707,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
        if (flush_domains) {
                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_FLUSH_ENABLE;
        }
 
index 339701d..40c6aff 100644 (file)
@@ -331,6 +331,7 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req,
        if (flush_domains) {
                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_FLUSH_ENABLE;
        }
        if (invalidate_domains) {
@@ -403,6 +404,7 @@ gen8_render_ring_flush(struct drm_i915_gem_request *req,
        if (flush_domains) {
                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_FLUSH_ENABLE;
        }
        if (invalidate_domains) {
index 6bfc463..367a916 100644 (file)
@@ -304,18 +304,10 @@ void dce6_dp_audio_set_dto(struct radeon_device *rdev,
                unsigned int div = (RREG32(DENTIST_DISPCLK_CNTL) &
                        DENTIST_DPREFCLK_WDIVIDER_MASK) >>
                        DENTIST_DPREFCLK_WDIVIDER_SHIFT;
-
-               if (div < 128 && div >= 96)
-                       div -= 64;
-               else if (div >= 64)
-                       div = div / 2 - 16;
-               else if (div >= 8)
-                       div /= 4;
-               else
-                       div = 0;
+               div = radeon_audio_decode_dfs_div(div);
 
                if (div)
-                       clock = rdev->clock.gpupll_outputfreq * 10 / div;
+                       clock = clock * 100 / div;
 
                WREG32(DCE8_DCCG_AUDIO_DTO1_PHASE, 24000);
                WREG32(DCE8_DCCG_AUDIO_DTO1_MODULE, clock);
index 9953356..3cf04a2 100644 (file)
@@ -289,6 +289,16 @@ void dce4_dp_audio_set_dto(struct radeon_device *rdev,
         * number (coefficient of two integer numbers.  DCCG_AUDIO_DTOx_PHASE
         * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
         */
+       if (ASIC_IS_DCE41(rdev)) {
+               unsigned int div = (RREG32(DCE41_DENTIST_DISPCLK_CNTL) &
+                       DENTIST_DPREFCLK_WDIVIDER_MASK) >>
+                       DENTIST_DPREFCLK_WDIVIDER_SHIFT;
+               div = radeon_audio_decode_dfs_div(div);
+
+               if (div)
+                       clock = 100 * clock / div;
+       }
+
        WREG32(DCCG_AUDIO_DTO1_PHASE, 24000);
        WREG32(DCCG_AUDIO_DTO1_MODULE, clock);
 }
index 4aa5f75..13b6029 100644 (file)
 #define DCCG_AUDIO_DTO1_CNTL              0x05cc
 #       define DCCG_AUDIO_DTO1_USE_512FBR_DTO (1 << 3)
 
+#define DCE41_DENTIST_DISPCLK_CNTL                     0x049c
+#       define DENTIST_DPREFCLK_WDIVIDER(x)            (((x) & 0x7f) << 24)
+#       define DENTIST_DPREFCLK_WDIVIDER_MASK          (0x7f << 24)
+#       define DENTIST_DPREFCLK_WDIVIDER_SHIFT         24
+
 /* DCE 4.0 AFMT */
 #define HDMI_CONTROL                         0x7030
 #       define HDMI_KEEPOUT_MODE             (1 << 0)
index 5ae6db9..78a51b3 100644 (file)
@@ -268,7 +268,7 @@ struct radeon_clock {
        uint32_t current_dispclk;
        uint32_t dp_extclk;
        uint32_t max_pixel_clock;
-       uint32_t gpupll_outputfreq;
+       uint32_t vco_freq;
 };
 
 /*
index 08fc1b5..de9a2ff 100644 (file)
@@ -1106,6 +1106,31 @@ union firmware_info {
        ATOM_FIRMWARE_INFO_V2_2 info_22;
 };
 
+union igp_info {
+       struct _ATOM_INTEGRATED_SYSTEM_INFO info;
+       struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
+       struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
+       struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
+       struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
+};
+
+static void radeon_atombios_get_dentist_vco_freq(struct radeon_device *rdev)
+{
+       struct radeon_mode_info *mode_info = &rdev->mode_info;
+       int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo);
+       union igp_info *igp_info;
+       u8 frev, crev;
+       u16 data_offset;
+
+       if (atom_parse_data_header(mode_info->atom_context, index, NULL,
+                       &frev, &crev, &data_offset)) {
+               igp_info = (union igp_info *)(mode_info->atom_context->bios +
+                       data_offset);
+               rdev->clock.vco_freq =
+                       le32_to_cpu(igp_info->info_6.ulDentistVCOFreq);
+       }
+}
+
 bool radeon_atom_get_clock_info(struct drm_device *dev)
 {
        struct radeon_device *rdev = dev->dev_private;
@@ -1257,12 +1282,18 @@ bool radeon_atom_get_clock_info(struct drm_device *dev)
                rdev->mode_info.firmware_flags =
                        le16_to_cpu(firmware_info->info.usFirmwareCapability.susAccess);
 
-               if (ASIC_IS_DCE8(rdev)) {
-                       rdev->clock.gpupll_outputfreq =
+               if (ASIC_IS_DCE8(rdev))
+                       rdev->clock.vco_freq =
                                le32_to_cpu(firmware_info->info_22.ulGPUPLL_OutputFreq);
-                       if (rdev->clock.gpupll_outputfreq == 0)
-                               rdev->clock.gpupll_outputfreq = 360000; /* 3.6 GHz */
-               }
+               else if (ASIC_IS_DCE5(rdev))
+                       rdev->clock.vco_freq = rdev->clock.current_dispclk;
+               else if (ASIC_IS_DCE41(rdev))
+                       radeon_atombios_get_dentist_vco_freq(rdev);
+               else
+                       rdev->clock.vco_freq = rdev->clock.current_dispclk;
+
+               if (rdev->clock.vco_freq == 0)
+                       rdev->clock.vco_freq = 360000;  /* 3.6 GHz */
 
                return true;
        }
@@ -1270,14 +1301,6 @@ bool radeon_atom_get_clock_info(struct drm_device *dev)
        return false;
 }
 
-union igp_info {
-       struct _ATOM_INTEGRATED_SYSTEM_INFO info;
-       struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
-       struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
-       struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
-       struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
-};
-
 bool radeon_atombios_sideport_present(struct radeon_device *rdev)
 {
        struct radeon_mode_info *mode_info = &rdev->mode_info;
index 2c02e99..b214663 100644 (file)
@@ -739,9 +739,6 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder,
        struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
        struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
        struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
-       struct radeon_connector *radeon_connector = to_radeon_connector(connector);
-       struct radeon_connector_atom_dig *dig_connector =
-               radeon_connector->con_priv;
 
        if (!dig || !dig->afmt)
                return;
@@ -753,10 +750,7 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder,
                radeon_audio_write_speaker_allocation(encoder);
                radeon_audio_write_sad_regs(encoder);
                radeon_audio_write_latency_fields(encoder, mode);
-               if (rdev->clock.dp_extclk || ASIC_IS_DCE5(rdev))
-                       radeon_audio_set_dto(encoder, rdev->clock.default_dispclk * 10);
-               else
-                       radeon_audio_set_dto(encoder, dig_connector->dp_clock);
+               radeon_audio_set_dto(encoder, rdev->clock.vco_freq * 10);
                radeon_audio_set_audio_packet(encoder);
                radeon_audio_select_pin(encoder);
 
@@ -781,3 +775,15 @@ void radeon_audio_dpms(struct drm_encoder *encoder, int mode)
        if (radeon_encoder->audio && radeon_encoder->audio->dpms)
                radeon_encoder->audio->dpms(encoder, mode == DRM_MODE_DPMS_ON);
 }
+
+unsigned int radeon_audio_decode_dfs_div(unsigned int div)
+{
+       if (div >= 8 && div < 64)
+               return (div - 8) * 25 + 200;
+       else if (div >= 64 && div < 96)
+               return (div - 64) * 50 + 1600;
+       else if (div >= 96 && div < 128)
+               return (div - 96) * 100 + 3200;
+       else
+               return 0;
+}
index 059cc30..5c70cce 100644 (file)
@@ -79,5 +79,6 @@ void radeon_audio_fini(struct radeon_device *rdev);
 void radeon_audio_mode_set(struct drm_encoder *encoder,
        struct drm_display_mode *mode);
 void radeon_audio_dpms(struct drm_encoder *encoder, int mode);
+unsigned int radeon_audio_decode_dfs_div(unsigned int div);
 
 #endif
index b3bb923..298ea1c 100644 (file)
@@ -1670,8 +1670,10 @@ int radeon_modeset_init(struct radeon_device *rdev)
        /* setup afmt */
        radeon_afmt_init(rdev);
 
-       radeon_fbdev_init(rdev);
-       drm_kms_helper_poll_init(rdev->ddev);
+       if (!list_empty(&rdev->ddev->mode_config.connector_list)) {
+               radeon_fbdev_init(rdev);
+               drm_kms_helper_poll_init(rdev->ddev);
+       }
 
        /* do pm late init */
        ret = radeon_pm_late_init(rdev);
index 3dcc573..e26c963 100644 (file)
@@ -663,6 +663,7 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
        bo_va = radeon_vm_bo_find(&fpriv->vm, rbo);
        if (!bo_va) {
                args->operation = RADEON_VA_RESULT_ERROR;
+               radeon_bo_unreserve(rbo);
                drm_gem_object_unreference_unlocked(gobj);
                return -ENOENT;
        }
index 84d4563..fb6ad14 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/slab.h>
 #include <drm/drmP.h>
 #include <drm/radeon_drm.h>
+#include <drm/drm_cache.h>
 #include "radeon.h"
 #include "radeon_trace.h"
 
@@ -245,6 +246,12 @@ int radeon_bo_create(struct radeon_device *rdev,
                DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
                              "better performance thanks to write-combining\n");
        bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
+#else
+       /* For architectures that don't support WC memory,
+        * mask out the WC flag from the BO
+        */
+       if (!drm_arch_can_wc_memory())
+               bo->flags &= ~RADEON_GEM_GTT_WC;
 #endif
 
        radeon_ttm_placement_from_domain(bo, domain);
index 07a0d37..a01efe3 100644 (file)
@@ -178,12 +178,12 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data)
                return -EINVAL;
        }
 
-       for (i = 0; i < sign->num; ++i) {
-               if (sign->val[i].chip_id == chip_id)
+       for (i = 0; i < le32_to_cpu(sign->num); ++i) {
+               if (le32_to_cpu(sign->val[i].chip_id) == chip_id)
                        break;
        }
 
-       if (i == sign->num)
+       if (i == le32_to_cpu(sign->num))
                return -EINVAL;
 
        data += (256 - 64) / 4;
@@ -191,18 +191,18 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data)
        data[1] = sign->val[i].nonce[1];
        data[2] = sign->val[i].nonce[2];
        data[3] = sign->val[i].nonce[3];
-       data[4] = sign->len + 64;
+       data[4] = cpu_to_le32(le32_to_cpu(sign->len) + 64);
 
        memset(&data[5], 0, 44);
        memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign));
 
-       data += data[4] / 4;
+       data += le32_to_cpu(data[4]) / 4;
        data[0] = sign->val[i].sigval[0];
        data[1] = sign->val[i].sigval[1];
        data[2] = sign->val[i].sigval[2];
        data[3] = sign->val[i].sigval[3];
 
-       rdev->vce.keyselect = sign->val[i].keyselect;
+       rdev->vce.keyselect = le32_to_cpu(sign->val[i].keyselect);
 
        return 0;
 }
index d1dc0f7..f6a809a 100644 (file)
@@ -2,11 +2,11 @@
 # Makefile for the drm device driver.  This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
-rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o rockchip_drm_fbdev.o \
-               rockchip_drm_gem.o
+rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o \
+               rockchip_drm_gem.o rockchip_drm_vop.o
+rockchipdrm-$(CONFIG_DRM_FBDEV_EMULATION) += rockchip_drm_fbdev.o
 
 obj-$(CONFIG_ROCKCHIP_DW_HDMI) += dw_hdmi-rockchip.o
 obj-$(CONFIG_ROCKCHIP_DW_MIPI_DSI) += dw-mipi-dsi.o
 
-obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_drm_vop.o \
-                               rockchip_vop_reg.o
+obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_vop_reg.o
index 7bfe243..f8f8f29 100644 (file)
@@ -461,10 +461,11 @@ static int dw_mipi_dsi_phy_init(struct dw_mipi_dsi *dsi)
 
 static int dw_mipi_dsi_get_lane_bps(struct dw_mipi_dsi *dsi)
 {
-       unsigned int bpp, i, pre;
+       unsigned int i, pre;
        unsigned long mpclk, pllref, tmp;
        unsigned int m = 1, n = 1, target_mbps = 1000;
        unsigned int max_mbps = dptdin_map[ARRAY_SIZE(dptdin_map) - 1].max_mbps;
+       int bpp;
 
        bpp = mipi_dsi_pixel_format_to_bpp(dsi->format);
        if (bpp < 0) {
index 8397d1b..a0d51cc 100644 (file)
@@ -55,14 +55,12 @@ int rockchip_drm_dma_attach_device(struct drm_device *drm_dev,
 
        return arm_iommu_attach_device(dev, mapping);
 }
-EXPORT_SYMBOL_GPL(rockchip_drm_dma_attach_device);
 
 void rockchip_drm_dma_detach_device(struct drm_device *drm_dev,
                                    struct device *dev)
 {
        arm_iommu_detach_device(dev);
 }
-EXPORT_SYMBOL_GPL(rockchip_drm_dma_detach_device);
 
 int rockchip_register_crtc_funcs(struct drm_crtc *crtc,
                                 const struct rockchip_crtc_funcs *crtc_funcs)
@@ -77,7 +75,6 @@ int rockchip_register_crtc_funcs(struct drm_crtc *crtc,
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(rockchip_register_crtc_funcs);
 
 void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc)
 {
@@ -89,7 +86,6 @@ void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc)
 
        priv->crtc_funcs[pipe] = NULL;
 }
-EXPORT_SYMBOL_GPL(rockchip_unregister_crtc_funcs);
 
 static struct drm_crtc *rockchip_crtc_from_pipe(struct drm_device *drm,
                                                int pipe)
index f784488..3b8f652 100644 (file)
@@ -39,7 +39,6 @@ struct drm_gem_object *rockchip_fb_get_gem_obj(struct drm_framebuffer *fb,
 
        return rk_fb->obj[plane];
 }
-EXPORT_SYMBOL_GPL(rockchip_fb_get_gem_obj);
 
 static void rockchip_drm_fb_destroy(struct drm_framebuffer *fb)
 {
@@ -177,8 +176,23 @@ static void rockchip_crtc_wait_for_update(struct drm_crtc *crtc)
                crtc_funcs->wait_for_update(crtc);
 }
 
+/*
+ * We can't use drm_atomic_helper_wait_for_vblanks() because rk3288 and rk3066
+ * have hardware counters for neither vblanks nor scanlines, which results in
+ * a race where:
+ *                             | <-- HW vsync irq and reg take effect
+ *            plane_commit --> |
+ *     get_vblank and wait --> |
+ *                             | <-- handle_vblank, vblank->count + 1
+ *              cleanup_fb --> |
+ *             iommu crash --> |
+ *                             | <-- HW vsync irq and reg take effect
+ *
+ * This function is equivalent but uses rockchip_crtc_wait_for_update() instead
+ * of waiting for vblank_count to change.
+ */
 static void
-rockchip_atomic_wait_for_complete(struct drm_atomic_state *old_state)
+rockchip_atomic_wait_for_complete(struct drm_device *dev, struct drm_atomic_state *old_state)
 {
        struct drm_crtc_state *old_crtc_state;
        struct drm_crtc *crtc;
@@ -194,6 +208,10 @@ rockchip_atomic_wait_for_complete(struct drm_atomic_state *old_state)
                if (!crtc->state->active)
                        continue;
 
+               if (!drm_atomic_helper_framebuffer_changed(dev,
+                               old_state, crtc))
+                       continue;
+
                ret = drm_crtc_vblank_get(crtc);
                if (ret != 0)
                        continue;
@@ -241,7 +259,7 @@ rockchip_atomic_commit_complete(struct rockchip_atomic_commit *commit)
 
        drm_atomic_helper_commit_planes(dev, state, true);
 
-       rockchip_atomic_wait_for_complete(state);
+       rockchip_atomic_wait_for_complete(dev, state);
 
        drm_atomic_helper_cleanup_planes(dev, state);
 
index 50432e9..73718c5 100644 (file)
 #ifndef _ROCKCHIP_DRM_FBDEV_H
 #define _ROCKCHIP_DRM_FBDEV_H
 
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 int rockchip_drm_fbdev_init(struct drm_device *dev);
 void rockchip_drm_fbdev_fini(struct drm_device *dev);
+#else
+static inline int rockchip_drm_fbdev_init(struct drm_device *dev)
+{
+       return 0;
+}
+
+static inline void rockchip_drm_fbdev_fini(struct drm_device *dev)
+{
+}
+#endif
 
 #endif /* _ROCKCHIP_DRM_FBDEV_H */
index d908321..18e0733 100644 (file)
@@ -234,13 +234,8 @@ int rockchip_gem_dumb_create(struct drm_file *file_priv,
        /*
         * align to 64 bytes since Mali requires it.
         */
-       min_pitch = ALIGN(min_pitch, 64);
-
-       if (args->pitch < min_pitch)
-               args->pitch = min_pitch;
-
-       if (args->size < args->pitch * args->height)
-               args->size = args->pitch * args->height;
+       args->pitch = ALIGN(min_pitch, 64);
+       args->size = args->pitch * args->height;
 
        rk_obj = rockchip_gem_create_with_handle(file_priv, dev, args->size,
                                                 &args->handle);
index 46c2a8d..fd37054 100644 (file)
@@ -43,8 +43,8 @@
 
 #define REG_SET(x, base, reg, v, mode) \
                __REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v)
-#define REG_SET_MASK(x, base, reg, v, mode) \
-               __REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v)
+#define REG_SET_MASK(x, base, reg, mask, v, mode) \
+               __REG_SET_##mode(x, base + reg.offset, mask, reg.shift, v)
 
 #define VOP_WIN_SET(x, win, name, v) \
                REG_SET(x, win->base, win->phy->name, v, RELAXED)
 #define VOP_INTR_GET(vop, name) \
                vop_read_reg(vop, 0, &vop->data->ctrl->name)
 
-#define VOP_INTR_SET(vop, name, v) \
-               REG_SET(vop, 0, vop->data->intr->name, v, NORMAL)
+#define VOP_INTR_SET(vop, name, mask, v) \
+               REG_SET_MASK(vop, 0, vop->data->intr->name, mask, v, NORMAL)
 #define VOP_INTR_SET_TYPE(vop, name, type, v) \
        do { \
-               int i, reg = 0; \
+               int i, reg = 0, mask = 0; \
                for (i = 0; i < vop->data->intr->nintrs; i++) { \
-                       if (vop->data->intr->intrs[i] & type) \
+                       if (vop->data->intr->intrs[i] & type) \
                                reg |= (v) << i; \
+                               mask |= 1 << i; \
+                       } \
                } \
-               VOP_INTR_SET(vop, name, reg); \
+               VOP_INTR_SET(vop, name, mask, reg); \
        } while (0)
 #define VOP_INTR_GET_TYPE(vop, name, type) \
                vop_get_intr_type(vop, &vop->data->intr->name, type)
index 424d515..314ff71 100644 (file)
@@ -144,19 +144,16 @@ int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
 }
 #endif /* CONFIG_DEBUG_FS */
 
-/*
- * Asks the firmware to turn on power to the V3D engine.
- *
- * This may be doable with just the clocks interface, though this
- * packet does some other register setup from the firmware, too.
- */
 int
 vc4_v3d_set_power(struct vc4_dev *vc4, bool on)
 {
-       if (on)
-               return pm_generic_poweroff(&vc4->v3d->pdev->dev);
-       else
-               return pm_generic_resume(&vc4->v3d->pdev->dev);
+       /* XXX: This interface is needed for GPU reset, and the way to
+        * do it is to turn our power domain off and back on.  We
+        * can't just reset from within the driver, because the reset
+        * bits are in the power domain's register area, and get set
+        * during the poweron process.
+        */
+       return 0;
 }
 
 static void vc4_v3d_init_hw(struct drm_device *dev)
index c49812b..24fb348 100644 (file)
@@ -25,6 +25,7 @@
  *
  **************************************************************************/
 #include <linux/module.h>
+#include <linux/console.h>
 
 #include <drm/drmP.h>
 #include "vmwgfx_drv.h"
@@ -1538,6 +1539,12 @@ static int vmw_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 static int __init vmwgfx_init(void)
 {
        int ret;
+
+#ifdef CONFIG_VGA_CONSOLE
+       if (vgacon_text_force())
+               return -EINVAL;
+#endif
+
        ret = drm_pci_init(&driver, &vmw_pci_driver);
        if (ret)
                DRM_ERROR("Failed initializing DRM.\n");
index c848789..c43318d 100644 (file)
@@ -930,6 +930,17 @@ static struct dmi_system_id i8k_dmi_table[] __initdata = {
 MODULE_DEVICE_TABLE(dmi, i8k_dmi_table);
 
 static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = {
+       {
+               /*
+                * CPU fan speed going up and down on Dell Studio XPS 8000
+                * for unknown reasons.
+                */
+               .ident = "Dell Studio XPS 8000",
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Studio XPS 8000"),
+               },
+       },
        {
                /*
                 * CPU fan speed going up and down on Dell Studio XPS 8100
index f77eb97..4f695d8 100644 (file)
@@ -90,7 +90,15 @@ static ssize_t show_power(struct device *dev,
        pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 5),
                                  REG_TDP_LIMIT3, &val);
 
-       tdp_limit = val >> 16;
+       /*
+        * On Carrizo and later platforms, ApmTdpLimit bit field
+        * is extended to 16:31 from 16:28.
+        */
+       if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60)
+               tdp_limit = val >> 16;
+       else
+               tdp_limit = (val >> 16) & 0x1fff;
+
        curr_pwr_watts = ((u64)(tdp_limit +
                                data->base_tdp)) << running_avg_range;
        curr_pwr_watts -= running_avg_capture;
index 52f708b..d50c701 100644 (file)
@@ -313,6 +313,10 @@ int of_hwspin_lock_get_id(struct device_node *np, int index)
                hwlock = radix_tree_deref_slot(slot);
                if (unlikely(!hwlock))
                        continue;
+               if (radix_tree_is_indirect_ptr(hwlock)) {
+                       slot = radix_tree_iter_retry(&iter);
+                       continue;
+               }
 
                if (hwlock->bank->dev->of_node == args.np) {
                        ret = 0;
index ba9732c..10fbd6d 100644 (file)
@@ -874,7 +874,8 @@ int i2c_dw_probe(struct dw_i2c_dev *dev)
        i2c_set_adapdata(adap, dev);
 
        i2c_dw_disable_int(dev);
-       r = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr, IRQF_SHARED,
+       r = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr,
+                            IRQF_SHARED | IRQF_COND_SUSPEND,
                             dev_name(dev->dev), dev);
        if (r) {
                dev_err(dev->dev, "failure requesting irq %i: %d\n",
index e045985..93f2895 100644 (file)
@@ -137,10 +137,11 @@ static const struct dmi_system_id piix4_dmi_ibm[] = {
 };
 
 /* SB800 globals */
+static DEFINE_MUTEX(piix4_mutex_sb800);
 static const char *piix4_main_port_names_sb800[PIIX4_MAX_ADAPTERS] = {
-       "SDA0", "SDA2", "SDA3", "SDA4"
+       " port 0", " port 2", " port 3", " port 4"
 };
-static const char *piix4_aux_port_name_sb800 = "SDA1";
+static const char *piix4_aux_port_name_sb800 = " port 1";
 
 struct i2c_piix4_adapdata {
        unsigned short smba;
@@ -148,7 +149,6 @@ struct i2c_piix4_adapdata {
        /* SB800 */
        bool sb800_main;
        unsigned short port;
-       struct mutex *mutex;
 };
 
 static int piix4_setup(struct pci_dev *PIIX4_dev,
@@ -275,10 +275,12 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev,
        else
                smb_en = (aux) ? 0x28 : 0x2c;
 
+       mutex_lock(&piix4_mutex_sb800);
        outb_p(smb_en, SB800_PIIX4_SMB_IDX);
        smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1);
        outb_p(smb_en + 1, SB800_PIIX4_SMB_IDX);
        smba_en_hi = inb_p(SB800_PIIX4_SMB_IDX + 1);
+       mutex_unlock(&piix4_mutex_sb800);
 
        if (!smb_en) {
                smb_en_status = smba_en_lo & 0x10;
@@ -559,7 +561,7 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
        u8 port;
        int retval;
 
-       mutex_lock(adapdata->mutex);
+       mutex_lock(&piix4_mutex_sb800);
 
        outb_p(SB800_PIIX4_PORT_IDX, SB800_PIIX4_SMB_IDX);
        smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1);
@@ -574,7 +576,7 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
 
        outb_p(smba_en_lo, SB800_PIIX4_SMB_IDX + 1);
 
-       mutex_unlock(adapdata->mutex);
+       mutex_unlock(&piix4_mutex_sb800);
 
        return retval;
 }
@@ -625,6 +627,7 @@ static struct i2c_adapter *piix4_main_adapters[PIIX4_MAX_ADAPTERS];
 static struct i2c_adapter *piix4_aux_adapter;
 
 static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
+                            bool sb800_main, unsigned short port,
                             const char *name, struct i2c_adapter **padap)
 {
        struct i2c_adapter *adap;
@@ -639,7 +642,8 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
 
        adap->owner = THIS_MODULE;
        adap->class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
-       adap->algo = &smbus_algorithm;
+       adap->algo = sb800_main ? &piix4_smbus_algorithm_sb800
+                               : &smbus_algorithm;
 
        adapdata = kzalloc(sizeof(*adapdata), GFP_KERNEL);
        if (adapdata == NULL) {
@@ -649,12 +653,14 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
        }
 
        adapdata->smba = smba;
+       adapdata->sb800_main = sb800_main;
+       adapdata->port = port;
 
        /* set up the sysfs linkage to our parent device */
        adap->dev.parent = &dev->dev;
 
        snprintf(adap->name, sizeof(adap->name),
-               "SMBus PIIX4 adapter %s at %04x", name, smba);
+               "SMBus PIIX4 adapter%s at %04x", name, smba);
 
        i2c_set_adapdata(adap, adapdata);
 
@@ -673,30 +679,16 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
 
 static int piix4_add_adapters_sb800(struct pci_dev *dev, unsigned short smba)
 {
-       struct mutex *mutex;
        struct i2c_piix4_adapdata *adapdata;
        int port;
        int retval;
 
-       mutex = kzalloc(sizeof(*mutex), GFP_KERNEL);
-       if (mutex == NULL)
-               return -ENOMEM;
-
-       mutex_init(mutex);
-
        for (port = 0; port < PIIX4_MAX_ADAPTERS; port++) {
-               retval = piix4_add_adapter(dev, smba,
+               retval = piix4_add_adapter(dev, smba, true, port,
                                           piix4_main_port_names_sb800[port],
                                           &piix4_main_adapters[port]);
                if (retval < 0)
                        goto error;
-
-               piix4_main_adapters[port]->algo = &piix4_smbus_algorithm_sb800;
-
-               adapdata = i2c_get_adapdata(piix4_main_adapters[port]);
-               adapdata->sb800_main = true;
-               adapdata->port = port;
-               adapdata->mutex = mutex;
        }
 
        return retval;
@@ -714,19 +706,20 @@ error:
                }
        }
 
-       kfree(mutex);
-
        return retval;
 }
 
 static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
        int retval;
+       bool is_sb800 = false;
 
        if ((dev->vendor == PCI_VENDOR_ID_ATI &&
             dev->device == PCI_DEVICE_ID_ATI_SBX00_SMBUS &&
             dev->revision >= 0x40) ||
            dev->vendor == PCI_VENDOR_ID_AMD) {
+               is_sb800 = true;
+
                if (!request_region(SB800_PIIX4_SMB_IDX, 2, "smba_idx")) {
                        dev_err(&dev->dev,
                        "SMBus base address index region 0x%x already in use!\n",
@@ -756,7 +749,7 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
                        return retval;
 
                /* Try to register main SMBus adapter, give up if we can't */
-               retval = piix4_add_adapter(dev, retval, "main",
+               retval = piix4_add_adapter(dev, retval, false, 0, "",
                                           &piix4_main_adapters[0]);
                if (retval < 0)
                        return retval;
@@ -783,7 +776,8 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
        if (retval > 0) {
                /* Try to add the aux adapter if it exists,
                 * piix4_add_adapter will clean up if this fails */
-               piix4_add_adapter(dev, retval, piix4_aux_port_name_sb800,
+               piix4_add_adapter(dev, retval, false, 0,
+                                 is_sb800 ? piix4_aux_port_name_sb800 : "",
                                  &piix4_aux_adapter);
        }
 
@@ -798,10 +792,8 @@ static void piix4_adap_remove(struct i2c_adapter *adap)
                i2c_del_adapter(adap);
                if (adapdata->port == 0) {
                        release_region(adapdata->smba, SMBIOSIZE);
-                       if (adapdata->sb800_main) {
-                               kfree(adapdata->mutex);
+                       if (adapdata->sb800_main)
                                release_region(SB800_PIIX4_SMB_IDX, 2);
-                       }
                }
                kfree(adapdata);
                kfree(adap);
index edc29b1..833ea9d 100644 (file)
@@ -213,6 +213,7 @@ config STK8312
 config STK8BA50
        tristate "Sensortek STK8BA50 3-Axis Accelerometer Driver"
        depends on I2C
+       depends on IIO_TRIGGER
        help
          Say yes here to get support for the Sensortek STK8BA50 3-axis
          accelerometer.
index 605ff42..283ded7 100644 (file)
@@ -175,6 +175,7 @@ config DA9150_GPADC
 config EXYNOS_ADC
        tristate "Exynos ADC driver support"
        depends on ARCH_EXYNOS || ARCH_S3C24XX || ARCH_S3C64XX || (OF && COMPILE_TEST)
+       depends on HAS_IOMEM
        help
          Core support for the ADC block found in the Samsung EXYNOS series
          of SoCs for drivers such as the touchscreen and hwmon to use to share
@@ -207,6 +208,7 @@ config INA2XX_ADC
 config IMX7D_ADC
        tristate "IMX7D ADC driver"
        depends on ARCH_MXC || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Say yes here to build support for IMX7D ADC.
 
@@ -409,6 +411,7 @@ config TWL6030_GPADC
 config VF610_ADC
        tristate "Freescale vf610 ADC driver"
        depends on OF
+       depends on HAS_IOMEM
        select IIO_BUFFER
        select IIO_TRIGGERED_BUFFER
        help
index 942320e..c1e0553 100644 (file)
@@ -289,7 +289,7 @@ static int tiadc_iio_buffered_hardware_setup(struct iio_dev *indio_dev,
                goto error_kfifo_free;
 
        indio_dev->setup_ops = setup_ops;
-       indio_dev->modes |= INDIO_BUFFER_HARDWARE;
+       indio_dev->modes |= INDIO_BUFFER_SOFTWARE;
 
        return 0;
 
index 43d1458..b4dde83 100644 (file)
@@ -300,6 +300,7 @@ static int mcp4725_probe(struct i2c_client *client,
        data->client = client;
 
        indio_dev->dev.parent = &client->dev;
+       indio_dev->name = id->name;
        indio_dev->info = &mcp4725_info;
        indio_dev->channels = &mcp4725_channel;
        indio_dev->num_channels = 1;
index 1165b1c..cfc5a05 100644 (file)
@@ -117,7 +117,7 @@ static int dht11_decode(struct dht11 *dht11, int offset, int timeres)
        if (((hum_int + hum_dec + temp_int + temp_dec) & 0xff) != checksum)
                return -EIO;
 
-       dht11->timestamp = ktime_get_real_ns();
+       dht11->timestamp = ktime_get_boot_ns();
        if (hum_int < 20) {  /* DHT22 */
                dht11->temperature = (((temp_int & 0x7f) << 8) + temp_dec) *
                                        ((temp_int & 0x80) ? -100 : 100);
@@ -145,7 +145,7 @@ static irqreturn_t dht11_handle_irq(int irq, void *data)
 
        /* TODO: Consider making the handler safe for IRQ sharing */
        if (dht11->num_edges < DHT11_EDGES_PER_READ && dht11->num_edges >= 0) {
-               dht11->edges[dht11->num_edges].ts = ktime_get_real_ns();
+               dht11->edges[dht11->num_edges].ts = ktime_get_boot_ns();
                dht11->edges[dht11->num_edges++].value =
                                                gpio_get_value(dht11->gpio);
 
@@ -164,7 +164,7 @@ static int dht11_read_raw(struct iio_dev *iio_dev,
        int ret, timeres;
 
        mutex_lock(&dht11->lock);
-       if (dht11->timestamp + DHT11_DATA_VALID_TIME < ktime_get_real_ns()) {
+       if (dht11->timestamp + DHT11_DATA_VALID_TIME < ktime_get_boot_ns()) {
                timeres = ktime_get_resolution_ns();
                if (DHT11_DATA_BIT_HIGH < 2 * timeres) {
                        dev_err(dht11->dev, "timeresolution %dns too low\n",
@@ -279,7 +279,7 @@ static int dht11_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
-       dht11->timestamp = ktime_get_real_ns() - DHT11_DATA_VALID_TIME - 1;
+       dht11->timestamp = ktime_get_boot_ns() - DHT11_DATA_VALID_TIME - 1;
        dht11->num_edges = -1;
 
        platform_set_drvdata(pdev, iio);
index cb32b59..36607d5 100644 (file)
@@ -43,7 +43,7 @@ int adis_update_scan_mode(struct iio_dev *indio_dev,
                return -ENOMEM;
 
        rx = adis->buffer;
-       tx = rx + indio_dev->scan_bytes;
+       tx = rx + scan_count;
 
        spi_message_init(&adis->msg);
 
index 48fbc0b..8f8d137 100644 (file)
@@ -5,9 +5,9 @@
 config INV_MPU6050_IIO
        tristate "Invensense MPU6050 devices"
        depends on I2C && SYSFS
+       depends on I2C_MUX
        select IIO_BUFFER
        select IIO_TRIGGERED_BUFFER
-       select I2C_MUX
        help
          This driver supports the Invensense MPU6050 devices.
          This driver can also support MPU6500 in MPU6050 compatibility mode
index 80fbbfd..734a004 100644 (file)
@@ -349,6 +349,8 @@ EXPORT_SYMBOL_GPL(iio_channel_get);
 
 void iio_channel_release(struct iio_channel *channel)
 {
+       if (!channel)
+               return;
        iio_device_put(channel->indio_dev);
        kfree(channel);
 }
index 60537ec..53201d9 100644 (file)
@@ -54,7 +54,9 @@ static const struct iio_chan_spec acpi_als_channels[] = {
                        .realbits       = 32,
                        .storagebits    = 32,
                },
-               .info_mask_separate     = BIT(IIO_CHAN_INFO_RAW),
+               /* _RAW is here for backward ABI compatibility */
+               .info_mask_separate     = BIT(IIO_CHAN_INFO_RAW) |
+                                         BIT(IIO_CHAN_INFO_PROCESSED),
        },
 };
 
@@ -152,7 +154,7 @@ static int acpi_als_read_raw(struct iio_dev *indio_dev,
        s32 temp_val;
        int ret;
 
-       if (mask != IIO_CHAN_INFO_RAW)
+       if ((mask != IIO_CHAN_INFO_PROCESSED) && (mask != IIO_CHAN_INFO_RAW))
                return -EINVAL;
 
        /* we support only illumination (_ALI) so far. */
index 809a961..6bf89d8 100644 (file)
@@ -180,7 +180,7 @@ static const struct ltr501_samp_table ltr501_ps_samp_table[] = {
                        {500000, 2000000}
 };
 
-static unsigned int ltr501_match_samp_freq(const struct ltr501_samp_table *tab,
+static int ltr501_match_samp_freq(const struct ltr501_samp_table *tab,
                                           int len, int val, int val2)
 {
        int i, freq;
index f5ecd6e..a0d7dee 100644 (file)
@@ -117,7 +117,7 @@ static int mpl115_read_raw(struct iio_dev *indio_dev,
                *val = ret >> 6;
                return IIO_VAL_INT;
        case IIO_CHAN_INFO_OFFSET:
-               *val = 605;
+               *val = -605;
                *val2 = 750000;
                return IIO_VAL_INT_PLUS_MICRO;
        case IIO_CHAN_INFO_SCALE:
index 93e29fb..db35e04 100644 (file)
@@ -87,7 +87,7 @@ static int lidar_i2c_xfer(struct lidar_data *data, u8 reg, u8 *val, int len)
 
        ret = i2c_transfer(client->adapter, msg, 2);
 
-       return (ret == 2) ? 0 : ret;
+       return (ret == 2) ? 0 : -EIO;
 }
 
 static int lidar_smbus_xfer(struct lidar_data *data, u8 reg, u8 *val, int len)
index aa26f3c..8a8440c 100644 (file)
@@ -5,6 +5,7 @@ menuconfig INFINIBAND
        depends on NET
        depends on INET
        depends on m || IPV6 != m
+       select IRQ_POLL
        ---help---
          Core support for InfiniBand (IB).  Make sure to also select
          any protocols you wish to use as well as drivers for your
@@ -54,6 +55,15 @@ config INFINIBAND_ADDR_TRANS
        depends on INFINIBAND
        default y
 
+config INFINIBAND_ADDR_TRANS_CONFIGFS
+       bool
+       depends on INFINIBAND_ADDR_TRANS && CONFIGFS_FS && !(INFINIBAND=y && CONFIGFS_FS=m)
+       default y
+       ---help---
+         ConfigFS support for RDMA communication manager (CM).
+         This allows the user to configure the default GID type that the
+         CM uses for each device when initiating new connections.
+
 source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/qib/Kconfig"
 source "drivers/infiniband/hw/cxgb3/Kconfig"
index d43a899..f818538 100644 (file)
@@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_USER_MAD) +=    ib_umad.o
 obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=        ib_uverbs.o ib_ucm.o \
                                        $(user_access-y)
 
-ib_core-y :=                   packer.o ud_header.o verbs.o sysfs.o \
+ib_core-y :=                   packer.o ud_header.o verbs.o cq.o sysfs.o \
                                device.o fmr_pool.o cache.o netlink.o \
                                roce_gid_mgmt.o
 ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
@@ -24,6 +24,8 @@ iw_cm-y :=                    iwcm.o iwpm_util.o iwpm_msg.o
 
 rdma_cm-y :=                   cma.o
 
+rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o
+
 rdma_ucm-y :=                  ucma.o
 
 ib_addr-y :=                   addr.o
index 34b1ada..337353d 100644 (file)
@@ -121,7 +121,8 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 }
 EXPORT_SYMBOL(rdma_copy_addr);
 
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
+int rdma_translate_ip(const struct sockaddr *addr,
+                     struct rdma_dev_addr *dev_addr,
                      u16 *vlan_id)
 {
        struct net_device *dev;
@@ -139,7 +140,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
        switch (addr->sa_family) {
        case AF_INET:
                dev = ip_dev_find(dev_addr->net,
-                       ((struct sockaddr_in *) addr)->sin_addr.s_addr);
+                       ((const struct sockaddr_in *)addr)->sin_addr.s_addr);
 
                if (!dev)
                        return ret;
@@ -154,7 +155,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
                rcu_read_lock();
                for_each_netdev_rcu(dev_addr->net, dev) {
                        if (ipv6_chk_addr(dev_addr->net,
-                                         &((struct sockaddr_in6 *) addr)->sin6_addr,
+                                         &((const struct sockaddr_in6 *)addr)->sin6_addr,
                                          dev, 1)) {
                                ret = rdma_copy_addr(dev_addr, dev, NULL);
                                if (vlan_id)
@@ -198,7 +199,8 @@ static void queue_req(struct addr_req *req)
        mutex_unlock(&lock);
 }
 
-static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr)
+static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+                       const void *daddr)
 {
        struct neighbour *n;
        int ret;
@@ -222,8 +224,9 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, v
 }
 
 static int addr4_resolve(struct sockaddr_in *src_in,
-                        struct sockaddr_in *dst_in,
-                        struct rdma_dev_addr *addr)
+                        const struct sockaddr_in *dst_in,
+                        struct rdma_dev_addr *addr,
+                        struct rtable **prt)
 {
        __be32 src_ip = src_in->sin_addr.s_addr;
        __be32 dst_ip = dst_in->sin_addr.s_addr;
@@ -243,33 +246,29 @@ static int addr4_resolve(struct sockaddr_in *src_in,
        src_in->sin_family = AF_INET;
        src_in->sin_addr.s_addr = fl4.saddr;
 
-       if (rt->dst.dev->flags & IFF_LOOPBACK) {
-               ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
-               if (!ret)
-                       memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
-               goto put;
-       }
+       /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
+        * routable) and we could set the network type accordingly.
+        */
+       if (rt->rt_uses_gateway)
+               addr->network = RDMA_NETWORK_IPV4;
 
-       /* If the device does ARP internally, return 'done' */
-       if (rt->dst.dev->flags & IFF_NOARP) {
-               ret = rdma_copy_addr(addr, rt->dst.dev, NULL);
-               goto put;
-       }
+       addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
 
-       ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr);
-put:
-       ip_rt_put(rt);
+       *prt = rt;
+       return 0;
 out:
        return ret;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static int addr6_resolve(struct sockaddr_in6 *src_in,
-                        struct sockaddr_in6 *dst_in,
-                        struct rdma_dev_addr *addr)
+                        const struct sockaddr_in6 *dst_in,
+                        struct rdma_dev_addr *addr,
+                        struct dst_entry **pdst)
 {
        struct flowi6 fl6;
        struct dst_entry *dst;
+       struct rt6_info *rt;
        int ret;
 
        memset(&fl6, 0, sizeof fl6);
@@ -281,6 +280,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
        if ((ret = dst->error))
                goto put;
 
+       rt = (struct rt6_info *)dst;
        if (ipv6_addr_any(&fl6.saddr)) {
                ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
                                         &fl6.daddr, 0, &fl6.saddr);
@@ -291,43 +291,111 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
                src_in->sin6_addr = fl6.saddr;
        }
 
-       if (dst->dev->flags & IFF_LOOPBACK) {
-               ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
-               if (!ret)
-                       memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
-               goto put;
-       }
+       /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
+        * routable) and we could set the network type accordingly.
+        */
+       if (rt->rt6i_flags & RTF_GATEWAY)
+               addr->network = RDMA_NETWORK_IPV6;
 
-       /* If the device does ARP internally, return 'done' */
-       if (dst->dev->flags & IFF_NOARP) {
-               ret = rdma_copy_addr(addr, dst->dev, NULL);
-               goto put;
-       }
+       addr->hoplimit = ip6_dst_hoplimit(dst);
 
-       ret = dst_fetch_ha(dst, addr, &fl6.daddr);
+       *pdst = dst;
+       return 0;
 put:
        dst_release(dst);
        return ret;
 }
 #else
 static int addr6_resolve(struct sockaddr_in6 *src_in,
-                        struct sockaddr_in6 *dst_in,
-                        struct rdma_dev_addr *addr)
+                        const struct sockaddr_in6 *dst_in,
+                        struct rdma_dev_addr *addr,
+                        struct dst_entry **pdst)
 {
        return -EADDRNOTAVAIL;
 }
 #endif
 
+static int addr_resolve_neigh(struct dst_entry *dst,
+                             const struct sockaddr *dst_in,
+                             struct rdma_dev_addr *addr)
+{
+       if (dst->dev->flags & IFF_LOOPBACK) {
+               int ret;
+
+               ret = rdma_translate_ip(dst_in, addr, NULL);
+               if (!ret)
+                       memcpy(addr->dst_dev_addr, addr->src_dev_addr,
+                              MAX_ADDR_LEN);
+
+               return ret;
+       }
+
+       /* If the device doesn't do ARP internally */
+       if (!(dst->dev->flags & IFF_NOARP)) {
+               const struct sockaddr_in *dst_in4 =
+                       (const struct sockaddr_in *)dst_in;
+               const struct sockaddr_in6 *dst_in6 =
+                       (const struct sockaddr_in6 *)dst_in;
+
+               return dst_fetch_ha(dst, addr,
+                                   dst_in->sa_family == AF_INET ?
+                                   (const void *)&dst_in4->sin_addr.s_addr :
+                                   (const void *)&dst_in6->sin6_addr);
+       }
+
+       return rdma_copy_addr(addr, dst->dev, NULL);
+}
+
 static int addr_resolve(struct sockaddr *src_in,
-                       struct sockaddr *dst_in,
-                       struct rdma_dev_addr *addr)
+                       const struct sockaddr *dst_in,
+                       struct rdma_dev_addr *addr,
+                       bool resolve_neigh)
 {
+       struct net_device *ndev;
+       struct dst_entry *dst;
+       int ret;
+
        if (src_in->sa_family == AF_INET) {
-               return addr4_resolve((struct sockaddr_in *) src_in,
-                       (struct sockaddr_in *) dst_in, addr);
-       } else
-               return addr6_resolve((struct sockaddr_in6 *) src_in,
-                       (struct sockaddr_in6 *) dst_in, addr);
+               struct rtable *rt = NULL;
+               const struct sockaddr_in *dst_in4 =
+                       (const struct sockaddr_in *)dst_in;
+
+               ret = addr4_resolve((struct sockaddr_in *)src_in,
+                                   dst_in4, addr, &rt);
+               if (ret)
+                       return ret;
+
+               if (resolve_neigh)
+                       ret = addr_resolve_neigh(&rt->dst, dst_in, addr);
+
+               ndev = rt->dst.dev;
+               dev_hold(ndev);
+
+               ip_rt_put(rt);
+       } else {
+               const struct sockaddr_in6 *dst_in6 =
+                       (const struct sockaddr_in6 *)dst_in;
+
+               ret = addr6_resolve((struct sockaddr_in6 *)src_in,
+                                   dst_in6, addr,
+                                   &dst);
+               if (ret)
+                       return ret;
+
+               if (resolve_neigh)
+                       ret = addr_resolve_neigh(dst, dst_in, addr);
+
+               ndev = dst->dev;
+               dev_hold(ndev);
+
+               dst_release(dst);
+       }
+
+       addr->bound_dev_if = ndev->ifindex;
+       addr->net = dev_net(ndev);
+       dev_put(ndev);
+
+       return ret;
 }
 
 static void process_req(struct work_struct *work)
@@ -343,7 +411,8 @@ static void process_req(struct work_struct *work)
                if (req->status == -ENODATA) {
                        src_in = (struct sockaddr *) &req->src_addr;
                        dst_in = (struct sockaddr *) &req->dst_addr;
-                       req->status = addr_resolve(src_in, dst_in, req->addr);
+                       req->status = addr_resolve(src_in, dst_in, req->addr,
+                                                  true);
                        if (req->status && time_after_eq(jiffies, req->timeout))
                                req->status = -ETIMEDOUT;
                        else if (req->status == -ENODATA)
@@ -403,7 +472,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
        req->client = client;
        atomic_inc(&client->refcount);
 
-       req->status = addr_resolve(src_in, dst_in, addr);
+       req->status = addr_resolve(src_in, dst_in, addr, true);
        switch (req->status) {
        case 0:
                req->timeout = jiffies;
@@ -425,6 +494,26 @@ err:
 }
 EXPORT_SYMBOL(rdma_resolve_ip);
 
+int rdma_resolve_ip_route(struct sockaddr *src_addr,
+                         const struct sockaddr *dst_addr,
+                         struct rdma_dev_addr *addr)
+{
+       struct sockaddr_storage ssrc_addr = {};
+       struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
+
+       if (src_addr) {
+               if (src_addr->sa_family != dst_addr->sa_family)
+                       return -EINVAL;
+
+               memcpy(src_in, src_addr, rdma_addr_size(src_addr));
+       } else {
+               src_in->sa_family = dst_addr->sa_family;
+       }
+
+       return addr_resolve(src_in, dst_addr, addr, false);
+}
+EXPORT_SYMBOL(rdma_resolve_ip_route);
+
 void rdma_addr_cancel(struct rdma_dev_addr *addr)
 {
        struct addr_req *req, *temp_req;
@@ -456,8 +545,10 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
        complete(&((struct resolve_cb_context *)context)->comp);
 }
 
-int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
-                              u8 *dmac, u16 *vlan_id, int if_index)
+int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
+                                const union ib_gid *dgid,
+                                u8 *dmac, u16 *vlan_id, int *if_index,
+                                int *hoplimit)
 {
        int ret = 0;
        struct rdma_dev_addr dev_addr;
@@ -475,7 +566,8 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi
        rdma_gid2ip(&dgid_addr._sockaddr, dgid);
 
        memset(&dev_addr, 0, sizeof(dev_addr));
-       dev_addr.bound_dev_if = if_index;
+       if (if_index)
+               dev_addr.bound_dev_if = *if_index;
        dev_addr.net = &init_net;
 
        ctx.addr = &dev_addr;
@@ -491,12 +583,16 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi
        dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
        if (!dev)
                return -ENODEV;
+       if (if_index)
+               *if_index = dev_addr.bound_dev_if;
        if (vlan_id)
                *vlan_id = rdma_vlan_dev_vlan_id(dev);
+       if (hoplimit)
+               *hoplimit = dev_addr.hoplimit;
        dev_put(dev);
        return ret;
 }
-EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
+EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
 
 int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
 {
index 89bebea..53343ff 100644 (file)
@@ -64,6 +64,7 @@ enum gid_attr_find_mask {
        GID_ATTR_FIND_MASK_GID          = 1UL << 0,
        GID_ATTR_FIND_MASK_NETDEV       = 1UL << 1,
        GID_ATTR_FIND_MASK_DEFAULT      = 1UL << 2,
+       GID_ATTR_FIND_MASK_GID_TYPE     = 1UL << 3,
 };
 
 enum gid_table_entry_props {
@@ -81,10 +82,6 @@ enum gid_table_write_action {
 };
 
 struct ib_gid_table_entry {
-       /* This lock protects an entry from being
-        * read and written simultaneously.
-        */
-       rwlock_t            lock;
        unsigned long       props;
        union ib_gid        gid;
        struct ib_gid_attr  attr;
@@ -109,28 +106,86 @@ struct ib_gid_table {
         * are locked by this lock.
         **/
        struct mutex         lock;
+       /* This lock protects the table entries from being
+        * read and written simultaneously.
+        */
+       rwlock_t             rwlock;
        struct ib_gid_table_entry *data_vec;
 };
 
+static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
+{
+       if (rdma_cap_roce_gid_table(ib_dev, port)) {
+               struct ib_event event;
+
+               event.device            = ib_dev;
+               event.element.port_num  = port;
+               event.event             = IB_EVENT_GID_CHANGE;
+
+               ib_dispatch_event(&event);
+       }
+}
+
+static const char * const gid_type_str[] = {
+       [IB_GID_TYPE_IB]        = "IB/RoCE v1",
+       [IB_GID_TYPE_ROCE_UDP_ENCAP]    = "RoCE v2",
+};
+
+const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
+{
+       if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
+               return gid_type_str[gid_type];
+
+       return "Invalid GID type";
+}
+EXPORT_SYMBOL(ib_cache_gid_type_str);
+
+int ib_cache_gid_parse_type_str(const char *buf)
+{
+       unsigned int i;
+       size_t len;
+       int err = -EINVAL;
+
+       len = strlen(buf);
+       if (len == 0)
+               return -EINVAL;
+
+       if (buf[len - 1] == '\n')
+               len--;
+
+       for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
+               if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
+                   len == strlen(gid_type_str[i])) {
+                       err = i;
+                       break;
+               }
+
+       return err;
+}
+EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
+
+/* This function expects table->rwlock to be write-locked in all
+ * scenarios, and table->lock (the mutex) to be held as well in
+ * sleep-able (RoCE) scenarios.
+ */
 static int write_gid(struct ib_device *ib_dev, u8 port,
                     struct ib_gid_table *table, int ix,
                     const union ib_gid *gid,
                     const struct ib_gid_attr *attr,
                     enum gid_table_write_action action,
                     bool  default_gid)
+       __releases(&table->rwlock) __acquires(&table->rwlock)
 {
        int ret = 0;
        struct net_device *old_net_dev;
-       unsigned long flags;
 
        /* in rdma_cap_roce_gid_table, this funciton should be protected by a
         * sleep-able lock.
         */
-       write_lock_irqsave(&table->data_vec[ix].lock, flags);
 
        if (rdma_cap_roce_gid_table(ib_dev, port)) {
                table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
-               write_unlock_irqrestore(&table->data_vec[ix].lock, flags);
+               write_unlock_irq(&table->rwlock);
                /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
                 * RoCE providers and thus only updates the cache.
                 */
@@ -140,7 +195,7 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
                else if (action == GID_TABLE_WRITE_ACTION_DEL)
                        ret = ib_dev->del_gid(ib_dev, port, ix,
                                              &table->data_vec[ix].context);
-               write_lock_irqsave(&table->data_vec[ix].lock, flags);
+               write_lock_irq(&table->rwlock);
        }
 
        old_net_dev = table->data_vec[ix].attr.ndev;
@@ -162,17 +217,6 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
 
        table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;
 
-       write_unlock_irqrestore(&table->data_vec[ix].lock, flags);
-
-       if (!ret && rdma_cap_roce_gid_table(ib_dev, port)) {
-               struct ib_event event;
-
-               event.device            = ib_dev;
-               event.element.port_num  = port;
-               event.event             = IB_EVENT_GID_CHANGE;
-
-               ib_dispatch_event(&event);
-       }
        return ret;
 }
 
@@ -201,41 +245,58 @@ static int del_gid(struct ib_device *ib_dev, u8 port,
                         GID_TABLE_WRITE_ACTION_DEL, default_gid);
 }
 
+/* table->rwlock must be held by the caller (read or write locked) */
 static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
                    const struct ib_gid_attr *val, bool default_gid,
-                   unsigned long mask)
+                   unsigned long mask, int *pempty)
 {
-       int i;
+       int i = 0;
+       int found = -1;
+       int empty = pempty ? -1 : 0;
 
-       for (i = 0; i < table->sz; i++) {
-               unsigned long flags;
-               struct ib_gid_attr *attr = &table->data_vec[i].attr;
+       while (i < table->sz && (found < 0 || empty < 0)) {
+               struct ib_gid_table_entry *data = &table->data_vec[i];
+               struct ib_gid_attr *attr = &data->attr;
+               int curr_index = i;
 
-               read_lock_irqsave(&table->data_vec[i].lock, flags);
+               i++;
 
-               if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
-                       goto next;
+               if (data->props & GID_TABLE_ENTRY_INVALID)
+                       continue;
+
+               if (empty < 0)
+                       if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
+                           !memcmp(attr, &zattr, sizeof(*attr)) &&
+                           !data->props)
+                               empty = curr_index;
+
+               if (found >= 0)
+                       continue;
+
+               if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
+                   attr->gid_type != val->gid_type)
+                       continue;
 
                if (mask & GID_ATTR_FIND_MASK_GID &&
-                   memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
-                       goto next;
+                   memcmp(gid, &data->gid, sizeof(*gid)))
+                       continue;
 
                if (mask & GID_ATTR_FIND_MASK_NETDEV &&
                    attr->ndev != val->ndev)
-                       goto next;
+                       continue;
 
                if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
-                   !!(table->data_vec[i].props & GID_TABLE_ENTRY_DEFAULT) !=
+                   !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
                    default_gid)
-                       goto next;
+                       continue;
 
-               read_unlock_irqrestore(&table->data_vec[i].lock, flags);
-               return i;
-next:
-               read_unlock_irqrestore(&table->data_vec[i].lock, flags);
+               found = curr_index;
        }
 
-       return -1;
+       if (pempty)
+               *pempty = empty;
+
+       return found;
 }
 
 static void make_default_gid(struct  net_device *dev, union ib_gid *gid)
@@ -252,6 +313,7 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
        int ix;
        int ret = 0;
        struct net_device *idev;
+       int empty;
 
        table = ports_table[port - rdma_start_port(ib_dev)];
 
@@ -275,22 +337,25 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
        }
 
        mutex_lock(&table->lock);
+       write_lock_irq(&table->rwlock);
 
        ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
-                     GID_ATTR_FIND_MASK_NETDEV);
+                     GID_ATTR_FIND_MASK_GID_TYPE |
+                     GID_ATTR_FIND_MASK_NETDEV, &empty);
        if (ix >= 0)
                goto out_unlock;
 
-       ix = find_gid(table, &zgid, NULL, false, GID_ATTR_FIND_MASK_GID |
-                     GID_ATTR_FIND_MASK_DEFAULT);
-       if (ix < 0) {
+       if (empty < 0) {
                ret = -ENOSPC;
                goto out_unlock;
        }
 
-       add_gid(ib_dev, port, table, ix, gid, attr, false);
+       ret = add_gid(ib_dev, port, table, empty, gid, attr, false);
+       if (!ret)
+               dispatch_gid_change_event(ib_dev, port);
 
 out_unlock:
+       write_unlock_irq(&table->rwlock);
        mutex_unlock(&table->lock);
        return ret;
 }
@@ -305,17 +370,22 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
        table = ports_table[port - rdma_start_port(ib_dev)];
 
        mutex_lock(&table->lock);
+       write_lock_irq(&table->rwlock);
 
        ix = find_gid(table, gid, attr, false,
                      GID_ATTR_FIND_MASK_GID      |
+                     GID_ATTR_FIND_MASK_GID_TYPE |
                      GID_ATTR_FIND_MASK_NETDEV   |
-                     GID_ATTR_FIND_MASK_DEFAULT);
+                     GID_ATTR_FIND_MASK_DEFAULT,
+                     NULL);
        if (ix < 0)
                goto out_unlock;
 
-       del_gid(ib_dev, port, table, ix, false);
+       if (!del_gid(ib_dev, port, table, ix, false))
+               dispatch_gid_change_event(ib_dev, port);
 
 out_unlock:
+       write_unlock_irq(&table->rwlock);
        mutex_unlock(&table->lock);
        return 0;
 }
@@ -326,16 +396,24 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        struct ib_gid_table *table;
        int ix;
+       bool deleted = false;
 
        table  = ports_table[port - rdma_start_port(ib_dev)];
 
        mutex_lock(&table->lock);
+       write_lock_irq(&table->rwlock);
 
        for (ix = 0; ix < table->sz; ix++)
                if (table->data_vec[ix].attr.ndev == ndev)
-                       del_gid(ib_dev, port, table, ix, false);
+                       if (!del_gid(ib_dev, port, table, ix, false))
+                               deleted = true;
 
+       write_unlock_irq(&table->rwlock);
        mutex_unlock(&table->lock);
+
+       if (deleted)
+               dispatch_gid_change_event(ib_dev, port);
+
        return 0;
 }
 
@@ -344,18 +422,14 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
 {
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        struct ib_gid_table *table;
-       unsigned long flags;
 
        table = ports_table[port - rdma_start_port(ib_dev)];
 
        if (index < 0 || index >= table->sz)
                return -EINVAL;
 
-       read_lock_irqsave(&table->data_vec[index].lock, flags);
-       if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) {
-               read_unlock_irqrestore(&table->data_vec[index].lock, flags);
+       if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
                return -EAGAIN;
-       }
 
        memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
        if (attr) {
@@ -364,7 +438,6 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
                        dev_hold(attr->ndev);
        }
 
-       read_unlock_irqrestore(&table->data_vec[index].lock, flags);
        return 0;
 }
 
@@ -378,17 +451,21 @@ static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
        struct ib_gid_table *table;
        u8 p;
        int local_index;
+       unsigned long flags;
 
        for (p = 0; p < ib_dev->phys_port_cnt; p++) {
                table = ports_table[p];
-               local_index = find_gid(table, gid, val, false, mask);
+               read_lock_irqsave(&table->rwlock, flags);
+               local_index = find_gid(table, gid, val, false, mask, NULL);
                if (local_index >= 0) {
                        if (index)
                                *index = local_index;
                        if (port)
                                *port = p + rdma_start_port(ib_dev);
+                       read_unlock_irqrestore(&table->rwlock, flags);
                        return 0;
                }
+               read_unlock_irqrestore(&table->rwlock, flags);
        }
 
        return -ENOENT;
@@ -396,11 +473,13 @@ static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
 
 static int ib_cache_gid_find(struct ib_device *ib_dev,
                             const union ib_gid *gid,
+                            enum ib_gid_type gid_type,
                             struct net_device *ndev, u8 *port,
                             u16 *index)
 {
-       unsigned long mask = GID_ATTR_FIND_MASK_GID;
-       struct ib_gid_attr gid_attr_val = {.ndev = ndev};
+       unsigned long mask = GID_ATTR_FIND_MASK_GID |
+                            GID_ATTR_FIND_MASK_GID_TYPE;
+       struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
 
        if (ndev)
                mask |= GID_ATTR_FIND_MASK_NETDEV;
@@ -411,14 +490,17 @@ static int ib_cache_gid_find(struct ib_device *ib_dev,
 
 int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
                               const union ib_gid *gid,
+                              enum ib_gid_type gid_type,
                               u8 port, struct net_device *ndev,
                               u16 *index)
 {
        int local_index;
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        struct ib_gid_table *table;
-       unsigned long mask = GID_ATTR_FIND_MASK_GID;
-       struct ib_gid_attr val = {.ndev = ndev};
+       unsigned long mask = GID_ATTR_FIND_MASK_GID |
+                            GID_ATTR_FIND_MASK_GID_TYPE;
+       struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
+       unsigned long flags;
 
        if (port < rdma_start_port(ib_dev) ||
            port > rdma_end_port(ib_dev))
@@ -429,13 +511,16 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
        if (ndev)
                mask |= GID_ATTR_FIND_MASK_NETDEV;
 
-       local_index = find_gid(table, gid, &val, false, mask);
+       read_lock_irqsave(&table->rwlock, flags);
+       local_index = find_gid(table, gid, &val, false, mask, NULL);
        if (local_index >= 0) {
                if (index)
                        *index = local_index;
+               read_unlock_irqrestore(&table->rwlock, flags);
                return 0;
        }
 
+       read_unlock_irqrestore(&table->rwlock, flags);
        return -ENOENT;
 }
 EXPORT_SYMBOL(ib_find_cached_gid_by_port);
@@ -472,6 +557,7 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        struct ib_gid_table *table;
        unsigned int i;
+       unsigned long flags;
        bool found = false;
 
        if (!ports_table)
@@ -484,11 +570,10 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
 
        table = ports_table[port - rdma_start_port(ib_dev)];
 
+       read_lock_irqsave(&table->rwlock, flags);
        for (i = 0; i < table->sz; i++) {
                struct ib_gid_attr attr;
-               unsigned long flags;
 
-               read_lock_irqsave(&table->data_vec[i].lock, flags);
                if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
                        goto next;
 
@@ -501,11 +586,10 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
                        found = true;
 
 next:
-               read_unlock_irqrestore(&table->data_vec[i].lock, flags);
-
                if (found)
                        break;
        }
+       read_unlock_irqrestore(&table->rwlock, flags);
 
        if (!found)
                return -ENOENT;
@@ -517,9 +601,9 @@ next:
 
 static struct ib_gid_table *alloc_gid_table(int sz)
 {
-       unsigned int i;
        struct ib_gid_table *table =
                kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
+
        if (!table)
                return NULL;
 
@@ -530,9 +614,7 @@ static struct ib_gid_table *alloc_gid_table(int sz)
        mutex_init(&table->lock);
 
        table->sz = sz;
-
-       for (i = 0; i < sz; i++)
-               rwlock_init(&table->data_vec[i].lock);
+       rwlock_init(&table->rwlock);
 
        return table;
 
@@ -553,30 +635,37 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
                                   struct ib_gid_table *table)
 {
        int i;
+       bool deleted = false;
 
        if (!table)
                return;
 
+       write_lock_irq(&table->rwlock);
        for (i = 0; i < table->sz; ++i) {
                if (memcmp(&table->data_vec[i].gid, &zgid,
                           sizeof(table->data_vec[i].gid)))
-                       del_gid(ib_dev, port, table, i,
-                               table->data_vec[i].props &
-                               GID_ATTR_FIND_MASK_DEFAULT);
+                       if (!del_gid(ib_dev, port, table, i,
+                                    table->data_vec[i].props &
+                                    GID_ATTR_FIND_MASK_DEFAULT))
+                               deleted = true;
        }
+       write_unlock_irq(&table->rwlock);
+
+       if (deleted)
+               dispatch_gid_change_event(ib_dev, port);
 }
 
 void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
                                  struct net_device *ndev,
+                                 unsigned long gid_type_mask,
                                  enum ib_cache_gid_default_mode mode)
 {
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        union ib_gid gid;
        struct ib_gid_attr gid_attr;
+       struct ib_gid_attr zattr_type = zattr;
        struct ib_gid_table *table;
-       int ix;
-       union ib_gid current_gid;
-       struct ib_gid_attr current_gid_attr = {};
+       unsigned int gid_type;
 
        table  = ports_table[port - rdma_start_port(ib_dev)];
 
@@ -584,46 +673,82 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
        memset(&gid_attr, 0, sizeof(gid_attr));
        gid_attr.ndev = ndev;
 
-       mutex_lock(&table->lock);
-       ix = find_gid(table, NULL, NULL, true, GID_ATTR_FIND_MASK_DEFAULT);
-
-       /* Coudn't find default GID location */
-       WARN_ON(ix < 0);
-
-       if (!__ib_cache_gid_get(ib_dev, port, ix,
-                               &current_gid, &current_gid_attr) &&
-           mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
-           !memcmp(&gid, &current_gid, sizeof(gid)) &&
-           !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
-               goto unlock;
-
-       if ((memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
-            memcmp(&current_gid_attr, &zattr,
-                   sizeof(current_gid_attr))) &&
-           del_gid(ib_dev, port, table, ix, true)) {
-               pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
-                       ix, gid.raw);
-               goto unlock;
-       }
+       for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
+               int ix;
+               union ib_gid current_gid;
+               struct ib_gid_attr current_gid_attr = {};
+
+               if (1UL << gid_type & ~gid_type_mask)
+                       continue;
+
+               gid_attr.gid_type = gid_type;
+
+               mutex_lock(&table->lock);
+               write_lock_irq(&table->rwlock);
+               ix = find_gid(table, NULL, &gid_attr, true,
+                             GID_ATTR_FIND_MASK_GID_TYPE |
+                             GID_ATTR_FIND_MASK_DEFAULT,
+                             NULL);
+
+               /* Couldn't find default GID location */
+               WARN_ON(ix < 0);
+
+               zattr_type.gid_type = gid_type;
+
+               if (!__ib_cache_gid_get(ib_dev, port, ix,
+                                       &current_gid, &current_gid_attr) &&
+                   mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
+                   !memcmp(&gid, &current_gid, sizeof(gid)) &&
+                   !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
+                       goto release;
+
+               if (memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
+                   memcmp(&current_gid_attr, &zattr_type,
+                          sizeof(current_gid_attr))) {
+                       if (del_gid(ib_dev, port, table, ix, true)) {
+                               pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
+                                       ix, gid.raw);
+                               goto release;
+                       } else {
+                               dispatch_gid_change_event(ib_dev, port);
+                       }
+               }
 
-       if (mode == IB_CACHE_GID_DEFAULT_MODE_SET)
-               if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
-                       pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
-                               gid.raw);
+               if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
+                       if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
+                               pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
+                                       gid.raw);
+                       else
+                               dispatch_gid_change_event(ib_dev, port);
+               }
 
-unlock:
-       if (current_gid_attr.ndev)
-               dev_put(current_gid_attr.ndev);
-       mutex_unlock(&table->lock);
+release:
+               if (current_gid_attr.ndev)
+                       dev_put(current_gid_attr.ndev);
+               write_unlock_irq(&table->rwlock);
+               mutex_unlock(&table->lock);
+       }
 }
 
 static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
                                     struct ib_gid_table *table)
 {
-       if (rdma_protocol_roce(ib_dev, port)) {
-               struct ib_gid_table_entry *entry = &table->data_vec[0];
+       unsigned int i;
+       unsigned long roce_gid_type_mask;
+       unsigned int num_default_gids;
+       unsigned int current_gid = 0;
+
+       roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+       num_default_gids = hweight_long(roce_gid_type_mask);
+       for (i = 0; i < num_default_gids && i < table->sz; i++) {
+               struct ib_gid_table_entry *entry =
+                       &table->data_vec[i];
 
                entry->props |= GID_TABLE_ENTRY_DEFAULT;
+               current_gid = find_next_bit(&roce_gid_type_mask,
+                                           BITS_PER_LONG,
+                                           current_gid);
+               entry->attr.gid_type = current_gid++;
        }
 
        return 0;
@@ -728,20 +853,30 @@ int ib_get_cached_gid(struct ib_device *device,
                      union ib_gid     *gid,
                      struct ib_gid_attr *gid_attr)
 {
+       int res;
+       unsigned long flags;
+       struct ib_gid_table **ports_table = device->cache.gid_cache;
+       struct ib_gid_table *table = ports_table[port_num - rdma_start_port(device)];
+
        if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
                return -EINVAL;
 
-       return __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
+       read_lock_irqsave(&table->rwlock, flags);
+       res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
+       read_unlock_irqrestore(&table->rwlock, flags);
+
+       return res;
 }
 EXPORT_SYMBOL(ib_get_cached_gid);
 
 int ib_find_cached_gid(struct ib_device *device,
                       const union ib_gid *gid,
+                      enum ib_gid_type gid_type,
                       struct net_device *ndev,
                       u8               *port_num,
                       u16              *index)
 {
-       return ib_cache_gid_find(device, gid, ndev, port_num, index);
+       return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
 }
 EXPORT_SYMBOL(ib_find_cached_gid);
 
@@ -956,10 +1091,12 @@ static void ib_cache_update(struct ib_device *device,
 
        device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache;
        if (!use_roce_gid_table) {
+               write_lock(&table->rwlock);
                for (i = 0; i < gid_cache->table_len; i++) {
                        modify_gid(device, port, table, i, gid_cache->table + i,
                                   &zattr, false);
                }
+               write_unlock(&table->rwlock);
        }
 
        device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc;
index 0a26dd6..1d92e09 100644 (file)
@@ -364,7 +364,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
        read_lock_irqsave(&cm.device_lock, flags);
        list_for_each_entry(cm_dev, &cm.device_list, list) {
                if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
-                                       ndev, &p, NULL)) {
+                                       path->gid_type, ndev, &p, NULL)) {
                        port = cm_dev->port[p-1];
                        break;
                }
@@ -782,11 +782,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
        wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
 
        /* Check if the device started its remove_one */
-       spin_lock_irq(&cm.lock);
+       spin_lock_irqsave(&cm.lock, flags);
        if (!cm_dev->going_down)
                queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
                                   msecs_to_jiffies(wait_time));
-       spin_unlock_irq(&cm.lock);
+       spin_unlock_irqrestore(&cm.lock, flags);
 
        cm_id_priv->timewait_info = NULL;
 }
@@ -1600,6 +1600,8 @@ static int cm_req_handler(struct cm_work *work)
        struct ib_cm_id *cm_id;
        struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
        struct cm_req_msg *req_msg;
+       union ib_gid gid;
+       struct ib_gid_attr gid_attr;
        int ret;
 
        req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1639,11 +1641,31 @@ static int cm_req_handler(struct cm_work *work)
        cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
 
        memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
-       ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+       work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit;
+       ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
+                               work->port->port_num,
+                               cm_id_priv->av.ah_attr.grh.sgid_index,
+                               &gid, &gid_attr);
+       if (!ret) {
+               if (gid_attr.ndev) {
+                       work->path[0].ifindex = gid_attr.ndev->ifindex;
+                       work->path[0].net = dev_net(gid_attr.ndev);
+                       dev_put(gid_attr.ndev);
+               }
+               work->path[0].gid_type = gid_attr.gid_type;
+               ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+       }
        if (ret) {
-               ib_get_cached_gid(work->port->cm_dev->ib_device,
-                                 work->port->port_num, 0, &work->path[0].sgid,
-                                 NULL);
+               int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
+                                           work->port->port_num, 0,
+                                           &work->path[0].sgid,
+                                           &gid_attr);
+               if (!err && gid_attr.ndev) {
+                       work->path[0].ifindex = gid_attr.ndev->ifindex;
+                       work->path[0].net = dev_net(gid_attr.ndev);
+                       dev_put(gid_attr.ndev);
+               }
+               work->path[0].gid_type = gid_attr.gid_type;
                ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
                               &work->path[0].sgid, sizeof work->path[0].sgid,
                               NULL, 0);
@@ -3482,6 +3504,7 @@ int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
 EXPORT_SYMBOL(ib_cm_notify);
 
 static void cm_recv_handler(struct ib_mad_agent *mad_agent,
+                           struct ib_mad_send_buf *send_buf,
                            struct ib_mad_recv_wc *mad_recv_wc)
 {
        struct cm_port *port = mad_agent->context;
@@ -3731,16 +3754,6 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
 }
 EXPORT_SYMBOL(ib_cm_init_qp_attr);
 
-static void cm_get_ack_delay(struct cm_device *cm_dev)
-{
-       struct ib_device_attr attr;
-
-       if (ib_query_device(cm_dev->ib_device, &attr))
-               cm_dev->ack_delay = 0; /* acks will rely on packet life time */
-       else
-               cm_dev->ack_delay = attr.local_ca_ack_delay;
-}
-
 static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
                               char *buf)
 {
@@ -3852,7 +3865,7 @@ static void cm_add_one(struct ib_device *ib_device)
                return;
 
        cm_dev->ib_device = ib_device;
-       cm_get_ack_delay(cm_dev);
+       cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
        cm_dev->going_down = 0;
        cm_dev->device = device_create(&cm_class, &ib_device->dev,
                                       MKDEV(0, 0), NULL,
index 2d762a2..9729639 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/in6.h>
 #include <linux/mutex.h>
 #include <linux/random.h>
+#include <linux/igmp.h>
 #include <linux/idr.h>
 #include <linux/inetdevice.h>
 #include <linux/slab.h>
@@ -60,6 +61,8 @@
 #include <rdma/ib_sa.h>
 #include <rdma/iw_cm.h>
 
+#include "core_priv.h"
+
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("Generic RDMA CM Agent");
 MODULE_LICENSE("Dual BSD/GPL");
@@ -150,6 +153,7 @@ struct cma_device {
        struct completion       comp;
        atomic_t                refcount;
        struct list_head        id_list;
+       enum ib_gid_type        *default_gid_type;
 };
 
 struct rdma_bind_list {
@@ -185,6 +189,67 @@ enum {
        CMA_OPTION_AFONLY,
 };
 
+void cma_ref_dev(struct cma_device *cma_dev)
+{
+       atomic_inc(&cma_dev->refcount);
+}
+
+struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
+                                            void               *cookie)
+{
+       struct cma_device *cma_dev;
+       struct cma_device *found_cma_dev = NULL;
+
+       mutex_lock(&lock);
+
+       list_for_each_entry(cma_dev, &dev_list, list)
+               if (filter(cma_dev->device, cookie)) {
+                       found_cma_dev = cma_dev;
+                       break;
+               }
+
+       if (found_cma_dev)
+               cma_ref_dev(found_cma_dev);
+       mutex_unlock(&lock);
+       return found_cma_dev;
+}
+
+int cma_get_default_gid_type(struct cma_device *cma_dev,
+                            unsigned int port)
+{
+       if (port < rdma_start_port(cma_dev->device) ||
+           port > rdma_end_port(cma_dev->device))
+               return -EINVAL;
+
+       return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
+}
+
+int cma_set_default_gid_type(struct cma_device *cma_dev,
+                            unsigned int port,
+                            enum ib_gid_type default_gid_type)
+{
+       unsigned long supported_gids;
+
+       if (port < rdma_start_port(cma_dev->device) ||
+           port > rdma_end_port(cma_dev->device))
+               return -EINVAL;
+
+       supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
+
+       if (!(supported_gids & 1 << default_gid_type))
+               return -EINVAL;
+
+       cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
+               default_gid_type;
+
+       return 0;
+}
+
+struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
+{
+       return cma_dev->device;
+}
+
 /*
  * Device removal can occur at anytime, so we need extra handling to
  * serialize notifying the user of device removal with other callbacks.
@@ -228,6 +293,7 @@ struct rdma_id_private {
        u8                      tos;
        u8                      reuseaddr;
        u8                      afonly;
+       enum ib_gid_type        gid_type;
 };
 
 struct cma_multicast {
@@ -239,6 +305,7 @@ struct cma_multicast {
        void                    *context;
        struct sockaddr_storage addr;
        struct kref             mcref;
+       bool                    igmp_joined;
 };
 
 struct cma_work {
@@ -335,18 +402,48 @@ static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
        hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
 }
 
-static void cma_attach_to_dev(struct rdma_id_private *id_priv,
-                             struct cma_device *cma_dev)
+static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
 {
-       atomic_inc(&cma_dev->refcount);
+       struct in_device *in_dev = NULL;
+
+       if (ndev) {
+               rtnl_lock();
+               in_dev = __in_dev_get_rtnl(ndev);
+               if (in_dev) {
+                       if (join)
+                               ip_mc_inc_group(in_dev,
+                                               *(__be32 *)(mgid->raw + 12));
+                       else
+                               ip_mc_dec_group(in_dev,
+                                               *(__be32 *)(mgid->raw + 12));
+               }
+               rtnl_unlock();
+       }
+       return (in_dev) ? 0 : -ENODEV;
+}
+
+static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
+                              struct cma_device *cma_dev)
+{
+       cma_ref_dev(cma_dev);
        id_priv->cma_dev = cma_dev;
+       id_priv->gid_type = 0;
        id_priv->id.device = cma_dev->device;
        id_priv->id.route.addr.dev_addr.transport =
                rdma_node_get_transport(cma_dev->device->node_type);
        list_add_tail(&id_priv->list, &cma_dev->id_list);
 }
 
-static inline void cma_deref_dev(struct cma_device *cma_dev)
+static void cma_attach_to_dev(struct rdma_id_private *id_priv,
+                             struct cma_device *cma_dev)
+{
+       _cma_attach_to_dev(id_priv, cma_dev);
+       id_priv->gid_type =
+               cma_dev->default_gid_type[id_priv->id.port_num -
+                                         rdma_start_port(cma_dev->device)];
+}
+
+void cma_deref_dev(struct cma_device *cma_dev)
 {
        if (atomic_dec_and_test(&cma_dev->refcount))
                complete(&cma_dev->comp);
@@ -441,6 +538,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
 }
 
 static inline int cma_validate_port(struct ib_device *device, u8 port,
+                                   enum ib_gid_type gid_type,
                                      union ib_gid *gid, int dev_type,
                                      int bound_if_index)
 {
@@ -453,10 +551,25 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
        if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
                return ret;
 
-       if (dev_type == ARPHRD_ETHER)
+       if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
                ndev = dev_get_by_index(&init_net, bound_if_index);
+               if (ndev && ndev->flags & IFF_LOOPBACK) {
+                       pr_info("detected loopback device\n");
+                       dev_put(ndev);
 
-       ret = ib_find_cached_gid_by_port(device, gid, port, ndev, NULL);
+                       if (!device->get_netdev)
+                               return -EOPNOTSUPP;
+
+                       ndev = device->get_netdev(device, port);
+                       if (!ndev)
+                               return -ENODEV;
+               }
+       } else {
+               gid_type = IB_GID_TYPE_IB;
+       }
+
+       ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
+                                        ndev, NULL);
 
        if (ndev)
                dev_put(ndev);
@@ -490,7 +603,10 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
                gidp = rdma_protocol_roce(cma_dev->device, port) ?
                       &iboe_gid : &gid;
 
-               ret = cma_validate_port(cma_dev->device, port, gidp,
+               ret = cma_validate_port(cma_dev->device, port,
+                                       rdma_protocol_ib(cma_dev->device, port) ?
+                                       IB_GID_TYPE_IB :
+                                       listen_id_priv->gid_type, gidp,
                                        dev_addr->dev_type,
                                        dev_addr->bound_dev_if);
                if (!ret) {
@@ -509,8 +625,11 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
                        gidp = rdma_protocol_roce(cma_dev->device, port) ?
                               &iboe_gid : &gid;
 
-                       ret = cma_validate_port(cma_dev->device, port, gidp,
-                                               dev_addr->dev_type,
+                       ret = cma_validate_port(cma_dev->device, port,
+                                               rdma_protocol_ib(cma_dev->device, port) ?
+                                               IB_GID_TYPE_IB :
+                                               cma_dev->default_gid_type[port - 1],
+                                               gidp, dev_addr->dev_type,
                                                dev_addr->bound_dev_if);
                        if (!ret) {
                                id_priv->id.port_num = port;
@@ -1437,8 +1556,24 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
                                      id_priv->id.port_num)) {
                        ib_sa_free_multicast(mc->multicast.ib);
                        kfree(mc);
-               } else
+               } else {
+                       if (mc->igmp_joined) {
+                               struct rdma_dev_addr *dev_addr =
+                                       &id_priv->id.route.addr.dev_addr;
+                               struct net_device *ndev = NULL;
+
+                               if (dev_addr->bound_dev_if)
+                                       ndev = dev_get_by_index(&init_net,
+                                                               dev_addr->bound_dev_if);
+                               if (ndev) {
+                                       cma_igmp_send(ndev,
+                                                     &mc->multicast.ib->rec.mgid,
+                                                     false);
+                                       dev_put(ndev);
+                               }
+                       }
                        kref_put(&mc->mcref, release_mc);
+               }
        }
 }
 
@@ -1896,7 +2031,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
        struct rdma_id_private *listen_id, *conn_id;
        struct rdma_cm_event event;
        int ret;
-       struct ib_device_attr attr;
        struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
        struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
 
@@ -1938,13 +2072,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
        memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
        memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
 
-       ret = ib_query_device(conn_id->id.device, &attr);
-       if (ret) {
-               mutex_unlock(&conn_id->handler_mutex);
-               rdma_destroy_id(new_cm_id);
-               goto out;
-       }
-
        memset(&event, 0, sizeof event);
        event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
        event.param.conn.private_data = iw_event->private_data;
@@ -2051,7 +2178,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
        memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
               rdma_addr_size(cma_src_addr(id_priv)));
 
-       cma_attach_to_dev(dev_id_priv, cma_dev);
+       _cma_attach_to_dev(dev_id_priv, cma_dev);
        list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
        atomic_inc(&id_priv->refcount);
        dev_id_priv->internal_id = 1;
@@ -2321,8 +2448,23 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 
        if (addr->dev_addr.bound_dev_if) {
                ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
+               if (!ndev)
+                       return -ENODEV;
+
+               if (ndev->flags & IFF_LOOPBACK) {
+                       dev_put(ndev);
+                       if (!id_priv->id.device->get_netdev)
+                               return -EOPNOTSUPP;
+
+                       ndev = id_priv->id.device->get_netdev(id_priv->id.device,
+                                                             id_priv->id.port_num);
+                       if (!ndev)
+                               return -ENODEV;
+               }
+
                route->path_rec->net = &init_net;
-               route->path_rec->ifindex = addr->dev_addr.bound_dev_if;
+               route->path_rec->ifindex = ndev->ifindex;
+               route->path_rec->gid_type = id_priv->gid_type;
        }
        if (!ndev) {
                ret = -ENODEV;
@@ -2336,7 +2478,14 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
        rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
                    &route->path_rec->dgid);
 
-       route->path_rec->hop_limit = 1;
+       /* Use the hint from IP Stack to select GID Type */
+       if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network))
+               route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network);
+       if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
+               /* TODO: get the hoplimit from the inet/inet6 device */
+               route->path_rec->hop_limit = addr->dev_addr.hoplimit;
+       else
+               route->path_rec->hop_limit = 1;
        route->path_rec->reversible = 1;
        route->path_rec->pkey = cpu_to_be16(0xffff);
        route->path_rec->mtu_selector = IB_SA_EQ;
@@ -3534,12 +3683,23 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
        event.status = status;
        event.param.ud.private_data = mc->context;
        if (!status) {
+               struct rdma_dev_addr *dev_addr =
+                       &id_priv->id.route.addr.dev_addr;
+               struct net_device *ndev =
+                       dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+               enum ib_gid_type gid_type =
+                       id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
+                       rdma_start_port(id_priv->cma_dev->device)];
+
                event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
                ib_init_ah_from_mcmember(id_priv->id.device,
                                         id_priv->id.port_num, &multicast->rec,
+                                        ndev, gid_type,
                                         &event.param.ud.ah_attr);
                event.param.ud.qp_num = 0xFFFFFF;
                event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
+               if (ndev)
+                       dev_put(ndev);
        } else
                event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
 
@@ -3672,9 +3832,10 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
 {
        struct iboe_mcast_work *work;
        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
-       int err;
+       int err = 0;
        struct sockaddr *addr = (struct sockaddr *)&mc->addr;
        struct net_device *ndev = NULL;
+       enum ib_gid_type gid_type;
 
        if (cma_zero_addr((struct sockaddr *)&mc->addr))
                return -EINVAL;
@@ -3704,9 +3865,25 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
        mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
        mc->multicast.ib->rec.hop_limit = 1;
        mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
+
+       gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
+                  rdma_start_port(id_priv->cma_dev->device)];
+       if (addr->sa_family == AF_INET) {
+               if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+                       err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
+                                           true);
+               if (!err) {
+                       mc->igmp_joined = true;
+                       mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+               }
+       } else {
+               if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+                       err = -ENOTSUPP;
+       }
        dev_put(ndev);
-       if (!mc->multicast.ib->rec.mtu) {
-               err = -EINVAL;
+       if (err || !mc->multicast.ib->rec.mtu) {
+               if (!err)
+                       err = -EINVAL;
                goto out2;
        }
        rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
@@ -3745,7 +3922,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
        memcpy(&mc->addr, addr, rdma_addr_size(addr));
        mc->context = context;
        mc->id_priv = id_priv;
-
+       mc->igmp_joined = false;
        spin_lock(&id_priv->lock);
        list_add(&mc->list, &id_priv->mc_list);
        spin_unlock(&id_priv->lock);
@@ -3790,9 +3967,25 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
                        if (rdma_cap_ib_mcast(id->device, id->port_num)) {
                                ib_sa_free_multicast(mc->multicast.ib);
                                kfree(mc);
-                       } else if (rdma_protocol_roce(id->device, id->port_num))
+                       } else if (rdma_protocol_roce(id->device, id->port_num)) {
+                               if (mc->igmp_joined) {
+                                       struct rdma_dev_addr *dev_addr =
+                                               &id->route.addr.dev_addr;
+                                       struct net_device *ndev = NULL;
+
+                                       if (dev_addr->bound_dev_if)
+                                               ndev = dev_get_by_index(&init_net,
+                                                                       dev_addr->bound_dev_if);
+                                       if (ndev) {
+                                               cma_igmp_send(ndev,
+                                                             &mc->multicast.ib->rec.mgid,
+                                                             false);
+                                               dev_put(ndev);
+                                       }
+                                       mc->igmp_joined = false;
+                               }
                                kref_put(&mc->mcref, release_mc);
-
+                       }
                        return;
                }
        }
@@ -3861,12 +4054,27 @@ static void cma_add_one(struct ib_device *device)
 {
        struct cma_device *cma_dev;
        struct rdma_id_private *id_priv;
+       unsigned int i;
+       unsigned long supported_gids = 0;
 
        cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
        if (!cma_dev)
                return;
 
        cma_dev->device = device;
+       cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
+                                           sizeof(*cma_dev->default_gid_type),
+                                           GFP_KERNEL);
+       if (!cma_dev->default_gid_type) {
+               kfree(cma_dev);
+               return;
+       }
+       for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
+               supported_gids = roce_gid_type_mask_support(device, i);
+               WARN_ON(!supported_gids);
+               cma_dev->default_gid_type[i - rdma_start_port(device)] =
+                       find_first_bit(&supported_gids, BITS_PER_LONG);
+       }
 
        init_completion(&cma_dev->comp);
        atomic_set(&cma_dev->refcount, 1);
@@ -3946,6 +4154,7 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
        mutex_unlock(&lock);
 
        cma_process_remove(cma_dev);
+       kfree(cma_dev->default_gid_type);
        kfree(cma_dev);
 }
 
@@ -4079,6 +4288,7 @@ static int __init cma_init(void)
 
        if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table))
                printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n");
+       cma_configfs_init();
 
        return 0;
 
@@ -4093,6 +4303,7 @@ err_wq:
 
 static void __exit cma_cleanup(void)
 {
+       cma_configfs_exit();
        ibnl_remove_client(RDMA_NL_RDMA_CM);
        ib_unregister_client(&cma_client);
        unregister_netdevice_notifier(&cma_nb);
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
new file mode 100644 (file)
index 0000000..18b112a
--- /dev/null
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/configfs.h>
+#include <rdma/ib_verbs.h>
+#include "core_priv.h"
+
+struct cma_device;
+
+struct cma_dev_group;
+
+struct cma_dev_port_group {
+       unsigned int            port_num; /* set to array index + 1 by make_cma_ports() */
+       struct cma_dev_group    *cma_dev_group; /* back-pointer to the owning device group */
+       struct config_group     group; /* configfs group named after the port number */
+};
+
+struct cma_dev_group {
+       char                            name[IB_DEVICE_NAME_MAX]; /* device name, used to look the cma_device up again */
+       struct config_group             device_group; /* group returned by make_cma_dev() */
+       struct config_group             ports_group; /* the "ports" child group */
+       struct config_group             *default_dev_group[2]; /* { &ports_group, NULL } */
+       struct config_group             **default_ports_group; /* NULL-terminated array of per-port groups */
+       struct cma_dev_port_group       *ports; /* per-port array, one entry per physical port */
+};
+
+/*
+ * Map a configfs item back to the cma_dev_port_group that embeds its
+ * config_group.  A NULL @item yields NULL.
+ */
+static struct cma_dev_port_group *to_dev_port_group(struct config_item *item)
+{
+       struct config_group *cfg_group;
+
+       if (item == NULL)
+               return NULL;
+
+       cfg_group = container_of(item, struct config_group, cg_item);
+
+       return container_of(cfg_group, struct cma_dev_port_group, group);
+}
+
+/* Device filter callback: true when @ib_dev->name equals the string @cookie. */
+static bool filter_by_name(struct ib_device *ib_dev, void *cookie)
+{
+       const char *wanted = cookie;
+
+       return strcmp(ib_dev->name, wanted) == 0;
+}
+
+/*
+ * Resolve a per-port configfs item to its port group and to the cma_device
+ * whose ib device name matches the owning device group's name.
+ *
+ * Returns 0 and fills *@pcma_dev and *@pgroup on success, -ENODEV when the
+ * item is NULL or no matching device is found.  Callers in this file pair
+ * a successful call with cma_configfs_params_put().
+ */
+static int cma_configfs_params_get(struct config_item *item,
+                                  struct cma_device **pcma_dev,
+                                  struct cma_dev_port_group **pgroup)
+{
+       struct cma_dev_port_group *port_group;
+       struct cma_device *dev;
+
+       port_group = to_dev_port_group(item);
+       if (port_group == NULL)
+               return -ENODEV;
+
+       dev = cma_enum_devices_by_ibdev(filter_by_name,
+                                       port_group->cma_dev_group->name);
+       if (dev == NULL)
+               return -ENODEV;
+
+       *pgroup = port_group;
+       *pcma_dev = dev;
+
+       return 0;
+}
+
+static void cma_configfs_params_put(struct cma_device *cma_dev)
+{
+       cma_deref_dev(cma_dev); /* counterpart of a successful cma_configfs_params_get() */
+}
+
+/*
+ * configfs show handler for "default_roce_mode": writes the string form of
+ * the port's default GID type, followed by a newline, into @buf.
+ *
+ * Returns the number of characters written, or a negative value when the
+ * device lookup or the GID type query fails.
+ */
+static ssize_t default_roce_mode_show(struct config_item *item,
+                                     char *buf)
+{
+       struct cma_dev_port_group *port_group;
+       struct cma_device *dev;
+       ssize_t err;
+       int type;
+
+       err = cma_configfs_params_get(item, &dev, &port_group);
+       if (err != 0)
+               return err;
+
+       type = cma_get_default_gid_type(dev, port_group->port_num);
+       cma_configfs_params_put(dev);
+
+       if (type >= 0)
+               return sprintf(buf, "%s\n", ib_cache_gid_type_str(type));
+
+       return type;
+}
+
+/*
+ * configfs store handler for "default_roce_mode": parses @buf as a GID type
+ * name and installs it as the port's default.
+ *
+ * Returns strnlen(@buf, @count) on success, -EINVAL when the string does
+ * not parse, or the error from the device lookup / the setter.
+ */
+static ssize_t default_roce_mode_store(struct config_item *item,
+                                      const char *buf, size_t count)
+{
+       struct cma_dev_port_group *port_group;
+       struct cma_device *dev;
+       ssize_t rc;
+       int type;
+
+       /* reject unparsable input before looking the device up */
+       type = ib_cache_gid_parse_type_str(buf);
+       if (type < 0)
+               return -EINVAL;
+
+       rc = cma_configfs_params_get(item, &dev, &port_group);
+       if (rc)
+               return rc;
+
+       rc = cma_set_default_gid_type(dev, port_group->port_num, type);
+       cma_configfs_params_put(dev);
+
+       if (rc)
+               return rc;
+
+       return strnlen(buf, count);
+}
+
+CONFIGFS_ATTR(, default_roce_mode); /* presumably emits attr_default_roce_mode from the _show/_store pair - confirm in linux/configfs.h */
+
+static struct configfs_attribute *cma_configfs_attributes[] = {
+       &attr_default_roce_mode,
+       NULL, /* end of list */
+};
+
+static struct config_item_type cma_port_group_type = { /* item type of every per-port group built in make_cma_ports() */
+       .ct_attrs       = cma_configfs_attributes,
+       .ct_owner       = THIS_MODULE
+};
+
+/*
+ * Allocate and initialise one configfs group per physical port of the ib
+ * device behind @cma_dev, and publish them through @cma_dev_group->ports and
+ * the NULL-terminated @cma_dev_group->default_ports_group array.
+ *
+ * Returns 0 on success, -ENODEV when @cma_dev has no ib device, or -ENOMEM
+ * when an allocation fails (both pointers in @cma_dev_group are then NULL).
+ */
+static int make_cma_ports(struct cma_dev_group *cma_dev_group,
+                         struct cma_device *cma_dev)
+{
+       struct cma_dev_port_group *port_arr;
+       struct config_group **group_arr;
+       struct ib_device *ibdev;
+       unsigned int nports;
+       unsigned int idx;
+
+       ibdev = cma_get_ib_dev(cma_dev);
+       if (ibdev == NULL)
+               return -ENODEV;
+
+       nports = ibdev->phys_port_cnt;
+       port_arr = kcalloc(nports, sizeof(*port_arr), GFP_KERNEL);
+       /* one extra slot for the terminating NULL */
+       group_arr = kcalloc(nports + 1, sizeof(*group_arr), GFP_KERNEL);
+       if (port_arr == NULL || group_arr == NULL) {
+               kfree(port_arr);
+               kfree(group_arr);
+               cma_dev_group->ports = NULL;
+               cma_dev_group->default_ports_group = NULL;
+               return -ENOMEM;
+       }
+
+       for (idx = 0; idx < nports; idx++) {
+               struct cma_dev_port_group *port = &port_arr[idx];
+               char port_str[10];
+
+               /* port numbers (and directory names) start at 1 */
+               port->port_num = idx + 1;
+               port->cma_dev_group = cma_dev_group;
+               snprintf(port_str, sizeof(port_str), "%u", idx + 1);
+               config_group_init_type_name(&port->group, port_str,
+                                           &cma_port_group_type);
+               group_arr[idx] = &port->group;
+       }
+       group_arr[nports] = NULL;
+
+       cma_dev_group->ports = port_arr;
+       cma_dev_group->default_ports_group = group_arr;
+
+       return 0;
+}
+
+/* configfs release callback for a device group: frees the enclosing cma_dev_group. */
+static void release_cma_dev(struct config_item  *item)
+{
+       struct config_group *cfg_group;
+       struct cma_dev_group *dev_group;
+
+       cfg_group = container_of(item, struct config_group, cg_item);
+       dev_group = container_of(cfg_group, struct cma_dev_group,
+                                device_group);
+
+       kfree(dev_group);
+}
+
+/*
+ * configfs release callback for the "ports" group: frees the per-port array
+ * and the default-group pointer array, then clears both pointers in the
+ * enclosing cma_dev_group.
+ */
+static void release_cma_ports_group(struct config_item  *item)
+{
+       struct config_group *cfg_group;
+       struct cma_dev_group *dev_group;
+
+       cfg_group = container_of(item, struct config_group, cg_item);
+       dev_group = container_of(cfg_group, struct cma_dev_group,
+                                ports_group);
+
+       kfree(dev_group->ports);
+       dev_group->ports = NULL;
+
+       kfree(dev_group->default_ports_group);
+       dev_group->default_ports_group = NULL;
+}
+
+static struct configfs_item_operations cma_ports_item_ops = {
+       .release = release_cma_ports_group
+};
+
+static struct config_item_type cma_ports_group_type = { /* item type of the "ports" group set up in make_cma_dev() */
+       .ct_item_ops    = &cma_ports_item_ops,
+       .ct_owner       = THIS_MODULE
+};
+
+static struct configfs_item_operations cma_device_item_ops = {
+       .release = release_cma_dev
+};
+
+static struct config_item_type cma_device_group_type = { /* item type of the per-device group returned by make_cma_dev() */
+       .ct_item_ops    = &cma_device_item_ops,
+       .ct_owner       = THIS_MODULE
+};
+
+/*
+ * make_group callback of the rdma_cm configfs subsystem (installed through
+ * cma_subsys_group_ops).  Builds the group hierarchy for the RDMA device
+ * called @name: a device group containing a "ports" group, which in turn
+ * contains one group per physical port.
+ *
+ * @group: parent group; not used here.
+ * @name:  name of the group being created; must match an ib device name.
+ *
+ * Returns the new device group, or ERR_PTR(-ENODEV) when no device matches
+ * @name, or ERR_PTR() of the allocation / make_cma_ports() error.
+ * cma_deref_dev() is called on every path where the device lookup succeeded.
+ */
+static struct config_group *make_cma_dev(struct config_group *group,
+                                        const char *name)
+{
+       int err = -ENODEV;
+       struct cma_device *cma_dev = cma_enum_devices_by_ibdev(filter_by_name,
+                                                              (void *)name);
+       struct cma_dev_group *cma_dev_group = NULL;
+
+       if (!cma_dev)
+               goto fail;
+
+       cma_dev_group = kzalloc(sizeof(*cma_dev_group), GFP_KERNEL);
+
+       if (!cma_dev_group) {
+               err = -ENOMEM;
+               goto fail;
+       }
+
+       /*
+        * The stored name is later handed to strcmp() by filter_by_name(),
+        * so it must be NUL-terminated.  strscpy() guarantees that even if
+        * @name fills the whole buffer; strncpy() with the full size did not.
+        */
+       strscpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
+
+       err = make_cma_ports(cma_dev_group, cma_dev);
+       if (err)
+               goto fail;
+
+       /* "ports" group: its default children are the per-port groups */
+       cma_dev_group->ports_group.default_groups =
+               cma_dev_group->default_ports_group;
+       config_group_init_type_name(&cma_dev_group->ports_group, "ports",
+                                   &cma_ports_group_type);
+
+       /* device group: its only default child is the "ports" group */
+       cma_dev_group->device_group.default_groups
+               = cma_dev_group->default_dev_group;
+       cma_dev_group->default_dev_group[0] = &cma_dev_group->ports_group;
+       cma_dev_group->default_dev_group[1] = NULL;
+
+       config_group_init_type_name(&cma_dev_group->device_group, name,
+                                   &cma_device_group_type);
+
+       cma_deref_dev(cma_dev);
+       return &cma_dev_group->device_group;
+
+fail:
+       if (cma_dev)
+               cma_deref_dev(cma_dev);
+       kfree(cma_dev_group);
+       return ERR_PTR(err);
+}
+
+static struct configfs_group_operations cma_subsys_group_ops = {
+       .make_group     = make_cma_dev, /* builds the per-device group hierarchy */
+};
+
+static struct config_item_type cma_subsys_type = {
+       .ct_group_ops   = &cma_subsys_group_ops,
+       .ct_owner       = THIS_MODULE,
+};
+
+static struct configfs_subsystem cma_subsys = { /* registered by cma_configfs_init(), removed by cma_configfs_exit() */
+       .su_group       = {
+               .cg_item        = {
+                       .ci_namebuf     = "rdma_cm",
+                       .ci_type        = &cma_subsys_type,
+               },
+       },
+};
+
+int __init cma_configfs_init(void)
+{
+       config_group_init(&cma_subsys.su_group);
+       mutex_init(&cma_subsys.su_mutex);
+       return configfs_register_subsystem(&cma_subsys); /* registration result is handed straight back to the caller */
+}
+
+void __exit cma_configfs_exit(void)
+{
+       configfs_unregister_subsystem(&cma_subsys); /* undoes the registration done in cma_configfs_init() */
+}
index 5cf6eb7..eab3221 100644 (file)
 
 #include <rdma/ib_verbs.h>
 
+#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
+int cma_configfs_init(void);
+void cma_configfs_exit(void);
+#else
+static inline int cma_configfs_init(void)
+{
+       return 0; /* stub for builds without CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS: report success */
+}
+
+static inline void cma_configfs_exit(void)
+{
+} /* stub for builds without CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS: nothing to undo */
+#endif
+struct cma_device;
+void cma_ref_dev(struct cma_device *cma_dev);
+void cma_deref_dev(struct cma_device *cma_dev);
+typedef bool (*cma_device_filter)(struct ib_device *, void *);
+struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
+                                            void               *cookie);
+int cma_get_default_gid_type(struct cma_device *cma_dev,
+                            unsigned int port);
+int cma_set_default_gid_type(struct cma_device *cma_dev,
+                            unsigned int port,
+                            enum ib_gid_type default_gid_type);
+struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev);
+
 int  ib_device_register_sysfs(struct ib_device *device,
                              int (*port_callback)(struct ib_device *,
                                                   u8, struct kobject *));
@@ -70,8 +96,13 @@ enum ib_cache_gid_default_mode {
        IB_CACHE_GID_DEFAULT_MODE_DELETE
 };
 
+int ib_cache_gid_parse_type_str(const char *buf);
+
+const char *ib_cache_gid_type_str(enum ib_gid_type gid_type);
+
 void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
                                  struct net_device *ndev,
+                                 unsigned long gid_type_mask,
                                  enum ib_cache_gid_default_mode mode);
 
 int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
@@ -87,9 +118,23 @@ int roce_gid_mgmt_init(void);
 void roce_gid_mgmt_cleanup(void);
 
 int roce_rescan_device(struct ib_device *ib_dev);
+unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
 
 int ib_cache_setup_one(struct ib_device *device);
 void ib_cache_cleanup_one(struct ib_device *device);
 void ib_cache_release_one(struct ib_device *device);
 
+static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
+                                        struct net_device *upper)
+{
+       struct net_device *_upper = NULL;
+       struct list_head *iter;
+
+       netdev_for_each_all_upper_dev_rcu(dev, _upper, iter) /* walk the upper devices of @dev; name suggests RCU protection is the caller's job - confirm */
+               if (_upper == upper)
+                       break; /* found: leave _upper pointing at @upper */
+
+       return _upper == upper; /* NOTE(review): relies on the iterator leaving _upper != @upper (presumably NULL) when the walk is exhausted - confirm */
+}
+
 #endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
new file mode 100644 (file)
index 0000000..a754fc7
--- /dev/null
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2015 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <rdma/ib_verbs.h>
+
+/* # of WCs to poll for with a single call to ib_poll_cq */
+#define IB_POLL_BATCH                  16
+
+/* # of WCs to iterate over before yielding */
+#define IB_POLL_BUDGET_IRQ             256
+#define IB_POLL_BUDGET_WORKQUEUE       65536
+
+#define IB_POLL_FLAGS \
+       (IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS) /* flags passed to ib_req_notify_cq() when re-arming after a poll pass */
+
+/*
+ * Poll @cq in batches of IB_POLL_BATCH and invoke each completion's
+ * wr_cqe->done() callback.  Polling stops when a batch comes back empty or
+ * short, or once at least @budget completions were handled (a @budget of -1
+ * disables the limit).  Returns the number of completions processed.
+ */
+static int __ib_process_cq(struct ib_cq *cq, int budget)
+{
+       int completed = 0;
+       int polled, idx;
+
+       for (;;) {
+               polled = ib_poll_cq(cq, IB_POLL_BATCH, cq->wc);
+               if (polled <= 0)
+                       break;
+
+               for (idx = 0; idx < polled; idx++) {
+                       struct ib_wc *wc = cq->wc + idx;
+
+                       if (!wc->wr_cqe) {
+                               /* a successful completion without a cqe is unexpected */
+                               WARN_ON_ONCE(wc->status == IB_WC_SUCCESS);
+                               continue;
+                       }
+                       wc->wr_cqe->done(cq, wc);
+               }
+               completed += polled;
+
+               /* anything but a full batch ends this pass */
+               if (polled != IB_POLL_BATCH)
+                       break;
+               if (budget != -1 && completed >= budget)
+                       break;
+       }
+
+       return completed;
+}
+
+/**
+ * ib_process_cq_direct - process a CQ in caller context
+ * @cq:                CQ to process
+ * @budget:    number of CQEs to poll for
+ *
+ * This function is used to process all outstanding CQ entries on a
+ * %IB_POLL_DIRECT CQ.  It does not offload CQ processing to a different
+ * context and does not ask for completion interrupts from the HCA.
+ *
+ * Note: for compatibility reasons -1 can be passed in @budget for unlimited
+ * polling.  Do not use this feature in new code, it will be removed soon.
+ *
+ * Return: the number of completions processed.
+ */
+int ib_process_cq_direct(struct ib_cq *cq, int budget)
+{
+       WARN_ON_ONCE(cq->poll_ctx != IB_POLL_DIRECT);
+
+       return __ib_process_cq(cq, budget);
+}
+EXPORT_SYMBOL(ib_process_cq_direct);
+
+static void ib_cq_completion_direct(struct ib_cq *cq, void *private)
+{
+       WARN_ONCE(1, "got unsolicited completion for CQ 0x%p\n", cq); /* comp_handler installed for IB_POLL_DIRECT CQs; only warns */
+}
+
+/*
+ * irq_poll callback for IB_POLL_SOFTIRQ CQs.  Processes up to @budget
+ * completions; when fewer were found, polling is marked complete and the CQ
+ * is re-armed, rescheduling the poll if ib_req_notify_cq() returns a
+ * positive value.  Returns the number of completions processed.
+ */
+static int ib_poll_handler(struct irq_poll *iop, int budget)
+{
+       struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
+       int done = __ib_process_cq(cq, budget);
+
+       /* budget exhausted: leave the poll scheduled */
+       if (done >= budget)
+               return done;
+
+       irq_poll_complete(&cq->iop);
+       if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
+               irq_poll_sched(&cq->iop);
+
+       return done;
+}
+
+static void ib_cq_completion_softirq(struct ib_cq *cq, void *private)
+{
+       irq_poll_sched(&cq->iop); /* comp_handler for IB_POLL_SOFTIRQ CQs: schedule ib_poll_handler() */
+}
+
+/*
+ * Work function for IB_POLL_WORKQUEUE CQs.  Processes up to
+ * IB_POLL_BUDGET_WORKQUEUE completions and requeues itself when the budget
+ * was used up or when re-arming the CQ returns a positive value.
+ */
+static void ib_cq_poll_work(struct work_struct *work)
+{
+       struct ib_cq *cq = container_of(work, struct ib_cq, work);
+       bool requeue;
+
+       requeue = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE) >=
+                 IB_POLL_BUDGET_WORKQUEUE;
+       /* only re-arm when the budget was not exhausted */
+       if (!requeue)
+               requeue = ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0;
+
+       if (requeue)
+               queue_work(ib_comp_wq, &cq->work);
+}
+
+static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
+{
+       queue_work(ib_comp_wq, &cq->work); /* comp_handler for IB_POLL_WORKQUEUE CQs: run ib_cq_poll_work() on ib_comp_wq */
+}
+
+/**
+ * ib_alloc_cq - allocate a completion queue
+ * @dev:               device to allocate the CQ for
+ * @private:           driver private data, accessible from cq->cq_context
+ * @nr_cqe:            number of CQEs to allocate
+ * @comp_vector:       HCA completion vectors for this CQ
+ * @poll_ctx:          context to poll the CQ from.
+ *
+ * This is the proper interface to allocate a CQ for in-kernel users. A
+ * CQ allocated with this interface will automatically be polled from the
+ * specified context.  The ULP must use wr->wr_cqe instead of wr->wr_id
+ * to use this CQ abstraction.
+ *
+ * Return: the new CQ, or an ERR_PTR() value on failure (the driver's
+ * create_cq error, -ENOMEM, or -EINVAL for an unknown @poll_ctx).
+ */
+struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
+               int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx)
+{
+       struct ib_cq_init_attr cq_attr = {
+               .cqe            = nr_cqe,
+               .comp_vector    = comp_vector,
+       };
+       struct ib_cq *cq;
+       int ret = -ENOMEM;
+
+       cq = dev->create_cq(dev, &cq_attr, NULL, NULL);
+       if (IS_ERR(cq))
+               return cq;
+
+       cq->device = dev;
+       cq->uobject = NULL;
+       cq->event_handler = NULL;
+       cq->cq_context = private;
+       cq->poll_ctx = poll_ctx;
+       atomic_set(&cq->usecnt, 0);
+
+       cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL); /* scratch array filled by ib_poll_cq() in __ib_process_cq() */
+       if (!cq->wc)
+               goto out_destroy_cq;
+
+       switch (cq->poll_ctx) {
+       case IB_POLL_DIRECT:
+               cq->comp_handler = ib_cq_completion_direct; /* no notification is requested for direct CQs */
+               break;
+       case IB_POLL_SOFTIRQ:
+               cq->comp_handler = ib_cq_completion_softirq;
+
+               irq_poll_init(&cq->iop, IB_POLL_BUDGET_IRQ, ib_poll_handler);
+               ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+               break;
+       case IB_POLL_WORKQUEUE:
+               cq->comp_handler = ib_cq_completion_workqueue;
+               INIT_WORK(&cq->work, ib_cq_poll_work);
+               ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+               break;
+       default:
+               ret = -EINVAL;
+               goto out_free_wc;
+       }
+
+       return cq;
+
+out_free_wc:
+       kfree(cq->wc);
+out_destroy_cq:
+       cq->device->destroy_cq(cq);
+       return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_alloc_cq);
+
+/**
+ * ib_free_cq - free a completion queue
+ * @cq:                completion queue to free.
+ *
+ * Warns once and leaves the CQ untouched if its usecnt is non-zero.
+ */
+void ib_free_cq(struct ib_cq *cq)
+{
+       int ret;
+
+       if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
+               return;
+
+       switch (cq->poll_ctx) {
+       case IB_POLL_DIRECT:
+               break;
+       case IB_POLL_SOFTIRQ:
+               irq_poll_disable(&cq->iop); /* shut down the irq_poll instance set up in ib_alloc_cq() */
+               break;
+       case IB_POLL_WORKQUEUE:
+               flush_work(&cq->work); /* let a queued or running ib_cq_poll_work() finish first */
+               break;
+       default:
+               WARN_ON_ONCE(1); /* unknown poll context; the CQ is still freed below */
+       }
+
+       kfree(cq->wc);
+       ret = cq->device->destroy_cq(cq);
+       WARN_ON_ONCE(ret); /* a destroy_cq failure is only reported, not propagated */
+}
+EXPORT_SYMBOL(ib_free_cq);
index 179e813..00da80e 100644 (file)
@@ -58,6 +58,7 @@ struct ib_client_data {
        bool              going_down;
 };
 
+struct workqueue_struct *ib_comp_wq;
 struct workqueue_struct *ib_wq;
 EXPORT_SYMBOL_GPL(ib_wq);
 
@@ -325,6 +326,7 @@ int ib_register_device(struct ib_device *device,
 {
        int ret;
        struct ib_client *client;
+       struct ib_udata uhw = {.outlen = 0, .inlen = 0};
 
        mutex_lock(&device_mutex);
 
@@ -352,6 +354,13 @@ int ib_register_device(struct ib_device *device,
                goto out;
        }
 
+       memset(&device->attrs, 0, sizeof(device->attrs));
+       ret = device->query_device(device, &device->attrs, &uhw);
+       if (ret) {
+               printk(KERN_WARNING "Couldn't query the device attributes\n");
+               goto out;
+       }
+
        ret = ib_device_register_sysfs(device, port_callback);
        if (ret) {
                printk(KERN_WARNING "Couldn't register device %s with driver model\n",
@@ -627,25 +636,6 @@ void ib_dispatch_event(struct ib_event *event)
 }
 EXPORT_SYMBOL(ib_dispatch_event);
 
-/**
- * ib_query_device - Query IB device attributes
- * @device:Device to query
- * @device_attr:Device attributes
- *
- * ib_query_device() returns the attributes of a device through the
- * @device_attr pointer.
- */
-int ib_query_device(struct ib_device *device,
-                   struct ib_device_attr *device_attr)
-{
-       struct ib_udata uhw = {.outlen = 0, .inlen = 0};
-
-       memset(device_attr, 0, sizeof(*device_attr));
-
-       return device->query_device(device, device_attr, &uhw);
-}
-EXPORT_SYMBOL(ib_query_device);
-
 /**
  * ib_query_port - Query IB port attributes
  * @device:Device to query
@@ -825,26 +815,31 @@ EXPORT_SYMBOL(ib_modify_port);
  *   a specified GID value occurs.
  * @device: The device to query.
  * @gid: The GID value to search for.
+ * @gid_type: Type of GID.
  * @ndev: The ndev related to the GID to search for.
  * @port_num: The port number of the device where the GID value was found.
  * @index: The index into the GID table where the GID was found.  This
  *   parameter may be NULL.
  */
 int ib_find_gid(struct ib_device *device, union ib_gid *gid,
-               struct net_device *ndev, u8 *port_num, u16 *index)
+               enum ib_gid_type gid_type, struct net_device *ndev,
+               u8 *port_num, u16 *index)
 {
        union ib_gid tmp_gid;
        int ret, port, i;
 
        for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
                if (rdma_cap_roce_gid_table(device, port)) {
-                       if (!ib_find_cached_gid_by_port(device, gid, port,
+                       if (!ib_find_cached_gid_by_port(device, gid, gid_type, port,
                                                        ndev, index)) {
                                *port_num = port;
                                return 0;
                        }
                }
 
+               if (gid_type != IB_GID_TYPE_IB)
+                       continue;
+
                for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
                        ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
                        if (ret)
@@ -954,10 +949,18 @@ static int __init ib_core_init(void)
        if (!ib_wq)
                return -ENOMEM;
 
+       ib_comp_wq = alloc_workqueue("ib-comp-wq",
+                       WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
+                       WQ_UNBOUND_MAX_ACTIVE);
+       if (!ib_comp_wq) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
        ret = class_register(&ib_class);
        if (ret) {
                printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
-               goto err;
+               goto err_comp;
        }
 
        ret = ibnl_init();
@@ -972,7 +975,8 @@ static int __init ib_core_init(void)
 
 err_sysfs:
        class_unregister(&ib_class);
-
+err_comp:
+       destroy_workqueue(ib_comp_wq);
 err:
        destroy_workqueue(ib_wq);
        return ret;
@@ -983,6 +987,7 @@ static void __exit ib_core_cleanup(void)
        ib_cache_cleanup();
        ibnl_cleanup();
        class_unregister(&ib_class);
+       destroy_workqueue(ib_comp_wq);
        /* Make sure that any pending umem accounting work is done. */
        destroy_workqueue(ib_wq);
 }
index 9f5ad7c..6ac3683 100644 (file)
@@ -212,7 +212,6 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 {
        struct ib_device   *device;
        struct ib_fmr_pool *pool;
-       struct ib_device_attr *attr;
        int i;
        int ret;
        int max_remaps;
@@ -228,25 +227,10 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
                return ERR_PTR(-ENOSYS);
        }
 
-       attr = kmalloc(sizeof *attr, GFP_KERNEL);
-       if (!attr) {
-               printk(KERN_WARNING PFX "couldn't allocate device attr struct\n");
-               return ERR_PTR(-ENOMEM);
-       }
-
-       ret = ib_query_device(device, attr);
-       if (ret) {
-               printk(KERN_WARNING PFX "couldn't query device: %d\n", ret);
-               kfree(attr);
-               return ERR_PTR(ret);
-       }
-
-       if (!attr->max_map_per_fmr)
+       if (!device->attrs.max_map_per_fmr)
                max_remaps = IB_FMR_MAX_REMAPS;
        else
-               max_remaps = attr->max_map_per_fmr;
-
-       kfree(attr);
+               max_remaps = device->attrs.max_map_per_fmr;
 
        pool = kmalloc(sizeof *pool, GFP_KERNEL);
        if (!pool) {
index 2281de1..9fa5bf3 100644 (file)
@@ -84,6 +84,9 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
                              u8 mgmt_class);
 static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
                           struct ib_mad_agent_private *agent_priv);
+static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
+                             struct ib_wc *wc);
+static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);
 
 /*
  * Returns a ib_mad_port_private structure or NULL for a device/port
@@ -681,7 +684,7 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info,
 
                atomic_inc(&mad_snoop_priv->refcount);
                spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-               mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent,
+               mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL,
                                                   mad_recv_wc);
                deref_snoop_agent(mad_snoop_priv);
                spin_lock_irqsave(&qp_info->snoop_lock, flags);
@@ -689,12 +692,11 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info,
        spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
 }
 
-static void build_smp_wc(struct ib_qp *qp,
-                        u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
-                        struct ib_wc *wc)
+static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
+               u16 pkey_index, u8 port_num, struct ib_wc *wc)
 {
        memset(wc, 0, sizeof *wc);
-       wc->wr_id = wr_id;
+       wc->wr_cqe = cqe;
        wc->status = IB_WC_SUCCESS;
        wc->opcode = IB_WC_RECV;
        wc->pkey_index = pkey_index;
@@ -832,7 +834,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
        }
 
        build_smp_wc(mad_agent_priv->agent.qp,
-                    send_wr->wr.wr_id, drslid,
+                    send_wr->wr.wr_cqe, drslid,
                     send_wr->pkey_index,
                     send_wr->port_num, &mad_wc);
 
@@ -1039,7 +1041,9 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 
        mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
 
-       mad_send_wr->send_wr.wr.wr_id = (unsigned long) mad_send_wr;
+       mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
+
+       mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
        mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
        mad_send_wr->send_wr.wr.num_sge = 2;
        mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
@@ -1151,8 +1155,9 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 
        /* Set WR ID to find mad_send_wr upon completion */
        qp_info = mad_send_wr->mad_agent_priv->qp_info;
-       mad_send_wr->send_wr.wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
        mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
+       mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
+       mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
 
        mad_agent = mad_send_wr->send_buf.mad_agent;
        sge = mad_send_wr->sg_list;
@@ -1982,9 +1987,9 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
                                /* user rmpp is in effect
                                 * and this is an active RMPP MAD
                                 */
-                               mad_recv_wc->wc->wr_id = 0;
-                               mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
-                                                                  mad_recv_wc);
+                               mad_agent_priv->agent.recv_handler(
+                                               &mad_agent_priv->agent, NULL,
+                                               mad_recv_wc);
                                atomic_dec(&mad_agent_priv->refcount);
                        } else {
                                /* not user rmpp, revert to normal behavior and
@@ -1998,9 +2003,10 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
                        spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 
                        /* Defined behavior is to complete response before request */
-                       mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
-                       mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
-                                                          mad_recv_wc);
+                       mad_agent_priv->agent.recv_handler(
+                                       &mad_agent_priv->agent,
+                                       &mad_send_wr->send_buf,
+                                       mad_recv_wc);
                        atomic_dec(&mad_agent_priv->refcount);
 
                        mad_send_wc.status = IB_WC_SUCCESS;
@@ -2009,7 +2015,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
                        ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
                }
        } else {
-               mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+               mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL,
                                                   mad_recv_wc);
                deref_mad_agent(mad_agent_priv);
        }
@@ -2172,13 +2178,14 @@ handle_smi(struct ib_mad_port_private *port_priv,
        return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response);
 }
 
-static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
-                                    struct ib_wc *wc)
+static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
+       struct ib_mad_port_private *port_priv = cq->cq_context;
+       struct ib_mad_list_head *mad_list =
+               container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
        struct ib_mad_qp_info *qp_info;
        struct ib_mad_private_header *mad_priv_hdr;
        struct ib_mad_private *recv, *response = NULL;
-       struct ib_mad_list_head *mad_list;
        struct ib_mad_agent_private *mad_agent;
        int port_num;
        int ret = IB_MAD_RESULT_SUCCESS;
@@ -2186,7 +2193,17 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
        u16 resp_mad_pkey_index = 0;
        bool opa;
 
-       mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+       if (list_empty_careful(&port_priv->port_list))
+               return;
+
+       if (wc->status != IB_WC_SUCCESS) {
+               /*
+                * Receive errors indicate that the QP has entered the error
+                * state - error handling/shutdown code will cleanup
+                */
+               return;
+       }
+
        qp_info = mad_list->mad_queue->qp_info;
        dequeue_mad(mad_list);
 
@@ -2227,7 +2244,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
        response = alloc_mad_private(mad_size, GFP_KERNEL);
        if (!response) {
                dev_err(&port_priv->device->dev,
-                       "ib_mad_recv_done_handler no memory for response buffer\n");
+                       "%s: no memory for response buffer\n", __func__);
                goto out;
        }
 
@@ -2413,11 +2430,12 @@ done:
        spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 }
 
-static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
-                                    struct ib_wc *wc)
+static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
+       struct ib_mad_port_private *port_priv = cq->cq_context;
+       struct ib_mad_list_head *mad_list =
+               container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
        struct ib_mad_send_wr_private   *mad_send_wr, *queued_send_wr;
-       struct ib_mad_list_head         *mad_list;
        struct ib_mad_qp_info           *qp_info;
        struct ib_mad_queue             *send_queue;
        struct ib_send_wr               *bad_send_wr;
@@ -2425,7 +2443,14 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
        unsigned long flags;
        int ret;
 
-       mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+       if (list_empty_careful(&port_priv->port_list))
+               return;
+
+       if (wc->status != IB_WC_SUCCESS) {
+               if (!ib_mad_send_error(port_priv, wc))
+                       return;
+       }
+
        mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
                                   mad_list);
        send_queue = mad_list->mad_queue;
@@ -2490,24 +2515,15 @@ static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
        spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
 }
 
-static void mad_error_handler(struct ib_mad_port_private *port_priv,
-                             struct ib_wc *wc)
+static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
+               struct ib_wc *wc)
 {
-       struct ib_mad_list_head *mad_list;
-       struct ib_mad_qp_info *qp_info;
+       struct ib_mad_list_head *mad_list =
+               container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
+       struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info;
        struct ib_mad_send_wr_private *mad_send_wr;
        int ret;
 
-       /* Determine if failure was a send or receive */
-       mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
-       qp_info = mad_list->mad_queue->qp_info;
-       if (mad_list->mad_queue == &qp_info->recv_queue)
-               /*
-                * Receive errors indicate that the QP has entered the error
-                * state - error handling/shutdown code will cleanup
-                */
-               return;
-
        /*
         * Send errors will transition the QP to SQE - move
         * QP to RTS and repost flushed work requests
@@ -2522,10 +2538,9 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
                        mad_send_wr->retry = 0;
                        ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
                                        &bad_send_wr);
-                       if (ret)
-                               ib_mad_send_done_handler(port_priv, wc);
-               } else
-                       ib_mad_send_done_handler(port_priv, wc);
+                       if (!ret)
+                               return false;
+               }
        } else {
                struct ib_qp_attr *attr;
 
@@ -2539,42 +2554,14 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
                        kfree(attr);
                        if (ret)
                                dev_err(&port_priv->device->dev,
-                                       "mad_error_handler - ib_modify_qp to RTS : %d\n",
-                                       ret);
+                                       "%s - ib_modify_qp to RTS: %d\n",
+                                       __func__, ret);
                        else
                                mark_sends_for_retry(qp_info);
                }
-               ib_mad_send_done_handler(port_priv, wc);
        }
-}
 
-/*
- * IB MAD completion callback
- */
-static void ib_mad_completion_handler(struct work_struct *work)
-{
-       struct ib_mad_port_private *port_priv;
-       struct ib_wc wc;
-
-       port_priv = container_of(work, struct ib_mad_port_private, work);
-       ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
-
-       while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
-               if (wc.status == IB_WC_SUCCESS) {
-                       switch (wc.opcode) {
-                       case IB_WC_SEND:
-                               ib_mad_send_done_handler(port_priv, &wc);
-                               break;
-                       case IB_WC_RECV:
-                               ib_mad_recv_done_handler(port_priv, &wc);
-                               break;
-                       default:
-                               BUG_ON(1);
-                               break;
-                       }
-               } else
-                       mad_error_handler(port_priv, &wc);
-       }
+       return true;
 }
 
 static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
@@ -2716,7 +2703,7 @@ static void local_completions(struct work_struct *work)
                         * before request
                         */
                        build_smp_wc(recv_mad_agent->agent.qp,
-                                    (unsigned long) local->mad_send_wr,
+                                    local->mad_send_wr->send_wr.wr.wr_cqe,
                                     be16_to_cpu(IB_LID_PERMISSIVE),
                                     local->mad_send_wr->send_wr.pkey_index,
                                     recv_mad_agent->agent.port_num, &wc);
@@ -2744,6 +2731,7 @@ static void local_completions(struct work_struct *work)
                                           IB_MAD_SNOOP_RECVS);
                        recv_mad_agent->agent.recv_handler(
                                                &recv_mad_agent->agent,
+                                               &local->mad_send_wr->send_buf,
                                                &local->mad_priv->header.recv_wc);
                        spin_lock_irqsave(&recv_mad_agent->lock, flags);
                        atomic_dec(&recv_mad_agent->refcount);
@@ -2855,17 +2843,6 @@ static void timeout_sends(struct work_struct *work)
        spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 }
 
-static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
-{
-       struct ib_mad_port_private *port_priv = cq->cq_context;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ib_mad_port_list_lock, flags);
-       if (!list_empty(&port_priv->port_list))
-               queue_work(port_priv->wq, &port_priv->work);
-       spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
-}
-
 /*
  * Allocate receive MADs and post receive WRs for them
  */
@@ -2913,8 +2890,9 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
                        break;
                }
                mad_priv->header.mapping = sg_list.addr;
-               recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
                mad_priv->header.mad_list.mad_queue = recv_queue;
+               mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
+               recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
 
                /* Post receive WR */
                spin_lock_irqsave(&recv_queue->lock, flags);
@@ -3151,7 +3129,6 @@ static int ib_mad_port_open(struct ib_device *device,
        unsigned long flags;
        char name[sizeof "ib_mad123"];
        int has_smi;
-       struct ib_cq_init_attr cq_attr = {};
 
        if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE))
                return -EFAULT;
@@ -3179,10 +3156,8 @@ static int ib_mad_port_open(struct ib_device *device,
        if (has_smi)
                cq_size *= 2;
 
-       cq_attr.cqe = cq_size;
-       port_priv->cq = ib_create_cq(port_priv->device,
-                                    ib_mad_thread_completion_handler,
-                                    NULL, port_priv, &cq_attr);
+       port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
+                       IB_POLL_WORKQUEUE);
        if (IS_ERR(port_priv->cq)) {
                dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
                ret = PTR_ERR(port_priv->cq);
@@ -3211,7 +3186,6 @@ static int ib_mad_port_open(struct ib_device *device,
                ret = -ENOMEM;
                goto error8;
        }
-       INIT_WORK(&port_priv->work, ib_mad_completion_handler);
 
        spin_lock_irqsave(&ib_mad_port_list_lock, flags);
        list_add_tail(&port_priv->port_list, &ib_mad_port_list);
@@ -3238,7 +3212,7 @@ error7:
 error6:
        ib_dealloc_pd(port_priv->pd);
 error4:
-       ib_destroy_cq(port_priv->cq);
+       ib_free_cq(port_priv->cq);
        cleanup_recv_queue(&port_priv->qp_info[1]);
        cleanup_recv_queue(&port_priv->qp_info[0]);
 error3:
@@ -3271,7 +3245,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
        destroy_mad_qp(&port_priv->qp_info[1]);
        destroy_mad_qp(&port_priv->qp_info[0]);
        ib_dealloc_pd(port_priv->pd);
-       ib_destroy_cq(port_priv->cq);
+       ib_free_cq(port_priv->cq);
        cleanup_recv_queue(&port_priv->qp_info[1]);
        cleanup_recv_queue(&port_priv->qp_info[0]);
        /* XXX: Handle deallocation of MAD registration tables */
index 990698a..28669f6 100644 (file)
@@ -64,6 +64,7 @@
 
 struct ib_mad_list_head {
        struct list_head list;
+       struct ib_cqe cqe;      /* completion entry; the ->done handlers get back to this head via container_of() */
        struct ib_mad_queue *mad_queue;
 };
 
@@ -204,7 +205,6 @@ struct ib_mad_port_private {
        struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
        struct list_head agent_list;
        struct workqueue_struct *wq;
-       struct work_struct work;
        struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
 };
 
index bb6685f..250937c 100644 (file)
@@ -723,14 +723,27 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
 
 int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
                             struct ib_sa_mcmember_rec *rec,
+                            struct net_device *ndev,
+                            enum ib_gid_type gid_type,
                             struct ib_ah_attr *ah_attr)
 {
        int ret;
        u16 gid_index;
        u8 p;
 
-       ret = ib_find_cached_gid(device, &rec->port_gid,
-                                NULL, &p, &gid_index);
+       if (rdma_protocol_roce(device, port_num)) {
+               ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
+                                                gid_type, port_num,
+                                                ndev,
+                                                &gid_index);
+       } else if (rdma_protocol_ib(device, port_num)) {
+               ret = ib_find_cached_gid(device, &rec->port_gid,
+                                        IB_GID_TYPE_IB, NULL, &p,
+                                        &gid_index);
+       } else {
+               ret = -EINVAL;
+       }
+
        if (ret)
                return ret;
 
index 178f984..06556c3 100644 (file)
@@ -67,17 +67,53 @@ struct netdev_event_work {
        struct netdev_event_work_cmd    cmds[ROCE_NETDEV_CALLBACK_SZ];
 };
 
+/*
+ * Port capability checks, each paired with the GID type that capability
+ * enables.  Walked by roce_gid_type_mask_support().
+ */
+static const struct {
+       bool (*is_supported)(const struct ib_device *device, u8 port_num);
+       enum ib_gid_type gid_type;
+} PORT_CAP_TO_GID_TYPE[] = {
+       {
+               .is_supported = rdma_protocol_roce_eth_encap,
+               .gid_type = IB_GID_TYPE_ROCE,
+       },
+       {
+               .is_supported = rdma_protocol_roce_udp_encap,
+               .gid_type = IB_GID_TYPE_ROCE_UDP_ENCAP,
+       },
+};
+
+#define CAP_TO_GID_TABLE_SIZE  ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)
+
+/**
+ * roce_gid_type_mask_support - GID types usable on a port
+ * @ib_dev: device the port belongs to
+ * @port: port number
+ *
+ * Returns a bitmask with bit (1UL << type) set for every supported
+ * &enum ib_gid_type.  A port that is not RoCE reports IB_GID_TYPE_IB only;
+ * a RoCE port reports every type whose capability check in
+ * PORT_CAP_TO_GID_TYPE passes.
+ */
+unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
+{
+       /* Same width as the 1UL bits OR-ed in and as the return type. */
+       unsigned long ret_flags = 0;
+       /* Unsigned to match ARRAY_SIZE() in the loop bound. */
+       size_t i;
+
+       if (!rdma_protocol_roce(ib_dev, port))
+               return 1UL << IB_GID_TYPE_IB;
+
+       for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
+               if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
+                       ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;
+
+       return ret_flags;
+}
+EXPORT_SYMBOL(roce_gid_type_mask_support);
+
 static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
                       u8 port, union ib_gid *gid,
                       struct ib_gid_attr *gid_attr)
 {
-       switch (gid_op) {
-       case GID_ADD:
-               ib_cache_gid_add(ib_dev, port, gid, gid_attr);
-               break;
-       case GID_DEL:
-               ib_cache_gid_del(ib_dev, port, gid, gid_attr);
-               break;
+       int i;
+       unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
+       for (i = 0; i < IB_GID_TYPE_SIZE; i++) {
+               if ((1UL << i) & gid_type_mask) {
+                       gid_attr->gid_type = i;
+                       switch (gid_op) {
+                       case GID_ADD:
+                               ib_cache_gid_add(ib_dev, port,
+                                                gid, gid_attr);
+                               break;
+                       case GID_DEL:
+                               ib_cache_gid_del(ib_dev, port,
+                                                gid, gid_attr);
+                               break;
+                       }
+               }
        }
 }
 
@@ -103,18 +139,6 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de
        return BONDING_SLAVE_STATE_NA;
 }
 
-static bool is_upper_dev_rcu(struct net_device *dev, struct net_device *upper)
-{
-       struct net_device *_upper = NULL;
-       struct list_head *iter;
-
-       netdev_for_each_all_upper_dev_rcu(dev, _upper, iter)
-               if (_upper == upper)
-                       break;
-
-       return _upper == upper;
-}
-
 #define REQUIRED_BOND_STATES           (BONDING_SLAVE_STATE_ACTIVE |   \
                                         BONDING_SLAVE_STATE_NA)
 static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
@@ -132,7 +156,7 @@ static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
        if (!real_dev)
                real_dev = event_ndev;
 
-       res = ((is_upper_dev_rcu(rdma_ndev, event_ndev) &&
+       res = ((rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
               (is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) &
                REQUIRED_BOND_STATES)) ||
               real_dev == rdma_ndev);
@@ -178,7 +202,7 @@ static int upper_device_filter(struct ib_device *ib_dev, u8 port,
                return 1;
 
        rcu_read_lock();
-       res = is_upper_dev_rcu(rdma_ndev, event_ndev);
+       res = rdma_is_upper_dev_rcu(rdma_ndev, event_ndev);
        rcu_read_unlock();
 
        return res;
@@ -203,10 +227,12 @@ static void enum_netdev_default_gids(struct ib_device *ib_dev,
                                     u8 port, struct net_device *event_ndev,
                                     struct net_device *rdma_ndev)
 {
+       unsigned long gid_type_mask;
+
        rcu_read_lock();
        if (!rdma_ndev ||
            ((rdma_ndev != event_ndev &&
-             !is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
+             !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
             is_eth_active_slave_of_bonding_rcu(rdma_ndev,
                                                netdev_master_upper_dev_get_rcu(rdma_ndev)) ==
             BONDING_SLAVE_STATE_INACTIVE)) {
@@ -215,7 +241,9 @@ static void enum_netdev_default_gids(struct ib_device *ib_dev,
        }
        rcu_read_unlock();
 
-       ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
+       gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
+       ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, gid_type_mask,
                                     IB_CACHE_GID_DEFAULT_MODE_SET);
 }
 
@@ -234,12 +262,17 @@ static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
 
        rcu_read_lock();
 
-       if (is_upper_dev_rcu(rdma_ndev, event_ndev) &&
+       if (rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
            is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) ==
            BONDING_SLAVE_STATE_INACTIVE) {
+               unsigned long gid_type_mask;
+
                rcu_read_unlock();
 
+               gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
                ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
+                                            gid_type_mask,
                                             IB_CACHE_GID_DEFAULT_MODE_DELETE);
        } else {
                rcu_read_unlock();
index a95a32b..f334090 100644 (file)
@@ -49,7 +49,9 @@
 #include <net/netlink.h>
 #include <uapi/rdma/ib_user_sa.h>
 #include <rdma/ib_marshall.h>
+#include <rdma/ib_addr.h>
 #include "sa.h"
+#include "core_priv.h"
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("InfiniBand subnet administration query support");
@@ -715,7 +717,9 @@ static int ib_nl_handle_set_timeout(struct sk_buff *skb,
        struct nlattr *tb[LS_NLA_TYPE_MAX];
        int ret;
 
-       if (!netlink_capable(skb, CAP_NET_ADMIN))
+       if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
+           !(NETLINK_CB(skb).sk) ||
+           !netlink_capable(skb, CAP_NET_ADMIN))
                return -EPERM;
 
        ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
@@ -789,7 +793,9 @@ static int ib_nl_handle_resolve_resp(struct sk_buff *skb,
        int found = 0;
        int ret;
 
-       if (!netlink_capable(skb, CAP_NET_ADMIN))
+       if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
+           !(NETLINK_CB(skb).sk) ||
+           !netlink_capable(skb, CAP_NET_ADMIN))
                return -EPERM;
 
        spin_lock_irqsave(&ib_nl_request_lock, flags);
@@ -996,7 +1002,8 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
 {
        int ret;
        u16 gid_index;
-       int force_grh;
+       int use_roce;
+       struct net_device *ndev = NULL;
 
        memset(ah_attr, 0, sizeof *ah_attr);
        ah_attr->dlid = be16_to_cpu(rec->dlid);
@@ -1006,16 +1013,71 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
        ah_attr->port_num = port_num;
        ah_attr->static_rate = rec->rate;
 
-       force_grh = rdma_cap_eth_ah(device, port_num);
+       use_roce = rdma_cap_eth_ah(device, port_num);
+
+       if (use_roce) {
+               struct net_device *idev;
+               struct net_device *resolved_dev;
+               struct rdma_dev_addr dev_addr = {.bound_dev_if = rec->ifindex,
+                                                .net = rec->net ? rec->net :
+                                                        &init_net};
+               union {
+                       struct sockaddr     _sockaddr;
+                       struct sockaddr_in  _sockaddr_in;
+                       struct sockaddr_in6 _sockaddr_in6;
+               } sgid_addr, dgid_addr;
+
+               if (!device->get_netdev)
+                       return -EOPNOTSUPP;
+
+               rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
+               rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
+
+               /* validate the route */
+               ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
+                                           &dgid_addr._sockaddr, &dev_addr);
+               if (ret)
+                       return ret;
 
-       if (rec->hop_limit > 1 || force_grh) {
-               struct net_device *ndev = ib_get_ndev_from_path(rec);
+               if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
+                    dev_addr.network == RDMA_NETWORK_IPV6) &&
+                   rec->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+                       return -EINVAL;
+
+               idev = device->get_netdev(device, port_num);
+               if (!idev)
+                       return -ENODEV;
+
+               resolved_dev = dev_get_by_index(dev_addr.net,
+                                               dev_addr.bound_dev_if);
+               if (resolved_dev->flags & IFF_LOOPBACK) {
+                       dev_put(resolved_dev);
+                       resolved_dev = idev;
+                       dev_hold(resolved_dev);
+               }
+               ndev = ib_get_ndev_from_path(rec);
+               rcu_read_lock();
+               if ((ndev && ndev != resolved_dev) ||
+                   (resolved_dev != idev &&
+                    !rdma_is_upper_dev_rcu(idev, resolved_dev)))
+                       ret = -EHOSTUNREACH;
+               rcu_read_unlock();
+               dev_put(idev);
+               dev_put(resolved_dev);
+               if (ret) {
+                       if (ndev)
+                               dev_put(ndev);
+                       return ret;
+               }
+       }
 
+       if (rec->hop_limit > 1 || use_roce) {
                ah_attr->ah_flags = IB_AH_GRH;
                ah_attr->grh.dgid = rec->dgid;
 
-               ret = ib_find_cached_gid(device, &rec->sgid, ndev, &port_num,
-                                        &gid_index);
+               ret = ib_find_cached_gid_by_port(device, &rec->sgid,
+                                                rec->gid_type, port_num, ndev,
+                                                &gid_index);
                if (ret) {
                        if (ndev)
                                dev_put(ndev);
@@ -1029,9 +1091,10 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
                if (ndev)
                        dev_put(ndev);
        }
-       if (force_grh) {
+
+       if (use_roce)
                memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
-       }
+
        return 0;
 }
 EXPORT_SYMBOL(ib_init_ah_from_path);
@@ -1157,6 +1220,7 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
                          mad->data, &rec);
                rec.net = NULL;
                rec.ifindex = 0;
+               rec.gid_type = IB_GID_TYPE_IB;
                memset(rec.dmac, 0, ETH_ALEN);
                query->callback(status, &rec, query->context);
        } else
@@ -1609,14 +1673,15 @@ static void send_handler(struct ib_mad_agent *agent,
 }
 
 static void recv_handler(struct ib_mad_agent *mad_agent,
+                        struct ib_mad_send_buf *send_buf,
                         struct ib_mad_recv_wc *mad_recv_wc)
 {
        struct ib_sa_query *query;
-       struct ib_mad_send_buf *mad_buf;
 
-       mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id;
-       query = mad_buf->context[0];
+       if (!send_buf)
+               return;
 
+       query = send_buf->context[0];
        if (query->callback) {
                if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
                        query->callback(query,
index b1f37d4..3de9351 100644 (file)
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/string.h>
+#include <linux/netdevice.h>
 
 #include <rdma/ib_mad.h>
+#include <rdma/ib_pma.h>
 
+struct ib_port;
+
+struct gid_attr_group {
+       struct ib_port          *port;  /* port that gid_attr_show() passes to the attribute's show method */
+       struct kobject          kobj;   /* embedded kobject; gid_attr_show() recovers the group from it */
+       struct attribute_group  ndev;   /* NOTE(review): presumably the per-GID netdev attributes - confirm */
+       struct attribute_group  type;   /* NOTE(review): presumably the per-GID type attributes - confirm */
+};
 struct ib_port {
        struct kobject         kobj;
        struct ib_device      *ibdev;
+       struct gid_attr_group *gid_attr_group;  /* NOTE(review): presumably the group holding the ndev/type GID attributes - confirm */
        struct attribute_group gid_group;
        struct attribute_group pkey_group;
        u8                     port_num;
+       struct attribute_group *pma_table;      /* NOTE(review): how this table is chosen is not visible here - confirm */
 };
 
 struct port_attribute {
@@ -65,6 +77,7 @@ struct port_table_attribute {
        struct port_attribute   attr;
        char                    name[8];
        int                     index;
+       __be16                  attr_id;
 };
 
 static ssize_t port_attr_show(struct kobject *kobj,
@@ -84,6 +97,24 @@ static const struct sysfs_ops port_sysfs_ops = {
        .show = port_attr_show
 };
 
+/*
+ * sysfs ->show() for attributes attached to a gid_attr_group kobject:
+ * resolve the group's port and defer to the attribute's own show method.
+ * Returns -EIO when the attribute has no show method.
+ */
+static ssize_t gid_attr_show(struct kobject *kobj,
+                            struct attribute *attr, char *buf)
+{
+       struct gid_attr_group *group =
+               container_of(kobj, struct gid_attr_group, kobj);
+       struct port_attribute *pattr =
+               container_of(attr, struct port_attribute, attr);
+
+       return pattr->show ? pattr->show(group->port, pattr, buf) : -EIO;
+}
+
+static const struct sysfs_ops gid_attr_sysfs_ops = {
+       .show = gid_attr_show
+};
+
 static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
                          char *buf)
 {
@@ -281,6 +312,46 @@ static struct attribute *port_default_attrs[] = {
        NULL
 };
 
+/* Format the name of the netdev bound to a GID entry; -EINVAL if none. */
+static ssize_t print_ndev(struct ib_gid_attr *gid_attr, char *buf)
+{
+       if (!gid_attr->ndev)
+               return -EINVAL;
+
+       return sprintf(buf, "%s\n", gid_attr->ndev->name);
+}
+
+/* Format the type name of a GID entry. */
+static ssize_t print_gid_type(struct ib_gid_attr *gid_attr, char *buf)
+{
+       return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type));
+}
+
+/*
+ * Common body of the per-GID attribute show methods.
+ *
+ * Queries the GID table entry selected by the attribute's index and hands
+ * its attributes to @print.  Returns @print's result (byte count or negative
+ * errno), or the error code of the failed query.  The print callbacks return
+ * ssize_t so that a negative errno keeps its sign instead of relying on a
+ * size_t -> ssize_t round trip.
+ */
+static ssize_t _show_port_gid_attr(struct ib_port *p,
+                                  struct port_attribute *attr,
+                                  char *buf,
+                                  ssize_t (*print)(struct ib_gid_attr *gid_attr,
+                                                   char *buf))
+{
+       struct port_table_attribute *tab_attr =
+               container_of(attr, struct port_table_attribute, attr);
+       union ib_gid gid;
+       struct ib_gid_attr gid_attr = {};
+       ssize_t ret;
+
+       ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid,
+                          &gid_attr);
+       if (ret)
+               goto err;
+
+       ret = print(&gid_attr, buf);
+
+err:
+       /* Drop the netdev reference if the query left one in gid_attr. */
+       if (gid_attr.ndev)
+               dev_put(gid_attr.ndev);
+       return ret;
+}
+
 static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
                             char *buf)
 {
@@ -296,6 +367,19 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
        return sprintf(buf, "%pI6\n", gid.raw);
 }
 
+static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
+                                      struct port_attribute *attr, char *buf)
+{
+       return _show_port_gid_attr(p, attr, buf, print_ndev);   /* netdev name of the GID at the attribute's index */
+}
+
+static ssize_t show_port_gid_attr_gid_type(struct ib_port *p,
+                                          struct port_attribute *attr,
+                                          char *buf)
+{
+       return _show_port_gid_attr(p, attr, buf, print_gid_type);       /* type name of the GID at the attribute's index */
+}
+
 static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
                              char *buf)
 {
@@ -314,24 +398,32 @@ static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
 #define PORT_PMA_ATTR(_name, _counter, _width, _offset)                        \
 struct port_table_attribute port_pma_attr_##_name = {                  \
        .attr  = __ATTR(_name, S_IRUGO, show_pma_counter, NULL),        \
-       .index = (_offset) | ((_width) << 16) | ((_counter) << 24)      \
+       .index = (_offset) | ((_width) << 16) | ((_counter) << 24),     \
+       .attr_id = IB_PMA_PORT_COUNTERS ,                               \
 }
 
-static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
-                               char *buf)
+/*
+ * Declare a sysfs attribute read from the IB_PMA_PORT_COUNTERS_EXT
+ * attribute.  .index packs the counter's bit offset (low 16 bits) and bit
+ * width (bits 16-23), both decoded by show_pma_counter().
+ */
+#define PORT_PMA_ATTR_EXT(_name, _width, _offset)                      \
+struct port_table_attribute port_pma_attr_ext_##_name = {              \
+       .attr  = __ATTR(_name, S_IRUGO, show_pma_counter, NULL),        \
+       .index = (_offset) | ((_width) << 16),                          \
+       .attr_id = IB_PMA_PORT_COUNTERS_EXT,                            \
+}
+
+/*
+ * Get a Perfmgmt MAD block of data.
+ * Returns error code or the number of bytes retrieved.
+ */
+static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
+               void *data, int offset, size_t size)
 {
-       struct port_table_attribute *tab_attr =
-               container_of(attr, struct port_table_attribute, attr);
-       int offset = tab_attr->index & 0xffff;
-       int width  = (tab_attr->index >> 16) & 0xff;
-       struct ib_mad *in_mad  = NULL;
-       struct ib_mad *out_mad = NULL;
+       struct ib_mad *in_mad;
+       struct ib_mad *out_mad;
        size_t mad_size = sizeof(*out_mad);
        u16 out_mad_pkey_index = 0;
        ssize_t ret;
 
-       if (!p->ibdev->process_mad)
-               return sprintf(buf, "N/A (no PMA)\n");
+       if (!dev->process_mad)
+               return -ENOSYS;
 
        in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
@@ -344,12 +436,13 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
        in_mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_PERF_MGMT;
        in_mad->mad_hdr.class_version = 1;
        in_mad->mad_hdr.method        = IB_MGMT_METHOD_GET;
-       in_mad->mad_hdr.attr_id       = cpu_to_be16(0x12); /* PortCounters */
+       in_mad->mad_hdr.attr_id       = attr;
 
-       in_mad->data[41] = p->port_num; /* PortSelect field */
+       if (attr != IB_PMA_CLASS_PORT_INFO)
+               in_mad->data[41] = port_num;    /* PortSelect field */
 
-       if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY,
-                p->port_num, NULL, NULL,
+       if ((dev->process_mad(dev, IB_MAD_IGNORE_MKEY,
+                port_num, NULL, NULL,
                 (const struct ib_mad_hdr *)in_mad, mad_size,
                 (struct ib_mad_hdr *)out_mad, &mad_size,
                 &out_mad_pkey_index) &
@@ -358,31 +451,54 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
                ret = -EINVAL;
                goto out;
        }
+       memcpy(data, out_mad->data + offset, size);
+       ret = size;
+out:
+       kfree(in_mad);
+       kfree(out_mad);
+       return ret;
+}
+
+/*
+ * sysfs show handler for a single PMA counter.
+ *
+ * tab_attr->index packs the counter's bit offset (low 16 bits) and its
+ * width in bits (the next 8 bits); tab_attr->attr_id selects the PerfMgmt
+ * attribute to query.  Eight bytes starting at byte 40 + offset / 8 of the
+ * reply data are fetched with get_perf_mad() and the counter is decoded
+ * from them according to its width.
+ *
+ * Returns the number of bytes written to buf, 0 for an unsupported width,
+ * or a negative error code when the query fails.
+ */
+static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
+                               char *buf)
+{
+       struct port_table_attribute *tab_attr =
+               container_of(attr, struct port_table_attribute, attr);
+       int offset = tab_attr->index & 0xffff;
+       int width  = (tab_attr->index >> 16) & 0xff;
+       ssize_t ret;
+       u8 data[8];
+
+       ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data,
+                       40 + offset / 8, sizeof(data));
+       /*
+        * get_perf_mad() returns -ENOSYS only when the device has no
+        * process_mad hook, i.e. there is no PMA to ask.  Any other failure
+        * is a real error and must reach the reader as such, not be
+        * disguised as a successful read.
+        */
+       if (ret == -ENOSYS)
+               return sprintf(buf, "N/A (no PMA)\n");
+       if (ret < 0)
+               return ret;
 
        switch (width) {
        case 4:
-               ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >>
+               ret = sprintf(buf, "%u\n", (*data >>
                                            (4 - (offset % 8))) & 0xf);
                break;
        case 8:
-               ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]);
+               ret = sprintf(buf, "%u\n", *data);
                break;
        case 16:
                ret = sprintf(buf, "%u\n",
-                             be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8)));
+                             be16_to_cpup((__be16 *)data));
                break;
        case 32:
                ret = sprintf(buf, "%u\n",
-                             be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
+                             be32_to_cpup((__be32 *)data));
+               break;
+       case 64:
+               ret = sprintf(buf, "%llu\n",
+                               be64_to_cpup((__be64 *)data));
                break;
+
        default:
                ret = 0;
        }
 
-out:
-       kfree(in_mad);
-       kfree(out_mad);
-
        return ret;
 }
 
@@ -403,6 +519,18 @@ static PORT_PMA_ATTR(port_rcv_data             , 13, 32, 224);
 static PORT_PMA_ATTR(port_xmit_packets             , 14, 32, 256);
 static PORT_PMA_ATTR(port_rcv_packets              , 15, 32, 288);
 
+/*
+ * Counters added by the extended set; these attributes are queried with
+ * attr_id IB_PMA_PORT_COUNTERS_EXT.
+ * NOTE(review): the arguments look like (name, width in bits, bit offset),
+ * which would make every counter here 64 bits wide - confirm against the
+ * PORT_PMA_ATTR_EXT definition.
+ */
+static PORT_PMA_ATTR_EXT(port_xmit_data                    , 64,  64);
+static PORT_PMA_ATTR_EXT(port_rcv_data             , 64, 128);
+static PORT_PMA_ATTR_EXT(port_xmit_packets         , 64, 192);
+static PORT_PMA_ATTR_EXT(port_rcv_packets          , 64, 256);
+static PORT_PMA_ATTR_EXT(unicast_xmit_packets      , 64, 320);
+static PORT_PMA_ATTR_EXT(unicast_rcv_packets       , 64, 384);
+static PORT_PMA_ATTR_EXT(multicast_xmit_packets            , 64, 448);
+static PORT_PMA_ATTR_EXT(multicast_rcv_packets     , 64, 512);
+
+
 static struct attribute *pma_attrs[] = {
        &port_pma_attr_symbol_error.attr.attr,
        &port_pma_attr_link_error_recovery.attr.attr,
@@ -423,11 +551,65 @@ static struct attribute *pma_attrs[] = {
        NULL
 };
 
+/*
+ * Counter attributes exposed through pma_group_ext: the basic error
+ * counters plus the extended data/packet counters, including the unicast
+ * and multicast packet counters.  NULL-terminated.
+ */
+static struct attribute *pma_attrs_ext[] = {
+       &port_pma_attr_symbol_error.attr.attr,
+       &port_pma_attr_link_error_recovery.attr.attr,
+       &port_pma_attr_link_downed.attr.attr,
+       &port_pma_attr_port_rcv_errors.attr.attr,
+       &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
+       &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
+       &port_pma_attr_port_xmit_discards.attr.attr,
+       &port_pma_attr_port_xmit_constraint_errors.attr.attr,
+       &port_pma_attr_port_rcv_constraint_errors.attr.attr,
+       &port_pma_attr_local_link_integrity_errors.attr.attr,
+       &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
+       &port_pma_attr_VL15_dropped.attr.attr,
+       &port_pma_attr_ext_port_xmit_data.attr.attr,
+       &port_pma_attr_ext_port_rcv_data.attr.attr,
+       &port_pma_attr_ext_port_xmit_packets.attr.attr,
+       &port_pma_attr_ext_port_rcv_packets.attr.attr,
+       &port_pma_attr_ext_unicast_rcv_packets.attr.attr,
+       &port_pma_attr_ext_unicast_xmit_packets.attr.attr,
+       &port_pma_attr_ext_multicast_rcv_packets.attr.attr,
+       &port_pma_attr_ext_multicast_xmit_packets.attr.attr,
+       NULL
+};
+
+/*
+ * Counter attributes exposed through pma_group_noietf: the basic error
+ * counters plus the extended data/packet counters, without the unicast
+ * and multicast packet counters.  NULL-terminated.
+ */
+static struct attribute *pma_attrs_noietf[] = {
+       &port_pma_attr_symbol_error.attr.attr,
+       &port_pma_attr_link_error_recovery.attr.attr,
+       &port_pma_attr_link_downed.attr.attr,
+       &port_pma_attr_port_rcv_errors.attr.attr,
+       &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
+       &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
+       &port_pma_attr_port_xmit_discards.attr.attr,
+       &port_pma_attr_port_xmit_constraint_errors.attr.attr,
+       &port_pma_attr_port_rcv_constraint_errors.attr.attr,
+       &port_pma_attr_local_link_integrity_errors.attr.attr,
+       &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
+       &port_pma_attr_VL15_dropped.attr.attr,
+       &port_pma_attr_ext_port_xmit_data.attr.attr,
+       &port_pma_attr_ext_port_rcv_data.attr.attr,
+       &port_pma_attr_ext_port_xmit_packets.attr.attr,
+       &port_pma_attr_ext_port_rcv_packets.attr.attr,
+       NULL
+};
+
 static struct attribute_group pma_group = {
        .name  = "counters",
        .attrs  = pma_attrs
 };
 
+/* "counters" group returned by get_counter_table() for the full extended set */
+static struct attribute_group pma_group_ext = {
+       .name  = "counters",
+       .attrs  = pma_attrs_ext
+};
+
+/* "counters" group returned by get_counter_table() for the NOIETF case */
+static struct attribute_group pma_group_noietf = {
+       .name  = "counters",
+       .attrs  = pma_attrs_noietf
+};
+
 static void ib_port_release(struct kobject *kobj)
 {
        struct ib_port *p = container_of(kobj, struct ib_port, kobj);
@@ -451,12 +633,41 @@ static void ib_port_release(struct kobject *kobj)
        kfree(p);
 }
 
+/*
+ * Free a NULL-terminated attribute array together with every attribute it
+ * points to.  A NULL array is accepted and ignored.
+ */
+static void gid_attr_free_attrs(struct attribute **attrs)
+{
+       struct attribute **cur;
+
+       if (!attrs)
+               return;
+
+       for (cur = attrs; *cur; ++cur)
+               kfree(*cur);
+
+       kfree(attrs);
+}
+
+/*
+ * kobject release callback for a port's gid_attr_group: frees the "ndev"
+ * and "type" attribute arrays and then the group itself.
+ */
+static void ib_port_gid_attr_release(struct kobject *kobj)
+{
+       struct gid_attr_group *grp =
+               container_of(kobj, struct gid_attr_group, kobj);
+
+       gid_attr_free_attrs(grp->ndev.attrs);
+       gid_attr_free_attrs(grp->type.attrs);
+       kfree(grp);
+}
+
+
 static struct kobj_type port_type = {
        .release       = ib_port_release,
        .sysfs_ops     = &port_sysfs_ops,
        .default_attrs = port_default_attrs
 };
 
+/*
+ * kobject type of the per-port "gid_attrs" directory created in add_port();
+ * ib_port_gid_attr_release() frees the group when the kobject is released.
+ */
+static struct kobj_type gid_attr_type = {
+       .sysfs_ops      = &gid_attr_sysfs_ops,
+       .release        = ib_port_gid_attr_release
+};
+
 static struct attribute **
 alloc_group_attrs(ssize_t (*show)(struct ib_port *,
                                  struct port_attribute *, char *buf),
@@ -500,6 +711,31 @@ err:
        return NULL;
 }
 
+/*
+ * Figure out which counter table to use depending on
+ * the device capabilities.
+ *
+ * Queries the PerfMgmt ClassPortInfo attribute through get_perf_mad() and
+ * tests its capability_mask:
+ *   IB_PMA_CLASS_CAP_EXT_WIDTH        -> pma_group_ext
+ *   IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF -> pma_group_noietf
+ * If the query fails or neither bit is set, the basic pma_group is used.
+ *
+ * NOTE(review): the masks are applied to capability_mask as-is, which
+ * assumes the IB_PMA_CLASS_CAP_* constants use the same byte order as the
+ * field - confirm against their definitions.
+ */
+static struct attribute_group *get_counter_table(struct ib_device *dev,
+                                                int port_num)
+{
+       struct ib_class_port_info cpi;
+
+       if (get_perf_mad(dev, port_num, IB_PMA_CLASS_PORT_INFO,
+                               &cpi, 40, sizeof(cpi)) >= 0) {
+
+               /*
+                * These must be bit tests: a logical && would be true for
+                * any non-zero mask and always select the extended table.
+                */
+               if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH)
+                       /* We have extended counters */
+                       return &pma_group_ext;
+
+               if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF)
+                       /* But not the IETF ones */
+                       return &pma_group_noietf;
+       }
+
+       /* Fall back to normal counters */
+       return &pma_group;
+}
+
 static int add_port(struct ib_device *device, int port_num,
                    int (*port_callback)(struct ib_device *,
                                         u8, struct kobject *))
@@ -528,9 +764,24 @@ static int add_port(struct ib_device *device, int port_num,
                return ret;
        }
 
-       ret = sysfs_create_group(&p->kobj, &pma_group);
-       if (ret)
+       p->gid_attr_group = kzalloc(sizeof(*p->gid_attr_group), GFP_KERNEL);
+       if (!p->gid_attr_group) {
+               ret = -ENOMEM;
                goto err_put;
+       }
+
+       p->gid_attr_group->port = p;
+       ret = kobject_init_and_add(&p->gid_attr_group->kobj, &gid_attr_type,
+                                  &p->kobj, "gid_attrs");
+       if (ret) {
+               kfree(p->gid_attr_group);
+               goto err_put;
+       }
+
+       p->pma_table = get_counter_table(device, port_num);
+       ret = sysfs_create_group(&p->kobj, p->pma_table);
+       if (ret)
+               goto err_put_gid_attrs;
 
        p->gid_group.name  = "gids";
        p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
@@ -543,12 +794,38 @@ static int add_port(struct ib_device *device, int port_num,
        if (ret)
                goto err_free_gid;
 
+       p->gid_attr_group->ndev.name = "ndevs";
+       p->gid_attr_group->ndev.attrs = alloc_group_attrs(show_port_gid_attr_ndev,
+                                                         attr.gid_tbl_len);
+       if (!p->gid_attr_group->ndev.attrs) {
+               ret = -ENOMEM;
+               goto err_remove_gid;
+       }
+
+       ret = sysfs_create_group(&p->gid_attr_group->kobj,
+                                &p->gid_attr_group->ndev);
+       if (ret)
+               goto err_free_gid_ndev;
+
+       p->gid_attr_group->type.name = "types";
+       p->gid_attr_group->type.attrs = alloc_group_attrs(show_port_gid_attr_gid_type,
+                                                         attr.gid_tbl_len);
+       if (!p->gid_attr_group->type.attrs) {
+               ret = -ENOMEM;
+               goto err_remove_gid_ndev;
+       }
+
+       ret = sysfs_create_group(&p->gid_attr_group->kobj,
+                                &p->gid_attr_group->type);
+       if (ret)
+               goto err_free_gid_type;
+
        p->pkey_group.name  = "pkeys";
        p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,
                                                attr.pkey_tbl_len);
        if (!p->pkey_group.attrs) {
                ret = -ENOMEM;
-               goto err_remove_gid;
+               goto err_remove_gid_type;
        }
 
        ret = sysfs_create_group(&p->kobj, &p->pkey_group);
@@ -576,6 +853,28 @@ err_free_pkey:
        kfree(p->pkey_group.attrs);
        p->pkey_group.attrs = NULL;
 
+err_remove_gid_type:
+       sysfs_remove_group(&p->gid_attr_group->kobj,
+                          &p->gid_attr_group->type);
+
+err_free_gid_type:
+       for (i = 0; i < attr.gid_tbl_len; ++i)
+               kfree(p->gid_attr_group->type.attrs[i]);
+
+       kfree(p->gid_attr_group->type.attrs);
+       p->gid_attr_group->type.attrs = NULL;
+
+err_remove_gid_ndev:
+       sysfs_remove_group(&p->gid_attr_group->kobj,
+                          &p->gid_attr_group->ndev);
+
+err_free_gid_ndev:
+       for (i = 0; i < attr.gid_tbl_len; ++i)
+               kfree(p->gid_attr_group->ndev.attrs[i]);
+
+       kfree(p->gid_attr_group->ndev.attrs);
+       p->gid_attr_group->ndev.attrs = NULL;
+
 err_remove_gid:
        sysfs_remove_group(&p->kobj, &p->gid_group);
 
@@ -587,7 +886,10 @@ err_free_gid:
        p->gid_group.attrs = NULL;
 
 err_remove_pma:
-       sysfs_remove_group(&p->kobj, &pma_group);
+       sysfs_remove_group(&p->kobj, p->pma_table);
+
+err_put_gid_attrs:
+       kobject_put(&p->gid_attr_group->kobj);
 
 err_put:
        kobject_put(&p->kobj);
@@ -614,18 +916,12 @@ static ssize_t show_sys_image_guid(struct device *device,
                                   struct device_attribute *dev_attr, char *buf)
 {
        struct ib_device *dev = container_of(device, struct ib_device, dev);
-       struct ib_device_attr attr;
-       ssize_t ret;
-
-       ret = ib_query_device(dev, &attr);
-       if (ret)
-               return ret;
 
        return sprintf(buf, "%04x:%04x:%04x:%04x\n",
-                      be16_to_cpu(((__be16 *) &attr.sys_image_guid)[0]),
-                      be16_to_cpu(((__be16 *) &attr.sys_image_guid)[1]),
-                      be16_to_cpu(((__be16 *) &attr.sys_image_guid)[2]),
-                      be16_to_cpu(((__be16 *) &attr.sys_image_guid)[3]));
+                      be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]),
+                      be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[1]),
+                      be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]),
+                      be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3]));
 }
 
 static ssize_t show_node_guid(struct device *device,
@@ -800,9 +1096,14 @@ static void free_port_list_attributes(struct ib_device *device)
        list_for_each_entry_safe(p, t, &device->port_list, entry) {
                struct ib_port *port = container_of(p, struct ib_port, kobj);
                list_del(&p->entry);
-               sysfs_remove_group(p, &pma_group);
+               sysfs_remove_group(p, port->pma_table);
                sysfs_remove_group(p, &port->pkey_group);
                sysfs_remove_group(p, &port->gid_group);
+               sysfs_remove_group(&port->gid_attr_group->kobj,
+                                  &port->gid_attr_group->ndev);
+               sysfs_remove_group(&port->gid_attr_group->kobj,
+                                  &port->gid_attr_group->type);
+               kobject_put(&port->gid_attr_group->kobj);
                kobject_put(p);
        }
 
index 72feee6..2116132 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/string.h>
 #include <linux/export.h>
 #include <linux/if_ether.h>
+#include <linux/ip.h>
 
 #include <rdma/ib_pack.h>
 
@@ -116,6 +117,72 @@ static const struct ib_field vlan_table[]  = {
          .size_bits    = 16 }
 };
 
+/*
+ * Field layout of the IPv4 header, handed to ib_pack() by
+ * ib_ud_header_pack() when header->ipv4_present is set.  Each entry places
+ * one field at a word index plus a bit offset inside that word
+ * (presumably 32-bit words - confirm against struct ib_field); the entries
+ * span words 0 through 4.
+ */
+static const struct ib_field ip4_table[]  = {
+       { STRUCT_FIELD(ip4, ver),
+         .offset_words = 0,
+         .offset_bits  = 0,
+         .size_bits    = 4 },
+       { STRUCT_FIELD(ip4, hdr_len),
+         .offset_words = 0,
+         .offset_bits  = 4,
+         .size_bits    = 4 },
+       { STRUCT_FIELD(ip4, tos),
+         .offset_words = 0,
+         .offset_bits  = 8,
+         .size_bits    = 8 },
+       { STRUCT_FIELD(ip4, tot_len),
+         .offset_words = 0,
+         .offset_bits  = 16,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(ip4, id),
+         .offset_words = 1,
+         .offset_bits  = 0,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(ip4, frag_off),
+         .offset_words = 1,
+         .offset_bits  = 16,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(ip4, ttl),
+         .offset_words = 2,
+         .offset_bits  = 0,
+         .size_bits    = 8 },
+       { STRUCT_FIELD(ip4, protocol),
+         .offset_words = 2,
+         .offset_bits  = 8,
+         .size_bits    = 8 },
+       { STRUCT_FIELD(ip4, check),
+         .offset_words = 2,
+         .offset_bits  = 16,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(ip4, saddr),
+         .offset_words = 3,
+         .offset_bits  = 0,
+         .size_bits    = 32 },
+       { STRUCT_FIELD(ip4, daddr),
+         .offset_words = 4,
+         .offset_bits  = 0,
+         .size_bits    = 32 }
+};
+
+/*
+ * Field layout of the UDP header, handed to ib_pack() by
+ * ib_ud_header_pack() when header->udp_present is set: four 16-bit fields
+ * spread over words 0 and 1.
+ */
+static const struct ib_field udp_table[]  = {
+       { STRUCT_FIELD(udp, sport),
+         .offset_words = 0,
+         .offset_bits  = 0,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(udp, dport),
+         .offset_words = 0,
+         .offset_bits  = 16,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(udp, length),
+         .offset_words = 1,
+         .offset_bits  = 0,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(udp, csum),
+         .offset_words = 1,
+         .offset_bits  = 16,
+         .size_bits    = 16 }
+};
+
 static const struct ib_field grh_table[]  = {
        { STRUCT_FIELD(grh, ip_version),
          .offset_words = 0,
@@ -213,26 +280,59 @@ static const struct ib_field deth_table[] = {
          .size_bits    = 24 }
 };
 
+/*
+ * Compute the IPv4 header checksum for the ip4 part of a UD header.
+ *
+ * The fields of header->ip4 are copied into a temporary struct iphdr whose
+ * version and header length are fixed at 4 and 5 words and whose checksum
+ * field is zero; the result of ip_fast_csum() over that copy is returned.
+ * @header itself is not modified.
+ */
+__sum16 ib_ud_ip4_csum(struct ib_ud_header *header)
+{
+       struct iphdr hdr = {
+               .ihl            = 5,
+               .version        = 4,
+               .tos            = header->ip4.tos,
+               .tot_len        = header->ip4.tot_len,
+               .id             = header->ip4.id,
+               .frag_off       = header->ip4.frag_off,
+               .ttl            = header->ip4.ttl,
+               .protocol       = header->ip4.protocol,
+               .check          = 0,
+               .saddr          = header->ip4.saddr,
+               .daddr          = header->ip4.daddr,
+       };
+
+       return ip_fast_csum((u8 *)&hdr, hdr.ihl);
+}
+EXPORT_SYMBOL(ib_ud_ip4_csum);
+
 /**
  * ib_ud_header_init - Initialize UD header structure
  * @payload_bytes:Length of packet payload
  * @lrh_present: specify if LRH is present
  * @eth_present: specify if Eth header is present
  * @vlan_present: packet is tagged vlan
- * @grh_present:GRH flag (if non-zero, GRH will be included)
+ * @grh_present: GRH flag (if non-zero, GRH will be included)
+ * @ip_version: if non-zero, IP header, V4 or V6, will be included
+ * @udp_present: if non-zero, UDP header will be included
  * @immediate_present: specify if immediate data is present
  * @header:Structure to initialize
  */
-void ib_ud_header_init(int                         payload_bytes,
-                      int                  lrh_present,
-                      int                  eth_present,
-                      int                  vlan_present,
-                      int                  grh_present,
-                      int                  immediate_present,
-                      struct ib_ud_header *header)
+int ib_ud_header_init(int     payload_bytes,
+                     int    lrh_present,
+                     int    eth_present,
+                     int    vlan_present,
+                     int    grh_present,
+                     int    ip_version,
+                     int    udp_present,
+                     int    immediate_present,
+                     struct ib_ud_header *header)
 {
+       size_t udp_bytes = udp_present ? IB_UDP_BYTES : 0;
+
+       grh_present = grh_present && !ip_version;
        memset(header, 0, sizeof *header);
 
+       /*
+        * UDP header without IP header doesn't make sense
+        */
+       if (udp_present && ip_version != 4 && ip_version != 6)
+               return -EINVAL;
+
        if (lrh_present) {
                u16 packet_length;
 
@@ -252,16 +352,37 @@ void ib_ud_header_init(int                    payload_bytes,
        if (vlan_present)
                header->eth.type = cpu_to_be16(ETH_P_8021Q);
 
-       if (grh_present) {
+       if (ip_version == 6 || grh_present) {
                header->grh.ip_version      = 6;
                header->grh.payload_length  =
-                       cpu_to_be16((IB_BTH_BYTES     +
+                       cpu_to_be16((udp_bytes        +
+                                    IB_BTH_BYTES     +
                                     IB_DETH_BYTES    +
                                     payload_bytes    +
                                     4                + /* ICRC     */
                                     3) & ~3);          /* round up */
-               header->grh.next_header     = 0x1b;
+               header->grh.next_header     = udp_present ? IPPROTO_UDP : 0x1b;
+       }
+
+       if (ip_version == 4) {
+               header->ip4.ver = 4; /* version 4 */
+               header->ip4.hdr_len = 5; /* 5 words */
+               header->ip4.tot_len =
+                       cpu_to_be16(IB_IP4_BYTES   +
+                                    udp_bytes     +
+                                    IB_BTH_BYTES  +
+                                    IB_DETH_BYTES +
+                                    payload_bytes +
+                                    4);     /* ICRC     */
+               header->ip4.protocol = IPPROTO_UDP;
        }
+       if (udp_present && ip_version)
+               header->udp.length =
+                       cpu_to_be16(IB_UDP_BYTES   +
+                                    IB_BTH_BYTES  +
+                                    IB_DETH_BYTES +
+                                    payload_bytes +
+                                    4);     /* ICRC     */
 
        if (immediate_present)
                header->bth.opcode           = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
@@ -273,8 +394,11 @@ void ib_ud_header_init(int                     payload_bytes,
        header->lrh_present = lrh_present;
        header->eth_present = eth_present;
        header->vlan_present = vlan_present;
-       header->grh_present = grh_present;
+       header->grh_present = grh_present || (ip_version == 6);
+       header->ipv4_present = ip_version == 4;
+       header->udp_present = udp_present;
        header->immediate_present = immediate_present;
+       return 0;
 }
 EXPORT_SYMBOL(ib_ud_header_init);
 
@@ -311,6 +435,16 @@ int ib_ud_header_pack(struct ib_ud_header *header,
                        &header->grh, buf + len);
                len += IB_GRH_BYTES;
        }
+       if (header->ipv4_present) {
+               ib_pack(ip4_table, ARRAY_SIZE(ip4_table),
+                       &header->ip4, buf + len);
+               len += IB_IP4_BYTES;
+       }
+       if (header->udp_present) {
+               ib_pack(udp_table, ARRAY_SIZE(udp_table),
+                       &header->udp, buf + len);
+               len += IB_UDP_BYTES;
+       }
 
        ib_pack(bth_table, ARRAY_SIZE(bth_table),
                &header->bth, buf + len);
index 40becdb..e69bf26 100644 (file)
@@ -232,7 +232,7 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
        ib_ucontext_notifier_end_account(context);
 }
 
-static struct mmu_notifier_ops ib_umem_notifiers = {
+static const struct mmu_notifier_ops ib_umem_notifiers = {
        .release                    = ib_umem_notifier_release,
        .invalidate_page            = ib_umem_notifier_invalidate_page,
        .invalidate_range_start     = ib_umem_notifier_invalidate_range_start,
index 57f281f..415a318 100644 (file)
@@ -210,6 +210,7 @@ static void send_handler(struct ib_mad_agent *agent,
 }
 
 static void recv_handler(struct ib_mad_agent *agent,
+                        struct ib_mad_send_buf *send_buf,
                         struct ib_mad_recv_wc *mad_recv_wc)
 {
        struct ib_umad_file *file = agent->context;
index 94bbd8c..612ccfd 100644 (file)
@@ -204,6 +204,8 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
                             struct ib_event *event);
 void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
 
+int uverbs_dealloc_mw(struct ib_mw *mw);
+
 struct ib_uverbs_flow_spec {
        union {
                union {
index 1c02dea..6ffc9c4 100644 (file)
@@ -291,9 +291,6 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
        struct ib_uverbs_get_context      cmd;
        struct ib_uverbs_get_context_resp resp;
        struct ib_udata                   udata;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       struct ib_device_attr             dev_attr;
-#endif
        struct ib_ucontext               *ucontext;
        struct file                      *filp;
        int ret;
@@ -342,10 +339,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
        ucontext->odp_mrs_count = 0;
        INIT_LIST_HEAD(&ucontext->no_private_counters);
 
-       ret = ib_query_device(ib_dev, &dev_attr);
-       if (ret)
-               goto err_free;
-       if (!(dev_attr.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
+       if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
                ucontext->invalidate_range = NULL;
 
 #endif
@@ -447,8 +441,6 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
 {
        struct ib_uverbs_query_device      cmd;
        struct ib_uverbs_query_device_resp resp;
-       struct ib_device_attr              attr;
-       int                                ret;
 
        if (out_len < sizeof resp)
                return -ENOSPC;
@@ -456,12 +448,8 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
-       ret = ib_query_device(ib_dev, &attr);
-       if (ret)
-               return ret;
-
        memset(&resp, 0, sizeof resp);
-       copy_query_dev_fields(file, ib_dev, &resp, &attr);
+       copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs);
 
        if (copy_to_user((void __user *) (unsigned long) cmd.response,
                         &resp, sizeof resp))
@@ -986,11 +974,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
        }
 
        if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
-               struct ib_device_attr attr;
-
-               ret = ib_query_device(pd->device, &attr);
-               if (ret || !(attr.device_cap_flags &
-                               IB_DEVICE_ON_DEMAND_PAGING)) {
+               if (!(pd->device->attrs.device_cap_flags &
+                     IB_DEVICE_ON_DEMAND_PAGING)) {
                        pr_debug("ODP support not available\n");
                        ret = -EINVAL;
                        goto err_put;
@@ -1008,7 +993,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
        mr->pd      = pd;
        mr->uobject = uobj;
        atomic_inc(&pd->usecnt);
-       atomic_set(&mr->usecnt, 0);
 
        uobj->object = mr;
        ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
@@ -1106,11 +1090,6 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
                }
        }
 
-       if (atomic_read(&mr->usecnt)) {
-               ret = -EBUSY;
-               goto put_uobj_pd;
-       }
-
        old_pd = mr->pd;
        ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
                                        cmd.length, cmd.hca_va,
@@ -1258,7 +1237,7 @@ err_copy:
        idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
 
 err_unalloc:
-       ib_dealloc_mw(mw);
+       uverbs_dealloc_mw(mw);
 
 err_put:
        put_pd_read(pd);
@@ -1287,7 +1266,7 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
 
        mw = uobj->object;
 
-       ret = ib_dealloc_mw(mw);
+       ret = uverbs_dealloc_mw(mw);
        if (!ret)
                uobj->live = 0;
 
@@ -1845,7 +1824,10 @@ static int create_qp(struct ib_uverbs_file *file,
                      sizeof(cmd->create_flags))
                attr.create_flags = cmd->create_flags;
 
-       if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
+       if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
+                               IB_QP_CREATE_CROSS_CHANNEL |
+                               IB_QP_CREATE_MANAGED_SEND |
+                               IB_QP_CREATE_MANAGED_RECV)) {
                ret = -EINVAL;
                goto err_put;
        }
index e3ef288..39680ae 100644 (file)
@@ -133,6 +133,17 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
 static void ib_uverbs_add_one(struct ib_device *device);
 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
 
+/*
+ * Deallocate a memory window through the device's dealloc_mw hook and, on
+ * success, drop the reference the window held on its protection domain.
+ * Returns 0 on success or the driver's error code, in which case the PD
+ * use count is left untouched.
+ */
+int uverbs_dealloc_mw(struct ib_mw *mw)
+{
+       /*
+        * The PD is read before the call; presumably mw is no longer valid
+        * once the driver has deallocated it.
+        */
+       struct ib_pd *owner = mw->pd;
+       int rc = mw->device->dealloc_mw(mw);
+
+       if (rc)
+               return rc;
+
+       atomic_dec(&owner->usecnt);
+       return 0;
+}
+
 static void ib_uverbs_release_dev(struct kobject *kobj)
 {
        struct ib_uverbs_device *dev =
@@ -224,7 +235,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
                struct ib_mw *mw = uobj->object;
 
                idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
-               ib_dealloc_mw(mw);
+               uverbs_dealloc_mw(mw);
                kfree(uobj);
        }
 
index 7d2f14c..af020f8 100644 (file)
@@ -144,5 +144,6 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
        memset(dst->dmac, 0, sizeof(dst->dmac));
        dst->net = NULL;
        dst->ifindex = 0;
+       dst->gid_type = IB_GID_TYPE_IB;
 }
 EXPORT_SYMBOL(ib_copy_path_rec_from_user);
index 545906d..5af6d02 100644 (file)
@@ -229,12 +229,6 @@ EXPORT_SYMBOL(rdma_port_get_link_layer);
 struct ib_pd *ib_alloc_pd(struct ib_device *device)
 {
        struct ib_pd *pd;
-       struct ib_device_attr devattr;
-       int rc;
-
-       rc = ib_query_device(device, &devattr);
-       if (rc)
-               return ERR_PTR(rc);
 
        pd = device->alloc_pd(device, NULL, NULL);
        if (IS_ERR(pd))
@@ -245,7 +239,7 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device)
        pd->local_mr = NULL;
        atomic_set(&pd->usecnt, 0);
 
-       if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
+       if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
                pd->local_dma_lkey = device->local_dma_lkey;
        else {
                struct ib_mr *mr;
@@ -311,8 +305,61 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
 }
 EXPORT_SYMBOL(ib_create_ah);
 
+/*
+ * Guess whether a received network header is IPv4 or IPv6.
+ *
+ * Returns 4 or 6, or 0 when neither view of the header carries a matching
+ * version number.
+ * NOTE(review): this relies on roce4grh and ibgrh being two views of the
+ * same buffer inside union rdma_network_hdr, with the IPv4 header placed
+ * after 20 leading bytes - confirm against the union definition.
+ */
+static int ib_get_header_version(const union rdma_network_hdr *hdr)
+{
+       const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh;
+       struct iphdr ip4h_checked;
+       const struct ipv6hdr *ip6h = (struct ipv6hdr *)&hdr->ibgrh;
+
+       /* An IPv6 header must start with version 6.  If it does not, the
+        * only other candidate is the IPv4 view: accept it when its version
+        * field is 4, otherwise report an unknown header (0).
+        */
+       if (ip6h->version != 6)
+               return (ip4h->version == 4) ? 4 : 0;
+       /* The leading version field reads 6 here, but for an IPv4 packet
+        * the first 20 bytes are garbage and may contain a 6 by chance, so
+        * the IPv4 view still has to be ruled out.
+        */
+
+       /* RoCE v2 requires no options, thus header length
+        * must be 5 words; anything else cannot be IPv4.
+        */
+       if (ip4h->ihl != 5)
+               return 6;
+
+       /* Verify checksum.
+        * We can't write on scattered buffers so we need to copy to
+        * temp buffer, zero its checksum field and recompute it there.
+        */
+       memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked));
+       ip4h_checked.check = 0;
+       ip4h_checked.check = ip_fast_csum((u8 *)&ip4h_checked, 5);
+       /* if IPv4 header checksum is OK, believe it */
+       if (ip4h->check == ip4h_checked.check)
+               return 4;
+       return 6;
+}
+
+/*
+ * Classify the network type of a packet from its GRH.
+ *
+ * Ports running the IB protocol always yield RDMA_NETWORK_IB.  Otherwise
+ * the header is inspected: an IPv4 header gives RDMA_NETWORK_IPV4, a
+ * next_hdr of IPPROTO_UDP gives RDMA_NETWORK_IPV6, and everything else is
+ * treated as RDMA_NETWORK_ROCE_V1.
+ */
+static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
+                                                    u8 port_num,
+                                                    const struct ib_grh *grh)
+{
+       if (rdma_protocol_ib(device, port_num))
+               return RDMA_NETWORK_IB;
+
+       if (ib_get_header_version((union rdma_network_hdr *)grh) == 4)
+               return RDMA_NETWORK_IPV4;
+
+       return (grh->next_hdr == IPPROTO_UDP) ? RDMA_NETWORK_IPV6 :
+                                               RDMA_NETWORK_ROCE_V1;
+}
+
+/*
+ * Match criteria handed to find_gid_index() through its context pointer.
+ * gid_type is compared with the GID entry's gid_type; vlan_id is compared
+ * with the VLAN id of the entry's netdev, where 0xffff is tested for as a
+ * special value (presumably "no VLAN" - confirm in find_gid_index()).
+ */
 struct find_gid_index_context {
        u16 vlan_id;
+       enum ib_gid_type gid_type;
 };
 
 static bool find_gid_index(const union ib_gid *gid,
@@ -322,6 +369,9 @@ static bool find_gid_index(const union ib_gid *gid,
        struct find_gid_index_context *ctx =
                (struct find_gid_index_context *)context;
 
+       if (ctx->gid_type != gid_attr->gid_type)
+               return false;
+
        if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
            (is_vlan_dev(gid_attr->ndev) &&
             vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
@@ -332,14 +382,49 @@ static bool find_gid_index(const union ib_gid *gid,
 
 static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
                                    u16 vlan_id, const union ib_gid *sgid,
+                                  enum ib_gid_type gid_type, /* required type of the matching entry */
                                    u16 *gid_index)
 {
-       struct find_gid_index_context context = {.vlan_id = vlan_id};
+       struct find_gid_index_context context = {.vlan_id = vlan_id,
+                                                .gid_type = gid_type}; /* find_gid_index() rejects other types */
 
        return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
                                     &context, gid_index);
 }
 
+/* Fill @sgid and @dgid from a received network header of type @net_type.
+ * For RDMA_NETWORK_IPV4 the header's IPv4 addresses are converted to
+ * v4-mapped IPv6 form; for RDMA_NETWORK_IPV6 and RDMA_NETWORK_IB the GRH
+ * GIDs are copied as they are.  Returns 0 on success, -EINVAL if either
+ * output pointer is NULL or @net_type is none of the above.
+ */
+static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr,
+                                 enum rdma_network_type net_type,
+                                 union ib_gid *sgid, union ib_gid *dgid)
+{
+       __be32 v4_src, v4_dst;
+
+       if (!sgid || !dgid)
+               return -EINVAL;
+
+       if (net_type == RDMA_NETWORK_IPV4) {
+               memcpy(&v4_src, &hdr->roce4grh.saddr, 4);
+               memcpy(&v4_dst, &hdr->roce4grh.daddr, 4);
+               ipv6_addr_set_v4mapped(v4_src, (struct in6_addr *)sgid);
+               ipv6_addr_set_v4mapped(v4_dst, (struct in6_addr *)dgid);
+               return 0;
+       }
+
+       if (net_type != RDMA_NETWORK_IPV6 && net_type != RDMA_NETWORK_IB)
+               return -EINVAL;
+
+       /* both remaining types are read through the ibgrh view of @hdr */
+       *dgid = hdr->ibgrh.dgid;
+       *sgid = hdr->ibgrh.sgid;
+
+       return 0;
+}
+
 int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
                       const struct ib_wc *wc, const struct ib_grh *grh,
                       struct ib_ah_attr *ah_attr)
@@ -347,33 +432,72 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
        u32 flow_class;
        u16 gid_index;
        int ret;
+       enum rdma_network_type net_type = RDMA_NETWORK_IB;
+       enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+       int hoplimit = 0xff;
+       union ib_gid dgid;
+       union ib_gid sgid;
 
        memset(ah_attr, 0, sizeof *ah_attr);
        if (rdma_cap_eth_ah(device, port_num)) {
+               if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
+                       net_type = wc->network_hdr_type;
+               else
+                       net_type = ib_get_net_type_by_grh(device, port_num, grh);
+               gid_type = ib_network_to_gid_type(net_type);
+       }
+       ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
+                                    &sgid, &dgid);
+       if (ret)
+               return ret;
+
+       if (rdma_protocol_roce(device, port_num)) {
+               int if_index = 0;
                u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
                                wc->vlan_id : 0xffff;
+               struct net_device *idev;
+               struct net_device *resolved_dev;
 
                if (!(wc->wc_flags & IB_WC_GRH))
                        return -EPROTOTYPE;
 
-               if (!(wc->wc_flags & IB_WC_WITH_SMAC) ||
-                   !(wc->wc_flags & IB_WC_WITH_VLAN)) {
-                       ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
-                                                        ah_attr->dmac,
-                                                        wc->wc_flags & IB_WC_WITH_VLAN ?
-                                                        NULL : &vlan_id,
-                                                        0);
-                       if (ret)
-                               return ret;
+               if (!device->get_netdev)
+                       return -EOPNOTSUPP;
+
+               idev = device->get_netdev(device, port_num);
+               if (!idev)
+                       return -ENODEV;
+
+               ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
+                                                  ah_attr->dmac,
+                                                  wc->wc_flags & IB_WC_WITH_VLAN ?
+                                                  NULL : &vlan_id,
+                                                  &if_index, &hoplimit);
+               if (ret) {
+                       dev_put(idev);
+                       return ret;
                }
 
-               ret = get_sgid_index_from_eth(device, port_num, vlan_id,
-                                             &grh->dgid, &gid_index);
+               resolved_dev = dev_get_by_index(&init_net, if_index);
+               if (resolved_dev->flags & IFF_LOOPBACK) {
+                       dev_put(resolved_dev);
+                       resolved_dev = idev;
+                       dev_hold(resolved_dev);
+               }
+               rcu_read_lock();
+               if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
+                                                                  resolved_dev))
+                       ret = -EHOSTUNREACH;
+               rcu_read_unlock();
+               dev_put(idev);
+               dev_put(resolved_dev);
                if (ret)
                        return ret;
 
-               if (wc->wc_flags & IB_WC_WITH_SMAC)
-                       memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
+               ret = get_sgid_index_from_eth(device, port_num, vlan_id,
+                                             &dgid, gid_type, &gid_index);
+               if (ret)
+                       return ret;
        }
 
        ah_attr->dlid = wc->slid;
@@ -383,10 +507,11 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
 
        if (wc->wc_flags & IB_WC_GRH) {
                ah_attr->ah_flags = IB_AH_GRH;
-               ah_attr->grh.dgid = grh->sgid;
+               ah_attr->grh.dgid = sgid;
 
                if (!rdma_cap_eth_ah(device, port_num)) {
-                       ret = ib_find_cached_gid_by_port(device, &grh->dgid,
+                       ret = ib_find_cached_gid_by_port(device, &dgid,
+                                                        IB_GID_TYPE_IB,
                                                         port_num, NULL,
                                                         &gid_index);
                        if (ret)
@@ -396,7 +521,7 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
                ah_attr->grh.sgid_index = (u8) gid_index;
                flow_class = be32_to_cpu(grh->version_tclass_flow);
                ah_attr->grh.flow_label = flow_class & 0xFFFFF;
-               ah_attr->grh.hop_limit = 0xFF;
+               ah_attr->grh.hop_limit = hoplimit;
                ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
        }
        return 0;
@@ -1014,6 +1139,7 @@ int ib_resolve_eth_dmac(struct ib_qp *qp,
                        union ib_gid            sgid;
                        struct ib_gid_attr      sgid_attr;
                        int                     ifindex;
+                       int                     hop_limit;
 
                        ret = ib_query_gid(qp->device,
                                           qp_attr->ah_attr.port_num,
@@ -1028,12 +1154,14 @@ int ib_resolve_eth_dmac(struct ib_qp *qp,
 
                        ifindex = sgid_attr.ndev->ifindex;
 
-                       ret = rdma_addr_find_dmac_by_grh(&sgid,
-                                                        &qp_attr->ah_attr.grh.dgid,
-                                                        qp_attr->ah_attr.dmac,
-                                                        NULL, ifindex);
+                       ret = rdma_addr_find_l2_eth_by_grh(&sgid,
+                                                          &qp_attr->ah_attr.grh.dgid,
+                                                          qp_attr->ah_attr.dmac,
+                                                          NULL, &ifindex, &hop_limit);
 
                        dev_put(sgid_attr.ndev);
+
+                       qp_attr->ah_attr.grh.hop_limit = hop_limit;
                }
        }
 out:
@@ -1215,29 +1343,17 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
                mr->pd      = pd;
                mr->uobject = NULL;
                atomic_inc(&pd->usecnt);
-               atomic_set(&mr->usecnt, 0);
        }
 
        return mr;
 }
 EXPORT_SYMBOL(ib_get_dma_mr);
 
-int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
-{
-       return mr->device->query_mr ?
-               mr->device->query_mr(mr, mr_attr) : -ENOSYS;
-}
-EXPORT_SYMBOL(ib_query_mr);
-
 int ib_dereg_mr(struct ib_mr *mr)
 {
-       struct ib_pd *pd;
+       struct ib_pd *pd = mr->pd;
        int ret;
 
-       if (atomic_read(&mr->usecnt))
-               return -EBUSY;
-
-       pd = mr->pd;
        ret = mr->device->dereg_mr(mr);
        if (!ret)
                atomic_dec(&pd->usecnt);
@@ -1273,49 +1389,12 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
                mr->pd      = pd;
                mr->uobject = NULL;
                atomic_inc(&pd->usecnt);
-               atomic_set(&mr->usecnt, 0);
        }
 
        return mr;
 }
 EXPORT_SYMBOL(ib_alloc_mr);
 
-/* Memory windows */
-
-struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
-{
-       struct ib_mw *mw;
-
-       if (!pd->device->alloc_mw)
-               return ERR_PTR(-ENOSYS);
-
-       mw = pd->device->alloc_mw(pd, type);
-       if (!IS_ERR(mw)) {
-               mw->device  = pd->device;
-               mw->pd      = pd;
-               mw->uobject = NULL;
-               mw->type    = type;
-               atomic_inc(&pd->usecnt);
-       }
-
-       return mw;
-}
-EXPORT_SYMBOL(ib_alloc_mw);
-
-int ib_dealloc_mw(struct ib_mw *mw)
-{
-       struct ib_pd *pd;
-       int ret;
-
-       pd = mw->pd;
-       ret = mw->device->dealloc_mw(mw);
-       if (!ret)
-               atomic_dec(&pd->usecnt);
-
-       return ret;
-}
-EXPORT_SYMBOL(ib_dealloc_mw);
-
 /* "Fast" memory regions */
 
 struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
@@ -1530,7 +1609,7 @@ int ib_sg_to_pages(struct ib_mr *mr,
                   int (*set_page)(struct ib_mr *, u64))
 {
        struct scatterlist *sg;
-       u64 last_end_dma_addr = 0, last_page_addr = 0;
+       u64 last_end_dma_addr = 0;
        unsigned int last_page_off = 0;
        u64 page_mask = ~((u64)mr->page_size - 1);
        int i, ret;
@@ -1572,7 +1651,6 @@ next_page:
 
                mr->length += dma_len;
                last_end_dma_addr = end_dma_addr;
-               last_page_addr = end_dma_addr & page_mask;
                last_page_off = end_dma_addr & ~page_mask;
        }
 
index cb78b1e..f504ba7 100644 (file)
@@ -149,7 +149,7 @@ static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_en
        error = l2t_send(tdev, skb, l2e);
        if (error < 0)
                kfree_skb(skb);
-       return error;
+       return error < 0 ? error : 0;
 }
 
 int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
@@ -165,7 +165,7 @@ int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
        error = cxgb3_ofld_send(tdev, skb);
        if (error < 0)
                kfree_skb(skb);
-       return error;
+       return error < 0 ? error : 0;
 }
 
 static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
index cfe4049..97fbfd2 100644 (file)
@@ -115,10 +115,6 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
                case T3_SEND_WITH_SE_INV:
                        wc->opcode = IB_WC_SEND;
                        break;
-               case T3_BIND_MW:
-                       wc->opcode = IB_WC_BIND_MW;
-                       break;
-
                case T3_LOCAL_INV:
                        wc->opcode = IB_WC_LOCAL_INV;
                        break;
index 5c36ee2..1d04c87 100644 (file)
@@ -75,37 +75,6 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
        return ret;
 }
 
-int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
-                                       struct iwch_mr *mhp,
-                                       int shift,
-                                       int npages)
-{
-       u32 stag;
-       int ret;
-
-       /* We could support this... */
-       if (npages > mhp->attr.pbl_size)
-               return -ENOMEM;
-
-       stag = mhp->attr.stag;
-       if (cxio_reregister_phys_mem(&rhp->rdev,
-                                  &stag, mhp->attr.pdid,
-                                  mhp->attr.perms,
-                                  mhp->attr.zbva,
-                                  mhp->attr.va_fbo,
-                                  mhp->attr.len,
-                                  shift - 12,
-                                  mhp->attr.pbl_size, mhp->attr.pbl_addr))
-               return -ENOMEM;
-
-       ret = iwch_finish_mem_reg(mhp, stag);
-       if (ret)
-               cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
-                      mhp->attr.pbl_addr);
-
-       return ret;
-}
-
 int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
 {
        mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
@@ -130,74 +99,3 @@ int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
        return cxio_write_pbl(&mhp->rhp->rdev, pages,
                              mhp->attr.pbl_addr + (offset << 3), npages);
 }
-
-int build_phys_page_list(struct ib_phys_buf *buffer_list,
-                                       int num_phys_buf,
-                                       u64 *iova_start,
-                                       u64 *total_size,
-                                       int *npages,
-                                       int *shift,
-                                       __be64 **page_list)
-{
-       u64 mask;
-       int i, j, n;
-
-       mask = 0;
-       *total_size = 0;
-       for (i = 0; i < num_phys_buf; ++i) {
-               if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
-                       return -EINVAL;
-               if (i != 0 && i != num_phys_buf - 1 &&
-                   (buffer_list[i].size & ~PAGE_MASK))
-                       return -EINVAL;
-               *total_size += buffer_list[i].size;
-               if (i > 0)
-                       mask |= buffer_list[i].addr;
-               else
-                       mask |= buffer_list[i].addr & PAGE_MASK;
-               if (i != num_phys_buf - 1)
-                       mask |= buffer_list[i].addr + buffer_list[i].size;
-               else
-                       mask |= (buffer_list[i].addr + buffer_list[i].size +
-                               PAGE_SIZE - 1) & PAGE_MASK;
-       }
-
-       if (*total_size > 0xFFFFFFFFULL)
-               return -ENOMEM;
-
-       /* Find largest page shift we can use to cover buffers */
-       for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
-               if ((1ULL << *shift) & mask)
-                       break;
-
-       buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
-       buffer_list[0].addr &= ~0ull << *shift;
-
-       *npages = 0;
-       for (i = 0; i < num_phys_buf; ++i)
-               *npages += (buffer_list[i].size +
-                       (1ULL << *shift) - 1) >> *shift;
-
-       if (!*npages)
-               return -EINVAL;
-
-       *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
-       if (!*page_list)
-               return -ENOMEM;
-
-       n = 0;
-       for (i = 0; i < num_phys_buf; ++i)
-               for (j = 0;
-                    j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
-                    ++j)
-                       (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
-                           ((u64) j << *shift));
-
-       PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
-            __func__, (unsigned long long) *iova_start,
-            (unsigned long long) mask, *shift, (unsigned long long) *total_size,
-            *npages);
-
-       return 0;
-
-}
index c34725c..2734820 100644 (file)
@@ -458,9 +458,6 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
        u32 mmid;
 
        PDBG("%s ib_mr %p\n", __func__, ib_mr);
-       /* There can be no memory windows */
-       if (atomic_read(&ib_mr->usecnt))
-               return -EINVAL;
 
        mhp = to_iwch_mr(ib_mr);
        kfree(mhp->pages);
@@ -479,24 +476,25 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
        return 0;
 }
 
-static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
-                                       struct ib_phys_buf *buffer_list,
-                                       int num_phys_buf,
-                                       int acc,
-                                       u64 *iova_start)
+static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
 {
-       __be64 *page_list;
-       int shift;
-       u64 total_size;
-       int npages;
-       struct iwch_dev *rhp;
-       struct iwch_pd *php;
+       const u64 total_size = 0xffffffff;
+       const u64 mask = (total_size + PAGE_SIZE - 1) & PAGE_MASK;
+       struct iwch_pd *php = to_iwch_pd(pd);
+       struct iwch_dev *rhp = php->rhp;
        struct iwch_mr *mhp;
-       int ret;
+       __be64 *page_list;
+       int shift = 26, npages, ret, i;
 
        PDBG("%s ib_pd %p\n", __func__, pd);
-       php = to_iwch_pd(pd);
-       rhp = php->rhp;
+
+       /*
+        * T3 only supports 32 bits of size.
+        */
+       if (sizeof(phys_addr_t) > 4) {
+               pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
+               return ERR_PTR(-ENOTSUPP);
+       }
 
        mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
        if (!mhp)
@@ -504,22 +502,23 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
 
        mhp->rhp = rhp;
 
-       /* First check that we have enough alignment */
-       if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
+       npages = (total_size + (1ULL << shift) - 1) >> shift;
+       if (!npages) {
                ret = -EINVAL;
                goto err;
        }
 
-       if (num_phys_buf > 1 &&
-           ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
-               ret = -EINVAL;
+       page_list = kmalloc_array(npages, sizeof(u64), GFP_KERNEL);
+       if (!page_list) {
+               ret = -ENOMEM;
                goto err;
        }
 
-       ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
-                                  &total_size, &npages, &shift, &page_list);
-       if (ret)
-               goto err;
+       for (i = 0; i < npages; i++)
+               page_list[i] = cpu_to_be64((u64)i << shift);
+
+       PDBG("%s mask 0x%llx shift %d len %lld pbl_size %d\n",
+               __func__, mask, shift, total_size, npages);
 
        ret = iwch_alloc_pbl(mhp, npages);
        if (ret) {
@@ -536,7 +535,7 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
        mhp->attr.zbva = 0;
 
        mhp->attr.perms = iwch_ib_to_tpt_access(acc);
-       mhp->attr.va_fbo = *iova_start;
+       mhp->attr.va_fbo = 0;
        mhp->attr.page_size = shift - 12;
 
        mhp->attr.len = (u32) total_size;
@@ -553,76 +552,8 @@ err_pbl:
 err:
        kfree(mhp);
        return ERR_PTR(ret);
-
-}
-
-static int iwch_reregister_phys_mem(struct ib_mr *mr,
-                                    int mr_rereg_mask,
-                                    struct ib_pd *pd,
-                                    struct ib_phys_buf *buffer_list,
-                                    int num_phys_buf,
-                                    int acc, u64 * iova_start)
-{
-
-       struct iwch_mr mh, *mhp;
-       struct iwch_pd *php;
-       struct iwch_dev *rhp;
-       __be64 *page_list = NULL;
-       int shift = 0;
-       u64 total_size;
-       int npages = 0;
-       int ret;
-
-       PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
-
-       /* There can be no memory windows */
-       if (atomic_read(&mr->usecnt))
-               return -EINVAL;
-
-       mhp = to_iwch_mr(mr);
-       rhp = mhp->rhp;
-       php = to_iwch_pd(mr->pd);
-
-       /* make sure we are on the same adapter */
-       if (rhp != php->rhp)
-               return -EINVAL;
-
-       memcpy(&mh, mhp, sizeof *mhp);
-
-       if (mr_rereg_mask & IB_MR_REREG_PD)
-               php = to_iwch_pd(pd);
-       if (mr_rereg_mask & IB_MR_REREG_ACCESS)
-               mh.attr.perms = iwch_ib_to_tpt_access(acc);
-       if (mr_rereg_mask & IB_MR_REREG_TRANS) {
-               ret = build_phys_page_list(buffer_list, num_phys_buf,
-                                          iova_start,
-                                          &total_size, &npages,
-                                          &shift, &page_list);
-               if (ret)
-                       return ret;
-       }
-
-       ret = iwch_reregister_mem(rhp, php, &mh, shift, npages);
-       kfree(page_list);
-       if (ret) {
-               return ret;
-       }
-       if (mr_rereg_mask & IB_MR_REREG_PD)
-               mhp->attr.pdid = php->pdid;
-       if (mr_rereg_mask & IB_MR_REREG_ACCESS)
-               mhp->attr.perms = iwch_ib_to_tpt_access(acc);
-       if (mr_rereg_mask & IB_MR_REREG_TRANS) {
-               mhp->attr.zbva = 0;
-               mhp->attr.va_fbo = *iova_start;
-               mhp->attr.page_size = shift - 12;
-               mhp->attr.len = (u32) total_size;
-               mhp->attr.pbl_size = npages;
-       }
-
-       return 0;
 }
 
-
 static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                      u64 virt, int acc, struct ib_udata *udata)
 {
@@ -726,28 +657,6 @@ err:
        return ERR_PTR(err);
 }
 
-static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
-{
-       struct ib_phys_buf bl;
-       u64 kva;
-       struct ib_mr *ibmr;
-
-       PDBG("%s ib_pd %p\n", __func__, pd);
-
-       /*
-        * T3 only supports 32 bits of size.
-        */
-       if (sizeof(phys_addr_t) > 4) {
-               pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
-               return ERR_PTR(-ENOTSUPP);
-       }
-       bl.size = 0xffffffff;
-       bl.addr = 0;
-       kva = 0;
-       ibmr = iwch_register_phys_mem(pd, &bl, 1, acc, &kva);
-       return ibmr;
-}
-
 static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
 {
        struct iwch_dev *rhp;
@@ -1452,12 +1361,9 @@ int iwch_register_device(struct iwch_dev *dev)
        dev->ibdev.resize_cq = iwch_resize_cq;
        dev->ibdev.poll_cq = iwch_poll_cq;
        dev->ibdev.get_dma_mr = iwch_get_dma_mr;
-       dev->ibdev.reg_phys_mr = iwch_register_phys_mem;
-       dev->ibdev.rereg_phys_mr = iwch_reregister_phys_mem;
        dev->ibdev.reg_user_mr = iwch_reg_user_mr;
        dev->ibdev.dereg_mr = iwch_dereg_mr;
        dev->ibdev.alloc_mw = iwch_alloc_mw;
-       dev->ibdev.bind_mw = iwch_bind_mw;
        dev->ibdev.dealloc_mw = iwch_dealloc_mw;
        dev->ibdev.alloc_mr = iwch_alloc_mr;
        dev->ibdev.map_mr_sg = iwch_map_mr_sg;
index 2ac85b8..252c464 100644 (file)
@@ -330,9 +330,6 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                      struct ib_send_wr **bad_wr);
 int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                      struct ib_recv_wr **bad_wr);
-int iwch_bind_mw(struct ib_qp *qp,
-                            struct ib_mw *mw,
-                            struct ib_mw_bind *mw_bind);
 int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
 int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
 int iwch_post_zb_read(struct iwch_ep *ep);
@@ -341,21 +338,9 @@ void iwch_unregister_device(struct iwch_dev *dev);
 void stop_read_rep_timer(struct iwch_qp *qhp);
 int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
                      struct iwch_mr *mhp, int shift);
-int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
-                                       struct iwch_mr *mhp,
-                                       int shift,
-                                       int npages);
 int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
 void iwch_free_pbl(struct iwch_mr *mhp);
 int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
-int build_phys_page_list(struct ib_phys_buf *buffer_list,
-                                       int num_phys_buf,
-                                       u64 *iova_start,
-                                       u64 *total_size,
-                                       int *npages,
-                                       int *shift,
-                                       __be64 **page_list);
-
 
 #define IWCH_NODE_DESC "cxgb3 Chelsio Communications"
 
index d0548fc..d939980 100644 (file)
@@ -526,88 +526,6 @@ out:
        return err;
 }
 
-int iwch_bind_mw(struct ib_qp *qp,
-                            struct ib_mw *mw,
-                            struct ib_mw_bind *mw_bind)
-{
-       struct iwch_dev *rhp;
-       struct iwch_mw *mhp;
-       struct iwch_qp *qhp;
-       union t3_wr *wqe;
-       u32 pbl_addr;
-       u8 page_size;
-       u32 num_wrs;
-       unsigned long flag;
-       struct ib_sge sgl;
-       int err=0;
-       enum t3_wr_flags t3_wr_flags;
-       u32 idx;
-       struct t3_swsq *sqp;
-
-       qhp = to_iwch_qp(qp);
-       mhp = to_iwch_mw(mw);
-       rhp = qhp->rhp;
-
-       spin_lock_irqsave(&qhp->lock, flag);
-       if (qhp->attr.state > IWCH_QP_STATE_RTS) {
-               spin_unlock_irqrestore(&qhp->lock, flag);
-               return -EINVAL;
-       }
-       num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
-                           qhp->wq.sq_size_log2);
-       if (num_wrs == 0) {
-               spin_unlock_irqrestore(&qhp->lock, flag);
-               return -ENOMEM;
-       }
-       idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
-       PDBG("%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p\n", __func__, idx,
-            mw, mw_bind);
-       wqe = (union t3_wr *) (qhp->wq.queue + idx);
-
-       t3_wr_flags = 0;
-       if (mw_bind->send_flags & IB_SEND_SIGNALED)
-               t3_wr_flags = T3_COMPLETION_FLAG;
-
-       sgl.addr = mw_bind->bind_info.addr;
-       sgl.lkey = mw_bind->bind_info.mr->lkey;
-       sgl.length = mw_bind->bind_info.length;
-       wqe->bind.reserved = 0;
-       wqe->bind.type = TPT_VATO;
-
-       /* TBD: check perms */
-       wqe->bind.perms = iwch_ib_to_tpt_bind_access(
-               mw_bind->bind_info.mw_access_flags);
-       wqe->bind.mr_stag = cpu_to_be32(mw_bind->bind_info.mr->lkey);
-       wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
-       wqe->bind.mw_len = cpu_to_be32(mw_bind->bind_info.length);
-       wqe->bind.mw_va = cpu_to_be64(mw_bind->bind_info.addr);
-       err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
-       if (err) {
-               spin_unlock_irqrestore(&qhp->lock, flag);
-               return err;
-       }
-       wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
-       sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
-       sqp->wr_id = mw_bind->wr_id;
-       sqp->opcode = T3_BIND_MW;
-       sqp->sq_wptr = qhp->wq.sq_wptr;
-       sqp->complete = 0;
-       sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
-       wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr);
-       wqe->bind.mr_pagesz = page_size;
-       build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
-                      Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
-                      sizeof(struct t3_bind_mw_wr) >> 3, T3_SOPEOP);
-       ++(qhp->wq.wptr);
-       ++(qhp->wq.sq_wptr);
-       spin_unlock_irqrestore(&qhp->lock, flag);
-
-       if (cxio_wq_db_enabled(&qhp->wq))
-               ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
-
-       return err;
-}
-
 static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
                                    u8 *layer_type, u8 *ecode)
 {
index 326d07d..cd2ff5f 100644 (file)
@@ -3271,6 +3271,12 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
                                    &ep->com.mapped_local_addr;
 
+       if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) {
+               err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
+                                    (const u32 *)&sin6->sin6_addr.s6_addr, 1);
+               if (err)
+                       return err;
+       }
        c4iw_init_wr_wait(&ep->com.wr_wait);
        err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
                                   ep->stid, &sin6->sin6_addr,
@@ -3282,13 +3288,13 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
                                          0, 0, __func__);
        else if (err > 0)
                err = net_xmit_errno(err);
-       if (err)
+       if (err) {
+               cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
+                                  (const u32 *)&sin6->sin6_addr.s6_addr, 1);
                pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
                       err, ep->stid,
                       sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
-       else
-               cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
-                              (const u32 *)&sin6->sin6_addr.s6_addr, 1);
+       }
        return err;
 }
 
index de9cd69..cf21df4 100644 (file)
@@ -744,9 +744,6 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
                case FW_RI_SEND_WITH_SE:
                        wc->opcode = IB_WC_SEND;
                        break;
-               case FW_RI_BIND_MW:
-                       wc->opcode = IB_WC_BIND_MW;
-                       break;
 
                case FW_RI_LOCAL_INV:
                        wc->opcode = IB_WC_LOCAL_INV;
index 58fce17..8024ea4 100644 (file)
@@ -315,14 +315,12 @@ static int qp_release(struct inode *inode, struct file *file)
 static int qp_open(struct inode *inode, struct file *file)
 {
        struct c4iw_debugfs_data *qpd;
-       int ret = 0;
        int count = 1;
 
        qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
-       if (!qpd) {
-               ret = -ENOMEM;
-               goto out;
-       }
+       if (!qpd)
+               return -ENOMEM;
+
        qpd->devp = inode->i_private;
        qpd->pos = 0;
 
@@ -333,8 +331,8 @@ static int qp_open(struct inode *inode, struct file *file)
        qpd->bufsize = count * 128;
        qpd->buf = vmalloc(qpd->bufsize);
        if (!qpd->buf) {
-               ret = -ENOMEM;
-               goto err1;
+               kfree(qpd);
+               return -ENOMEM;
        }
 
        spin_lock_irq(&qpd->devp->lock);
@@ -343,11 +341,7 @@ static int qp_open(struct inode *inode, struct file *file)
 
        qpd->buf[qpd->pos++] = 0;
        file->private_data = qpd;
-       goto out;
-err1:
-       kfree(qpd);
-out:
-       return ret;
+       return 0;
 }
 
 static const struct file_operations qp_debugfs_fops = {
@@ -781,8 +775,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
                pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n",
                       pci_name(rdev->lldi.pdev), rdev->lldi.udb_density,
                       rdev->lldi.ucq_density);
-               err = -EINVAL;
-               goto err1;
+               return -EINVAL;
        }
        if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start ||
            rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) {
@@ -791,8 +784,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
                       pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start,
                       rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.size,
                       rdev->lldi.vr->cq.size);
-               err = -EINVAL;
-               goto err1;
+               return -EINVAL;
        }
 
        rdev->qpmask = rdev->lldi.udb_density - 1;
@@ -816,10 +808,8 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
             rdev->lldi.db_reg, rdev->lldi.gts_reg,
             rdev->qpmask, rdev->cqmask);
 
-       if (c4iw_num_stags(rdev) == 0) {
-               err = -EINVAL;
-               goto err1;
-       }
+       if (c4iw_num_stags(rdev) == 0)
+               return -EINVAL;
 
        rdev->stats.pd.total = T4_MAX_NUM_PD;
        rdev->stats.stag.total = rdev->lldi.vr->stag.size;
@@ -831,29 +821,31 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
        err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing resources\n", err);
-               goto err1;
+               return err;
        }
        err = c4iw_pblpool_create(rdev);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
-               goto err2;
+               goto destroy_resource;
        }
        err = c4iw_rqtpool_create(rdev);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
-               goto err3;
+               goto destroy_pblpool;
        }
        err = c4iw_ocqp_pool_create(rdev);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
-               goto err4;
+               goto destroy_rqtpool;
        }
        rdev->status_page = (struct t4_dev_status_page *)
                            __get_free_page(GFP_KERNEL);
-       if (!rdev->status_page) {
-               pr_err(MOD "error allocating status page\n");
-               goto err4;
-       }
+       if (!rdev->status_page)
+               goto destroy_ocqp_pool;
+       rdev->status_page->qp_start = rdev->lldi.vr->qp.start;
+       rdev->status_page->qp_size = rdev->lldi.vr->qp.size;
+       rdev->status_page->cq_start = rdev->lldi.vr->cq.start;
+       rdev->status_page->cq_size = rdev->lldi.vr->cq.size;
 
        if (c4iw_wr_log) {
                rdev->wr_log = kzalloc((1 << c4iw_wr_log_size_order) *
@@ -869,13 +861,14 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
        rdev->status_page->db_off = 0;
 
        return 0;
-err4:
+destroy_ocqp_pool:
+       c4iw_ocqp_pool_destroy(rdev);
+destroy_rqtpool:
        c4iw_rqtpool_destroy(rdev);
-err3:
+destroy_pblpool:
        c4iw_pblpool_destroy(rdev);
-err2:
+destroy_resource:
        c4iw_destroy_resource(&rdev->resource);
-err1:
        return err;
 }
 
index 00e55fa..fb2de75 100644 (file)
@@ -947,8 +947,6 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                      struct ib_send_wr **bad_wr);
 int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                      struct ib_recv_wr **bad_wr);
-int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
-                struct ib_mw_bind *mw_bind);
 int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
 int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog);
 int c4iw_destroy_listen(struct iw_cm_id *cm_id);
@@ -968,17 +966,6 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
                                           u64 length, u64 virt, int acc,
                                           struct ib_udata *udata);
 struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
-struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
-                                       struct ib_phys_buf *buffer_list,
-                                       int num_phys_buf,
-                                       int acc,
-                                       u64 *iova_start);
-int c4iw_reregister_phys_mem(struct ib_mr *mr,
-                                    int mr_rereg_mask,
-                                    struct ib_pd *pd,
-                                    struct ib_phys_buf *buffer_list,
-                                    int num_phys_buf,
-                                    int acc, u64 *iova_start);
 int c4iw_dereg_mr(struct ib_mr *ib_mr);
 int c4iw_destroy_cq(struct ib_cq *ib_cq);
 struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
index e1629ab..7849890 100644 (file)
@@ -392,32 +392,6 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
        return ret;
 }
 
-static int reregister_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
-                         struct c4iw_mr *mhp, int shift, int npages)
-{
-       u32 stag;
-       int ret;
-
-       if (npages > mhp->attr.pbl_size)
-               return -ENOMEM;
-
-       stag = mhp->attr.stag;
-       ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid,
-                             FW_RI_STAG_NSMR, mhp->attr.perms,
-                             mhp->attr.mw_bind_enable, mhp->attr.zbva,
-                             mhp->attr.va_fbo, mhp->attr.len, shift - 12,
-                             mhp->attr.pbl_size, mhp->attr.pbl_addr);
-       if (ret)
-               return ret;
-
-       ret = finish_mem_reg(mhp, stag);
-       if (ret)
-               dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
-                      mhp->attr.pbl_addr);
-
-       return ret;
-}
-
 static int alloc_pbl(struct c4iw_mr *mhp, int npages)
 {
        mhp->attr.pbl_addr = c4iw_pblpool_alloc(&mhp->rhp->rdev,
@@ -431,228 +405,6 @@ static int alloc_pbl(struct c4iw_mr *mhp, int npages)
        return 0;
 }
 
-static int build_phys_page_list(struct ib_phys_buf *buffer_list,
-                               int num_phys_buf, u64 *iova_start,
-                               u64 *total_size, int *npages,
-                               int *shift, __be64 **page_list)
-{
-       u64 mask;
-       int i, j, n;
-
-       mask = 0;
-       *total_size = 0;
-       for (i = 0; i < num_phys_buf; ++i) {
-               if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
-                       return -EINVAL;
-               if (i != 0 && i != num_phys_buf - 1 &&
-                   (buffer_list[i].size & ~PAGE_MASK))
-                       return -EINVAL;
-               *total_size += buffer_list[i].size;
-               if (i > 0)
-                       mask |= buffer_list[i].addr;
-               else
-                       mask |= buffer_list[i].addr & PAGE_MASK;
-               if (i != num_phys_buf - 1)
-                       mask |= buffer_list[i].addr + buffer_list[i].size;
-               else
-                       mask |= (buffer_list[i].addr + buffer_list[i].size +
-                               PAGE_SIZE - 1) & PAGE_MASK;
-       }
-
-       if (*total_size > 0xFFFFFFFFULL)
-               return -ENOMEM;
-
-       /* Find largest page shift we can use to cover buffers */
-       for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
-               if ((1ULL << *shift) & mask)
-                       break;
-
-       buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
-       buffer_list[0].addr &= ~0ull << *shift;
-
-       *npages = 0;
-       for (i = 0; i < num_phys_buf; ++i)
-               *npages += (buffer_list[i].size +
-                       (1ULL << *shift) - 1) >> *shift;
-
-       if (!*npages)
-               return -EINVAL;
-
-       *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
-       if (!*page_list)
-               return -ENOMEM;
-
-       n = 0;
-       for (i = 0; i < num_phys_buf; ++i)
-               for (j = 0;
-                    j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
-                    ++j)
-                       (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
-                           ((u64) j << *shift));
-
-       PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
-            __func__, (unsigned long long)*iova_start,
-            (unsigned long long)mask, *shift, (unsigned long long)*total_size,
-            *npages);
-
-       return 0;
-
-}
-
-int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask,
-                            struct ib_pd *pd, struct ib_phys_buf *buffer_list,
-                            int num_phys_buf, int acc, u64 *iova_start)
-{
-
-       struct c4iw_mr mh, *mhp;
-       struct c4iw_pd *php;
-       struct c4iw_dev *rhp;
-       __be64 *page_list = NULL;
-       int shift = 0;
-       u64 total_size;
-       int npages;
-       int ret;
-
-       PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
-
-       /* There can be no memory windows */
-       if (atomic_read(&mr->usecnt))
-               return -EINVAL;
-
-       mhp = to_c4iw_mr(mr);
-       rhp = mhp->rhp;
-       php = to_c4iw_pd(mr->pd);
-
-       /* make sure we are on the same adapter */
-       if (rhp != php->rhp)
-               return -EINVAL;
-
-       memcpy(&mh, mhp, sizeof *mhp);
-
-       if (mr_rereg_mask & IB_MR_REREG_PD)
-               php = to_c4iw_pd(pd);
-       if (mr_rereg_mask & IB_MR_REREG_ACCESS) {
-               mh.attr.perms = c4iw_ib_to_tpt_access(acc);
-               mh.attr.mw_bind_enable = (acc & IB_ACCESS_MW_BIND) ==
-                                        IB_ACCESS_MW_BIND;
-       }
-       if (mr_rereg_mask & IB_MR_REREG_TRANS) {
-               ret = build_phys_page_list(buffer_list, num_phys_buf,
-                                               iova_start,
-                                               &total_size, &npages,
-                                               &shift, &page_list);
-               if (ret)
-                       return ret;
-       }
-
-       if (mr_exceeds_hw_limits(rhp, total_size)) {
-               kfree(page_list);
-               return -EINVAL;
-       }
-
-       ret = reregister_mem(rhp, php, &mh, shift, npages);
-       kfree(page_list);
-       if (ret)
-               return ret;
-       if (mr_rereg_mask & IB_MR_REREG_PD)
-               mhp->attr.pdid = php->pdid;
-       if (mr_rereg_mask & IB_MR_REREG_ACCESS)
-               mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
-       if (mr_rereg_mask & IB_MR_REREG_TRANS) {
-               mhp->attr.zbva = 0;
-               mhp->attr.va_fbo = *iova_start;
-               mhp->attr.page_size = shift - 12;
-               mhp->attr.len = (u32) total_size;
-               mhp->attr.pbl_size = npages;
-       }
-
-       return 0;
-}
-
-struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
-                                    struct ib_phys_buf *buffer_list,
-                                    int num_phys_buf, int acc, u64 *iova_start)
-{
-       __be64 *page_list;
-       int shift;
-       u64 total_size;
-       int npages;
-       struct c4iw_dev *rhp;
-       struct c4iw_pd *php;
-       struct c4iw_mr *mhp;
-       int ret;
-
-       PDBG("%s ib_pd %p\n", __func__, pd);
-       php = to_c4iw_pd(pd);
-       rhp = php->rhp;
-
-       mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
-       if (!mhp)
-               return ERR_PTR(-ENOMEM);
-
-       mhp->rhp = rhp;
-
-       /* First check that we have enough alignment */
-       if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
-               ret = -EINVAL;
-               goto err;
-       }
-
-       if (num_phys_buf > 1 &&
-           ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
-               ret = -EINVAL;
-               goto err;
-       }
-
-       ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
-                                       &total_size, &npages, &shift,
-                                       &page_list);
-       if (ret)
-               goto err;
-
-       if (mr_exceeds_hw_limits(rhp, total_size)) {
-               kfree(page_list);
-               ret = -EINVAL;
-               goto err;
-       }
-
-       ret = alloc_pbl(mhp, npages);
-       if (ret) {
-               kfree(page_list);
-               goto err;
-       }
-
-       ret = write_pbl(&mhp->rhp->rdev, page_list, mhp->attr.pbl_addr,
-                            npages);
-       kfree(page_list);
-       if (ret)
-               goto err_pbl;
-
-       mhp->attr.pdid = php->pdid;
-       mhp->attr.zbva = 0;
-
-       mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
-       mhp->attr.va_fbo = *iova_start;
-       mhp->attr.page_size = shift - 12;
-
-       mhp->attr.len = (u32) total_size;
-       mhp->attr.pbl_size = npages;
-       ret = register_mem(rhp, php, mhp, shift);
-       if (ret)
-               goto err_pbl;
-
-       return &mhp->ibmr;
-
-err_pbl:
-       c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
-                             mhp->attr.pbl_size << 3);
-
-err:
-       kfree(mhp);
-       return ERR_PTR(ret);
-
-}
-
 struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
 {
        struct c4iw_dev *rhp;
@@ -952,9 +704,6 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
        u32 mmid;
 
        PDBG("%s ib_mr %p\n", __func__, ib_mr);
-       /* There can be no memory windows */
-       if (atomic_read(&ib_mr->usecnt))
-               return -EINVAL;
 
        mhp = to_c4iw_mr(ib_mr);
        rhp = mhp->rhp;
index 0a7d998..ec04272 100644 (file)
@@ -549,12 +549,9 @@ int c4iw_register_device(struct c4iw_dev *dev)
        dev->ibdev.resize_cq = c4iw_resize_cq;
        dev->ibdev.poll_cq = c4iw_poll_cq;
        dev->ibdev.get_dma_mr = c4iw_get_dma_mr;
-       dev->ibdev.reg_phys_mr = c4iw_register_phys_mem;
-       dev->ibdev.rereg_phys_mr = c4iw_reregister_phys_mem;
        dev->ibdev.reg_user_mr = c4iw_reg_user_mr;
        dev->ibdev.dereg_mr = c4iw_dereg_mr;
        dev->ibdev.alloc_mw = c4iw_alloc_mw;
-       dev->ibdev.bind_mw = c4iw_bind_mw;
        dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
        dev->ibdev.alloc_mr = c4iw_alloc_mr;
        dev->ibdev.map_mr_sg = c4iw_map_mr_sg;
index aa515af..e99345e 100644 (file)
@@ -933,11 +933,6 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
        return err;
 }
 
-int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind)
-{
-       return -ENOSYS;
-}
-
 static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
                                    u8 *ecode)
 {
index 1092a2d..6126bbe 100644 (file)
@@ -699,4 +699,11 @@ static inline void t4_set_cq_in_error(struct t4_cq *cq)
 
 struct t4_dev_status_page {    /* layout of rdev->status_page, a page obtained with __get_free_page() in c4iw_rdev_open() */
        u8 db_off;              /* set to 0 by c4iw_rdev_open() */
+       u8 pad1;                /* pad1..pad3 follow db_off and total 7 bytes, so qp_start sits at byte offset 8 */
+       u16 pad2;
+       u32 pad3;
+       u64 qp_start;           /* copied from rdev->lldi.vr->qp.start in c4iw_rdev_open() */
+       u64 qp_size;            /* copied from rdev->lldi.vr->qp.size in c4iw_rdev_open() */
+       u64 cq_start;           /* copied from rdev->lldi.vr->cq.start in c4iw_rdev_open() */
+       u64 cq_size;            /* copied from rdev->lldi.vr->cq.size in c4iw_rdev_open() */
 };     /* NOTE(review): presumably visible to user space, given the uverbs ABI version bump in the same change; confirm */
index cbd0ce1..295f422 100644 (file)
@@ -32,7 +32,7 @@
 #ifndef __C4IW_USER_H__
 #define __C4IW_USER_H__
 
-#define C4IW_UVERBS_ABI_VERSION        2
+#define C4IW_UVERBS_ABI_VERSION        3
 
 /*
  * Make sure that all structs defined in this file remain laid out so
index 86af713..105246f 100644 (file)
@@ -92,7 +92,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
                                ah_attr->grh.sgid_index, &sgid, &gid_attr);
        if (ret)
                return ERR_PTR(ret);
-       memset(ah->av.eth.s_mac, 0, ETH_ALEN);
+       eth_zero_addr(ah->av.eth.s_mac);
        if (gid_attr.ndev) {
                if (is_vlan_dev(gid_attr.ndev))
                        vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
@@ -104,6 +104,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
        ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
        ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
        ah->av.eth.vlan = cpu_to_be16(vlan_tag);
+       ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
        if (ah_attr->static_rate) {
                ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
                while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
index b88fc8f..9f8b516 100644 (file)
@@ -811,9 +811,6 @@ repoll:
                        wc->opcode    = IB_WC_MASKED_FETCH_ADD;
                        wc->byte_len  = 8;
                        break;
-               case MLX4_OPCODE_BIND_MW:
-                       wc->opcode    = IB_WC_BIND_MW;
-                       break;
                case MLX4_OPCODE_LSO:
                        wc->opcode    = IB_WC_LSO;
                        break;
index 97d6878..1c7ab6c 100644 (file)
@@ -154,9 +154,9 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_n
        return dev;
 }
 
-static int mlx4_ib_update_gids(struct gid_entry *gids,
-                              struct mlx4_ib_dev *ibdev,
-                              u8 port_num)
+static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
+                                 struct mlx4_ib_dev *ibdev,
+                                 u8 port_num)
 {
        struct mlx4_cmd_mailbox *mailbox;
        int err;
@@ -187,6 +187,63 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
        return err;
 }
 
+/*
+ * Write the port GID table to the device using the mailbox layout that
+ * carries a per-entry type and version (used when the device reports
+ * MLX4_DEV_CAP_FLAG2_ROCE_V1_V2).
+ *
+ * @gids:     MLX4_MAX_PORT_GIDS entries (GID plus GID type) to program
+ * @ibdev:    mlx4 IB device; commands are issued on ibdev->dev
+ * @port_num: port to program; when mlx4_is_bonded() is true, port 2 is
+ *            programmed with the same table as well
+ *
+ * Returns 0 on success.  On failure returns the error from
+ * mlx4_alloc_cmd_mailbox(), or the result of the first SET_PORT command
+ * that failed.
+ */
+static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
+                                    struct mlx4_ib_dev *ibdev,
+                                    u8 port_num)
+{
+       struct mlx4_cmd_mailbox *mailbox;
+       int err;
+       struct mlx4_dev *dev = ibdev->dev;
+       int i;
+       struct {
+               union ib_gid    gid;
+               __be32          rsrvd1[2];
+               __be16          rsrvd2;
+               u8              type;
+               u8              version;
+               __be32          rsrvd3;
+       } *gid_tbl;
+
+       mailbox = mlx4_alloc_cmd_mailbox(dev);
+       if (IS_ERR(mailbox))
+               return PTR_ERR(mailbox);        /* propagate the real error */
+
+       /*
+        * Only .gid is written for every entry; .type and .version are set
+        * for RoCE v2 entries alone.
+        * NOTE(review): this relies on the mailbox buffer being zeroed by
+        * mlx4_alloc_cmd_mailbox() -- confirm.
+        */
+       gid_tbl = mailbox->buf;
+       for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
+               memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid));
+               if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+                       gid_tbl[i].version = 2;
+                       /*
+                        * Non-v4-mapped GIDs get type 1; v4-mapped ones keep
+                        * type 0 and have their first 12 bytes cleared
+                        * (presumably leaving just the IPv4 address).
+                        */
+                       if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
+                               gid_tbl[i].type = 1;
+                       else
+                               memset(&gid_tbl[i].gid, 0, 12);
+               }
+       }
+
+       err = mlx4_cmd(dev, mailbox->dma,
+                      MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
+                      1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+                      MLX4_CMD_WRAPPED);
+       if (mlx4_is_bonded(dev)) {
+               /*
+                * Port 2 is still attempted when the first command failed,
+                * but the two results must not be added together: the sum
+                * of two errno values is not a valid error code.  Keep the
+                * first failure.
+                */
+               int err2 = mlx4_cmd(dev, mailbox->dma,
+                                   MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
+                                   1, MLX4_CMD_SET_PORT,
+                                   MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
+
+               if (!err)
+                       err = err2;
+       }
+
+       mlx4_free_cmd_mailbox(dev, mailbox);
+       return err;
+}
+
+/*
+ * Program the port GID table, choosing the mailbox layout by capability:
+ * the v1 layout unless MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 is set in
+ * caps.flags2, in which case the v1/v2 layout is used.  Returns whatever
+ * the selected helper returns.
+ */
+static int mlx4_ib_update_gids(struct gid_entry *gids,
+                              struct mlx4_ib_dev *ibdev,
+                              u8 port_num)
+{
+       if (!(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2))
+               return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
+
+       return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
+}
+
 static int mlx4_ib_add_gid(struct ib_device *device,
                           u8 port_num,
                           unsigned int index,
@@ -215,7 +272,8 @@ static int mlx4_ib_add_gid(struct ib_device *device,
        port_gid_table = &iboe->gids[port_num - 1];
        spin_lock_bh(&iboe->lock);
        for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
-               if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid))) {
+               if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
+                   (port_gid_table->gids[i].gid_type == attr->gid_type))  {
                        found = i;
                        break;
                }
@@ -233,6 +291,7 @@ static int mlx4_ib_add_gid(struct ib_device *device,
                        } else {
                                *context = port_gid_table->gids[free].ctx;
                                memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
+                               port_gid_table->gids[free].gid_type = attr->gid_type;
                                port_gid_table->gids[free].ctx->real_index = free;
                                port_gid_table->gids[free].ctx->refcount = 1;
                                hw_update = 1;
@@ -248,8 +307,10 @@ static int mlx4_ib_add_gid(struct ib_device *device,
                if (!gids) {
                        ret = -ENOMEM;
                } else {
-                       for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
+                       for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
                                memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
+                               gids[i].gid_type = port_gid_table->gids[i].gid_type;
+                       }
                }
        }
        spin_unlock_bh(&iboe->lock);
@@ -325,6 +386,7 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
        int i;
        int ret;
        unsigned long flags;
+       struct ib_gid_attr attr;
 
        if (port_num > MLX4_MAX_PORTS)
                return -EINVAL;
@@ -335,10 +397,13 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
        if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
                return index;
 
-       ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, NULL);
+       ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr);
        if (ret)
                return ret;
 
+       if (attr.ndev)
+               dev_put(attr.ndev);
+
        if (!memcmp(&gid, &zgid, sizeof(gid)))
                return -EINVAL;
 
@@ -346,7 +411,8 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
        port_gid_table = &iboe->gids[port_num - 1];
 
        for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
-               if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid))) {
+               if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) &&
+                   attr.gid_type == port_gid_table->gids[i].gid_type) {
                        ctx = port_gid_table->gids[i].ctx;
                        break;
                }
@@ -2119,6 +2185,7 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
                               struct ib_port_immutable *immutable)
 {
        struct ib_port_attr attr;
+       struct mlx4_ib_dev *mdev = to_mdev(ibdev);
        int err;
 
        err = mlx4_ib_query_port(ibdev, port_num, &attr);
@@ -2128,10 +2195,15 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
 
-       if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND)
+       if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
                immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
-       else
-               immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+       } else {
+               if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
+                       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+               if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+                       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
+                               RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+       }
 
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
@@ -2283,7 +2355,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
            dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
                ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
-               ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw;
                ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
 
                ibdev->ib_dev.uverbs_cmd_mask |=
@@ -2423,7 +2494,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        if (mlx4_ib_init_sriov(ibdev))
                goto err_mad;
 
-       if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {
+       if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE ||
+           dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
                if (!iboe->nb.notifier_call) {
                        iboe->nb.notifier_call = mlx4_ib_netdev_event;
                        err = register_netdevice_notifier(&iboe->nb);
@@ -2432,6 +2504,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
                                goto err_notif;
                        }
                }
+               if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+                       err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
+                       if (err) {
+                               goto err_notif;
+                       }
+               }
        }
 
        for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
index 1caa11e..52ce7b0 100644 (file)
@@ -177,11 +177,18 @@ struct mlx4_ib_wq {
        unsigned                tail;
 };
 
+enum { /* driver-private QP create flag, aliased to IB_QP_CREATE_RESERVED_START */
+       MLX4_IB_QP_CREATE_ROCE_V2_GSI = IB_QP_CREATE_RESERVED_START /* _mlx4_ib_create_qp() rejects it unless qp_type is IB_QPT_GSI */
+};
+
 enum mlx4_ib_qp_flags {        /* per-QP flag bits; the first four reuse the core IB_QP_CREATE_* values */
        MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
        MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
        MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
        MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
+
+       /* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */
+       MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI,        /* is_sqp() returns true for a QP carrying this flag */
        MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
        MLX4_IB_SRIOV_SQP = 1 << 31,    /* NOTE(review): 1 << 31 shifts into the sign bit of int; consider whether 1U << 31 is intended */
 };
@@ -478,6 +485,7 @@ struct gid_cache_context {
 
 struct gid_entry {     /* one slot of a port's GID table (port_gid_table->gids[]) */
        union ib_gid    gid;
+       enum ib_gid_type gid_type;      /* matched together with gid in mlx4_ib_add_gid() and mlx4_ib_gid_index_to_real_index(), so equal GIDs of different types are distinct slots */
        struct gid_cache_context *ctx;  /* carries the slot's real_index and refcount */
 };
 
@@ -704,8 +712,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                  struct ib_udata *udata);
 int mlx4_ib_dereg_mr(struct ib_mr *mr);
 struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
-                   struct ib_mw_bind *mw_bind);
 int mlx4_ib_dealloc_mw(struct ib_mw *mw);
 struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
                               enum ib_mr_type mr_type,
index 4d1e1c6..242b94e 100644 (file)
@@ -366,28 +366,6 @@ err_free:
        return ERR_PTR(err);
 }
 
-int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
-                   struct ib_mw_bind *mw_bind)
-{
-       struct ib_bind_mw_wr  wr;
-       struct ib_send_wr *bad_wr;
-       int ret;
-
-       memset(&wr, 0, sizeof(wr));
-       wr.wr.opcode            = IB_WR_BIND_MW;
-       wr.wr.wr_id             = mw_bind->wr_id;
-       wr.wr.send_flags        = mw_bind->send_flags;
-       wr.mw                   = mw;
-       wr.bind_info            = mw_bind->bind_info;
-       wr.rkey                 = ib_inc_rkey(mw->rkey);
-
-       ret = mlx4_ib_post_send(qp, &wr.wr, &bad_wr);
-       if (!ret)
-               mw->rkey = wr.rkey;
-
-       return ret;
-}
-
 int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
 {
        struct mlx4_ib_mw *mw = to_mmw(ibmw);
index 13eaaf4..bc5536f 100644 (file)
@@ -32,6 +32,8 @@
  */
 
 #include <linux/log2.h>
+#include <linux/etherdevice.h>
+#include <net/ip.h>
 #include <linux/slab.h>
 #include <linux/netdevice.h>
 #include <linux/vmalloc.h>
@@ -85,6 +87,7 @@ struct mlx4_ib_sqp {
        u32                     send_psn;
        struct ib_ud_header     ud_header;
        u8                      header_buf[MLX4_IB_UD_HEADER_SIZE];
+       struct ib_qp            *roce_v2_gsi;
 };
 
 enum {
@@ -115,7 +118,6 @@ static const __be32 mlx4_ib_opcode[] = {
        [IB_WR_REG_MR]                          = cpu_to_be32(MLX4_OPCODE_FMR),
        [IB_WR_MASKED_ATOMIC_CMP_AND_SWP]       = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
        [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]     = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
-       [IB_WR_BIND_MW]                         = cpu_to_be32(MLX4_OPCODE_BIND_MW),
 };
 
 static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -154,7 +156,10 @@ static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
                        }
                }
        }
-       return proxy_sqp;
+       if (proxy_sqp)
+               return 1;
+
+       return !!(qp->flags & MLX4_IB_ROCE_V2_GSI_QP);
 }
 
 /* used for INIT/CLOSE port logic */
@@ -796,11 +801,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                if (err)
                        goto err_mtt;
 
-               qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
+               qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64),
+                                       gfp | __GFP_NOWARN);
                if (!qp->sq.wrid)
                        qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
                                                gfp, PAGE_KERNEL);
-               qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp);
+               qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64),
+                                       gfp | __GFP_NOWARN);
                if (!qp->rq.wrid)
                        qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
                                                gfp, PAGE_KERNEL);
@@ -1099,9 +1106,9 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
                return dev->dev->caps.qp1_proxy[attr->port_num - 1];
 }
 
-struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
-                               struct ib_qp_init_attr *init_attr,
-                               struct ib_udata *udata)
+static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
+                                       struct ib_qp_init_attr *init_attr,
+                                       struct ib_udata *udata)
 {
        struct mlx4_ib_qp *qp = NULL;
        int err;
@@ -1120,6 +1127,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
                                        MLX4_IB_SRIOV_TUNNEL_QP |
                                        MLX4_IB_SRIOV_SQP |
                                        MLX4_IB_QP_NETIF |
+                                       MLX4_IB_QP_CREATE_ROCE_V2_GSI |
                                        MLX4_IB_QP_CREATE_USE_GFP_NOIO))
                return ERR_PTR(-EINVAL);
 
@@ -1128,15 +1136,21 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
                        return ERR_PTR(-EINVAL);
        }
 
-       if (init_attr->create_flags &&
-           ((udata && init_attr->create_flags & ~(sup_u_create_flags)) ||
-            ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
-                                          MLX4_IB_QP_CREATE_USE_GFP_NOIO |
-                                          MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)) &&
-             init_attr->qp_type != IB_QPT_UD) ||
-            ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
-             init_attr->qp_type > IB_QPT_GSI)))
-               return ERR_PTR(-EINVAL);
+       if (init_attr->create_flags) {
+               if (udata && init_attr->create_flags & ~(sup_u_create_flags))
+                       return ERR_PTR(-EINVAL);
+
+               if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
+                                                MLX4_IB_QP_CREATE_USE_GFP_NOIO |
+                                                MLX4_IB_QP_CREATE_ROCE_V2_GSI  |
+                                                MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) &&
+                    init_attr->qp_type != IB_QPT_UD) ||
+                   (init_attr->create_flags & MLX4_IB_SRIOV_SQP &&
+                    init_attr->qp_type > IB_QPT_GSI) ||
+                   (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI &&
+                    init_attr->qp_type != IB_QPT_GSI))
+                       return ERR_PTR(-EINVAL);
+       }
 
        switch (init_attr->qp_type) {
        case IB_QPT_XRC_TGT:
@@ -1173,19 +1187,29 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
        case IB_QPT_SMI:
        case IB_QPT_GSI:
        {
+               int sqpn;
+
                /* Userspace is not allowed to create special QPs: */
                if (udata)
                        return ERR_PTR(-EINVAL);
+               if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) {
+                       int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0);
+
+                       if (res)
+                               return ERR_PTR(res);
+               } else {
+                       sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
+               }
 
                err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
-                                      get_sqp_num(to_mdev(pd->device), init_attr),
+                                      sqpn,
                                       &qp, gfp);
                if (err)
                        return ERR_PTR(err);
 
                qp->port        = init_attr->port_num;
-               qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
-
+               qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 :
+                       init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI ? sqpn : 1;
                break;
        }
        default:
@@ -1196,7 +1220,41 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
        return &qp->ibqp;
 }
 
-int mlx4_ib_destroy_qp(struct ib_qp *qp)
+/* Create a QP through _mlx4_ib_create_qp(). For a plain GSI QP on a port where
+ * rdma_cap_eth_ah() holds and the device has MLX4_DEV_CAP_FLAG2_ROCE_V1_V2, a
+ * companion RoCE v2 GSI QP is also created; failing that is only logged.
+ */
+struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
+                               struct ib_qp_init_attr *init_attr,
+                               struct ib_udata *udata)
+{
+       struct ib_device *device = pd ? pd->device : init_attr->xrcd->device;
+       struct mlx4_ib_dev *dev = to_mdev(device);
+       struct ib_qp *ibqp = _mlx4_ib_create_qp(pd, init_attr, udata);
+       struct mlx4_ib_sqp *sqp;
+
+       if (IS_ERR(ibqp) || init_attr->qp_type != IB_QPT_GSI ||
+           (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI))
+               return ibqp;
+       if (!rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num) ||
+           !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2))
+               return ibqp;
+
+       sqp = to_msqp(to_mqp(ibqp));
+       init_attr->create_flags |= MLX4_IB_QP_CREATE_ROCE_V2_GSI;
+       sqp->roce_v2_gsi = ib_create_qp(pd, init_attr);
+       init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI;
+       if (IS_ERR(sqp->roce_v2_gsi)) {
+               pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi));
+               sqp->roce_v2_gsi = NULL;
+       } else {
+               sqp = to_msqp(to_mqp(sqp->roce_v2_gsi));
+               sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP;
+       }
+       return ibqp;
+}
+
+static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
 {
        struct mlx4_ib_dev *dev = to_mdev(qp->device);
        struct mlx4_ib_qp *mqp = to_mqp(qp);
@@ -1225,6 +1283,20 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
        return 0;
 }
 
+/* Destroy the companion RoCE v2 GSI QP of a GSI QP (if any), then @qp itself. */
+int mlx4_ib_destroy_qp(struct ib_qp *qp)
+{
+       struct mlx4_ib_qp *mqp = to_mqp(qp);
+       struct ib_qp *v2_gsi = NULL;
+
+       if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI)
+               v2_gsi = to_msqp(mqp)->roce_v2_gsi;
+       if (v2_gsi)
+               ib_destroy_qp(v2_gsi);
+
+       return _mlx4_ib_destroy_qp(qp);
+}
+
 static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
 {
        switch (type) {
@@ -1507,6 +1579,24 @@ static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
        return 0;
 }
 
+enum {                                         /* results of gid_type_to_qpc() */
+       MLX4_QPC_ROCE_MODE_1 = 0,               /* for IB_GID_TYPE_ROCE */
+       MLX4_QPC_ROCE_MODE_2 = 2,               /* for IB_GID_TYPE_ROCE_UDP_ENCAP */
+       MLX4_QPC_ROCE_MODE_UNDEFINED = 0xff     /* for any other GID type */
+};
+
+/* Map a GID type to its MLX4_QPC_ROCE_MODE_* value (UNDEFINED if unknown). */
+static u8 gid_type_to_qpc(enum ib_gid_type gid_type)
+{
+       u8 mode = MLX4_QPC_ROCE_MODE_UNDEFINED;
+
+       if (gid_type == IB_GID_TYPE_ROCE)
+               mode = MLX4_QPC_ROCE_MODE_1;
+       else if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+               mode = MLX4_QPC_ROCE_MODE_2;
+       return mode;
+}
+
 static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                               const struct ib_qp_attr *attr, int attr_mask,
                               enum ib_qp_state cur_state, enum ib_qp_state new_state)
@@ -1633,6 +1723,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                        mlx4_ib_steer_qp_reg(dev, qp, 1);
                        steer_qp = 1;
                }
+
+               if (ibqp->qp_type == IB_QPT_GSI) {
+                       enum ib_gid_type gid_type = qp->flags & MLX4_IB_ROCE_V2_GSI_QP ?
+                               IB_GID_TYPE_ROCE_UDP_ENCAP : IB_GID_TYPE_ROCE;
+                       u8 qpc_roce_mode = gid_type_to_qpc(gid_type);
+
+                       context->rlkey_roce_mode |= (qpc_roce_mode << 6);
+               }
        }
 
        if (attr_mask & IB_QP_PKEY_INDEX) {
@@ -1650,9 +1748,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                u16 vlan = 0xffff;
                u8 smac[ETH_ALEN];
                int status = 0;
+               int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
+                       attr->ah_attr.ah_flags & IB_AH_GRH;
 
-               if (rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
-                   attr->ah_attr.ah_flags & IB_AH_GRH) {
+               if (is_eth) {
                        int index = attr->ah_attr.grh.sgid_index;
 
                        status = ib_get_cached_gid(ibqp->device, port_num,
@@ -1674,6 +1773,18 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 
                optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
                           MLX4_QP_OPTPAR_SCHED_QUEUE);
+
+               if (is_eth &&
+                   (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) {
+                       u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type);
+
+                       if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) {
+                               err = -EINVAL;
+                               goto out;
+                       }
+                       context->rlkey_roce_mode |= (qpc_roce_mode << 6);
+               }
+
        }
 
        if (attr_mask & IB_QP_TIMEOUT) {
@@ -1845,7 +1956,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                sqd_event = 0;
 
        if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
-               context->rlkey |= (1 << 4);
+               context->rlkey_roce_mode |= (1 << 4);
 
        /*
         * Before passing a kernel QP to the HW, make sure that the
@@ -2022,8 +2133,8 @@ out:
        return err;
 }
 
-int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                     int attr_mask, struct ib_udata *udata)
+static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+                             int attr_mask, struct ib_udata *udata)
 {
        struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
        struct mlx4_ib_qp *qp = to_mqp(ibqp);
@@ -2126,6 +2237,27 @@ out:
        return err;
 }
 
+/* Modify @ibqp and, for a GSI QP that has a companion RoCE v2 GSI QP, also the
+ * companion. Only the primary result is returned; companion errors are logged.
+ */
+int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+                     int attr_mask, struct ib_udata *udata)
+{
+       struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+       int ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata);
+       struct ib_qp *v2_gsi;
+       int err;
+
+       if (mqp->mlx4_ib_qp_type != MLX4_IB_QPT_GSI)
+               return ret;
+
+       v2_gsi = to_msqp(mqp)->roce_v2_gsi;
+       err = v2_gsi ? ib_modify_qp(v2_gsi, attr, attr_mask) : 0;
+       if (err)
+               pr_err("Failed to modify GSI QP for RoCEv2 (%d)\n", err);
+       return ret;
+}
+
 static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
 {
        int i;
@@ -2168,7 +2300,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
        if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
                send_size += sizeof (struct mlx4_ib_tunnel_header);
 
-       ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header);
+       ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header);
 
        if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
                sqp->ud_header.lrh.service_level =
@@ -2252,16 +2384,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
        return 0;
 }
 
-static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac)
-{
-       int i;
-
-       for (i = ETH_ALEN; i; i--) {
-               dst_mac[i - 1] = src_mac & 0xff;
-               src_mac >>= 8;
-       }
-}
-
+#define MLX4_ROCEV2_QP1_SPORT 0xC000 /* UDP source port build_mlx_header() writes when is_udp */
 static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
                            void *wqe, unsigned *mlx_seg_len)
 {
@@ -2281,6 +2404,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
        bool is_eth;
        bool is_vlan = false;
        bool is_grh;
+       bool is_udp = false;
+       int ip_version = 0;
 
        send_size = 0;
        for (i = 0; i < wr->wr.num_sge; ++i)
@@ -2289,6 +2414,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
        is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
        is_grh = mlx4_ib_ah_grh_present(ah);
        if (is_eth) {
+               struct ib_gid_attr gid_attr;
+
                if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
                        /* When multi-function is enabled, the ib_core gid
                         * indexes don't necessarily match the hw ones, so
@@ -2302,19 +2429,35 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
                        err = ib_get_cached_gid(ib_dev,
                                                be32_to_cpu(ah->av.ib.port_pd) >> 24,
                                                ah->av.ib.gid_index, &sgid,
-                                               NULL);
-                       if (!err && !memcmp(&sgid, &zgid, sizeof(sgid)))
-                               err = -ENOENT;
-                       if (err)
+                                               &gid_attr);
+                       if (!err) {
+                               if (gid_attr.ndev)
+                                       dev_put(gid_attr.ndev);
+                               if (!memcmp(&sgid, &zgid, sizeof(sgid)))
+                                       err = -ENOENT;
+                       }
+                       if (!err) {
+                               is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+                               if (is_udp) {
+                                       if (ipv6_addr_v4mapped((struct in6_addr *)&sgid))
+                                               ip_version = 4;
+                                       else
+                                               ip_version = 6;
+                                       is_grh = false;
+                               }
+                       } else {
                                return err;
+                       }
                }
-
                if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
                        vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
                        is_vlan = 1;
                }
        }
-       ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
+       err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh,
+                         ip_version, is_udp, 0, &sqp->ud_header);
+       if (err)
+               return err;
 
        if (!is_eth) {
                sqp->ud_header.lrh.service_level =
@@ -2323,7 +2466,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
                sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f);
        }
 
-       if (is_grh) {
+       if (is_grh || (ip_version == 6)) {
                sqp->ud_header.grh.traffic_class =
                        (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
                sqp->ud_header.grh.flow_label    =
@@ -2352,6 +2495,25 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
                       ah->av.ib.dgid, 16);
        }
 
+       if (ip_version == 4) {
+               sqp->ud_header.ip4.tos =
+                       (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
+               sqp->ud_header.ip4.id = 0;
+               sqp->ud_header.ip4.frag_off = htons(IP_DF);
+               sqp->ud_header.ip4.ttl = ah->av.eth.hop_limit;
+
+               memcpy(&sqp->ud_header.ip4.saddr,
+                      sgid.raw + 12, 4);
+               memcpy(&sqp->ud_header.ip4.daddr, ah->av.ib.dgid + 12, 4);
+               sqp->ud_header.ip4.check = ib_ud_ip4_csum(&sqp->ud_header);
+       }
+
+       if (is_udp) {
+               sqp->ud_header.udp.dport = htons(ROCE_V2_UDP_DPORT);
+               sqp->ud_header.udp.sport = htons(MLX4_ROCEV2_QP1_SPORT);
+               sqp->ud_header.udp.csum = 0;
+       }
+
        mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
 
        if (!is_eth) {
@@ -2380,34 +2542,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
 
        if (is_eth) {
                struct in6_addr in6;
-
+               u16 ether_type;
                u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
 
+               ether_type = (!is_udp) ? MLX4_IB_IBOE_ETHERTYPE :
+                       (ip_version == 4 ? ETH_P_IP : ETH_P_IPV6);
+
                mlx->sched_prio = cpu_to_be16(pcp);
 
+               ether_addr_copy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac);
                memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
-               /* FIXME: cache smac value? */
                memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
                memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
                memcpy(&in6, sgid.raw, sizeof(in6));
 
-               if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
-                       u64 mac = atomic64_read(&to_mdev(ib_dev)->iboe.mac[sqp->qp.port - 1]);
-                       u8 smac[ETH_ALEN];
-
-                       mlx4_u64_to_smac(smac, mac);
-                       memcpy(sqp->ud_header.eth.smac_h, smac, ETH_ALEN);
-               } else {
-                       /* use the src mac of the tunnel */
-                       memcpy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac, ETH_ALEN);
-               }
 
                if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
                        mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
                if (!is_vlan) {
-                       sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+                       sqp->ud_header.eth.type = cpu_to_be16(ether_type);
                } else {
-                       sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+                       sqp->ud_header.vlan.type = cpu_to_be16(ether_type);
                        sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
                }
        } else {
@@ -2528,25 +2683,6 @@ static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg,
        fseg->reserved[1]       = 0;
 }
 
-static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg,
-               struct ib_bind_mw_wr *wr)
-{
-       bseg->flags1 =
-               convert_access(wr->bind_info.mw_access_flags) &
-               cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ  |
-                           MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
-                           MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);
-       bseg->flags2 = 0;
-       if (wr->mw->type == IB_MW_TYPE_2)
-               bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2);
-       if (wr->bind_info.mw_access_flags & IB_ZERO_BASED)
-               bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
-       bseg->new_rkey = cpu_to_be32(wr->rkey);
-       bseg->lkey = cpu_to_be32(wr->bind_info.mr->lkey);
-       bseg->addr = cpu_to_be64(wr->bind_info.addr);
-       bseg->length = cpu_to_be64(wr->bind_info.length);
-}
-
 static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
 {
        memset(iseg, 0, sizeof(*iseg));
@@ -2766,6 +2902,29 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        int i;
        struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
 
+       if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+               struct mlx4_ib_sqp *sqp = to_msqp(qp);
+
+               if (sqp->roce_v2_gsi) {
+                       struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
+                       struct ib_gid_attr gid_attr;
+                       union ib_gid gid;
+
+                       if (!ib_get_cached_gid(ibqp->device,
+                                              be32_to_cpu(ah->av.ib.port_pd) >> 24,
+                                              ah->av.ib.gid_index, &gid,
+                                              &gid_attr)) {
+                               if (gid_attr.ndev)
+                                       dev_put(gid_attr.ndev);
+                               qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
+                                       to_mqp(sqp->roce_v2_gsi) : qp;
+                       } else {
+                               pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
+                                      ah->av.ib.gid_index);
+                       }
+               }
+       }
+
        spin_lock_irqsave(&qp->sq.lock, flags);
        if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
                err = -EIO;
@@ -2867,13 +3026,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                size += sizeof(struct mlx4_wqe_fmr_seg) / 16;
                                break;
 
-                       case IB_WR_BIND_MW:
-                               ctrl->srcrb_flags |=
-                                       cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
-                               set_bind_seg(wqe, bind_mw_wr(wr));
-                               wqe  += sizeof(struct mlx4_wqe_bind_seg);
-                               size += sizeof(struct mlx4_wqe_bind_seg) / 16;
-                               break;
                        default:
                                /* No extra segments required for sends */
                                break;
index c394376..0597f3e 100644 (file)
@@ -171,7 +171,8 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
                if (err)
                        goto err_mtt;
 
-               srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
+               srq->wrid = kmalloc_array(srq->msrq.max, sizeof(u64),
+                                       GFP_KERNEL | __GFP_NOWARN);
                if (!srq->wrid) {
                        srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
                                              GFP_KERNEL, PAGE_KERNEL);
index 6608058..745efa4 100644 (file)
 
 #include "mlx5_ib.h"
 
-struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
-                          struct mlx5_ib_ah *ah)
+static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
+                                 struct mlx5_ib_ah *ah,
+                                 struct ib_ah_attr *ah_attr,
+                                 enum rdma_link_layer ll)
 {
        if (ah_attr->ah_flags & IB_AH_GRH) {
                memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16);
@@ -44,9 +46,20 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
                ah->av.tclass = ah_attr->grh.traffic_class;
        }
 
-       ah->av.rlid = cpu_to_be16(ah_attr->dlid);
-       ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
-       ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf);
+       ah->av.stat_rate_sl = (ah_attr->static_rate << 4);
+
+       if (ll == IB_LINK_LAYER_ETHERNET) {
+               memcpy(ah->av.rmac, ah_attr->dmac, sizeof(ah_attr->dmac));
+               ah->av.udp_sport =
+                       mlx5_get_roce_udp_sport(dev,
+                                               ah_attr->port_num,
+                                               ah_attr->grh.sgid_index);
+               ah->av.stat_rate_sl |= (ah_attr->sl & 0x7) << 1;
+       } else {
+               ah->av.rlid = cpu_to_be16(ah_attr->dlid);
+               ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
+               ah->av.stat_rate_sl |= (ah_attr->sl & 0xf);
+       }
 
        return &ah->ibah;
 }
@@ -54,12 +67,19 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
 struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
 {
        struct mlx5_ib_ah *ah;
+       struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       enum rdma_link_layer ll;
+
+       ll = pd->device->get_link_layer(pd->device, ah_attr->port_num);
+
+       if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH))
+               return ERR_PTR(-EINVAL);
 
        ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
        if (!ah)
                return ERR_PTR(-ENOMEM);
 
-       return create_ib_ah(ah_attr, ah); /* never fails */
+       return create_ib_ah(dev, ah, ah_attr, ll); /* never fails */
 }
 
 int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
index 92ddae1..fd1de31 100644 (file)
@@ -154,9 +154,6 @@ static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
                wc->opcode    = IB_WC_MASKED_FETCH_ADD;
                wc->byte_len  = 8;
                break;
-       case MLX5_OPCODE_BIND_MW:
-               wc->opcode    = IB_WC_BIND_MW;
-               break;
        case MLX5_OPCODE_UMR:
                wc->opcode = get_umr_comp(wq, idx);
                break;
@@ -171,6 +168,7 @@ enum {
 static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
                             struct mlx5_ib_qp *qp)
 {
+       enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
        struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
        struct mlx5_ib_srq *srq;
        struct mlx5_ib_wq *wq;
@@ -236,6 +234,22 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
        } else {
                wc->pkey_index = 0;
        }
+
+       if (ll != IB_LINK_LAYER_ETHERNET)
+               return;
+
+       switch (wc->sl & 0x3) {
+       case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
+               wc->network_hdr_type = RDMA_NETWORK_IB;
+               break;
+       case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
+               wc->network_hdr_type = RDMA_NETWORK_IPV6;
+               break;
+       case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4:
+               wc->network_hdr_type = RDMA_NETWORK_IPV4;
+               break;
+       }
+       wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
 }
 
 static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
@@ -760,12 +774,12 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
        int eqn;
        int err;
 
-       if (attr->flags)
-               return ERR_PTR(-EINVAL);
-
        if (entries < 0)
                return ERR_PTR(-EINVAL);
 
+       if (check_cq_create_flags(attr->flags))
+               return ERR_PTR(-EOPNOTSUPP);
+
        entries = roundup_pow_of_two(entries + 1);
        if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
                return ERR_PTR(-EINVAL);
@@ -779,6 +793,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
        spin_lock_init(&cq->lock);
        cq->resize_buf = NULL;
        cq->resize_umem = NULL;
+       cq->create_flags = attr->flags;
 
        if (context) {
                err = create_cq_user(dev, udata, context, cq, entries,
@@ -796,6 +811,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
 
        cq->cqe_size = cqe_size;
        cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
+
+       if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
+               cqb->ctx.cqe_sz_flags |= (1 << 1);
+
        cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
        err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
        if (err)
index b0ec175..03c418c 100644 (file)
@@ -40,6 +40,8 @@
 #include <linux/io-mapping.h>
 #include <linux/sched.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
 #include <linux/mlx5/vport.h>
 #include <rdma/ib_smi.h>
 #include <rdma/ib_umem.h>
@@ -66,12 +68,14 @@ static char mlx5_version[] =
        DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
        DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
 
+enum {
+       MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3, /* bit get_atomic_caps() tests in atomic_size_qp */
+};
+
 static enum rdma_link_layer
-mlx5_ib_port_link_layer(struct ib_device *device)
+mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
 {
-       struct mlx5_ib_dev *dev = to_mdev(device);
-
-       switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
+       switch (port_type_cap) {
        case MLX5_CAP_PORT_TYPE_IB:
                return IB_LINK_LAYER_INFINIBAND;
        case MLX5_CAP_PORT_TYPE_ETH:
@@ -81,6 +85,202 @@ mlx5_ib_port_link_layer(struct ib_device *device)
        }
 }
 
+/* Link layer from the device's port_type capability; @port_num is ignored. */
+static enum rdma_link_layer
+mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
+{
+       int cap = MLX5_CAP_GEN(to_mdev(device)->mdev, port_type);
+
+       return mlx5_port_type_cap_to_rdma_ll(cap);
+}
+
+/* Netdev notifier: record/clear the net_device parented by mdev->pdev. */
+static int mlx5_netdev_event(struct notifier_block *this,
+                            unsigned long event, void *ptr)
+{
+       struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
+                                                roce.nb);
+       struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+
+       if (event == NETDEV_REGISTER || event == NETDEV_UNREGISTER) {
+               write_lock(&ibdev->roce.netdev_lock);
+               if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
+                       ibdev->roce.netdev =
+                               event == NETDEV_REGISTER ? ndev : NULL;
+               write_unlock(&ibdev->roce.netdev_lock);
+       }
+       return NOTIFY_DONE;
+}
+
+/* Return roce.netdev with a reference held, or NULL; @port_num is unused. */
+static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
+                                            u8 port_num)
+{
+       struct mlx5_ib_dev *ibdev = to_mdev(device);
+       struct net_device *netdev;
+
+       /* Hold the lock across dev_hold() so the netdev cannot go away first. */
+       read_lock(&ibdev->roce.netdev_lock);
+       netdev = ibdev->roce.netdev;
+       if (netdev)
+               dev_hold(netdev);
+       read_unlock(&ibdev->roce.netdev_lock);
+
+       return netdev;
+}
+
+/* Fill @props for a RoCE port. State is DOWN unless the netdev returned by
+ * mlx5_ib_get_netdev() is running with carrier; with no netdev the active_*
+ * fields stay zero. Always returns 0.
+ */
+static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
+                               struct ib_port_attr *props)
+{
+       struct mlx5_ib_dev *dev = to_mdev(device);
+       struct net_device *ndev;
+       enum ib_mtu ndev_ib_mtu;
+       /* Pre-zeroed: the query below is unchecked and may leave this unset. */
+       u16 qkey_viol_cntr = 0;
+
+       memset(props, 0, sizeof(*props));
+
+       props->port_cap_flags   = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
+       props->gid_tbl_len      = MLX5_CAP_ROCE(dev->mdev,
+                                               roce_address_table_size);
+       props->max_mtu          = IB_MTU_4096;
+       props->max_msg_sz       = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
+       props->pkey_tbl_len     = 1;
+       props->state            = IB_PORT_DOWN;
+       props->phys_state       = 3;
+
+       mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr);
+       props->qkey_viol_cntr = qkey_viol_cntr;
+
+       ndev = mlx5_ib_get_netdev(device, port_num);
+       if (!ndev)
+               return 0;
+
+       if (netif_running(ndev) && netif_carrier_ok(ndev)) {
+               props->state      = IB_PORT_ACTIVE;
+               props->phys_state = 5;
+       }
+       ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
+       dev_put(ndev);          /* drop the reference taken by get_netdev */
+
+       props->active_mtu       = min(props->max_mtu, ndev_ib_mtu);
+       props->active_width     = IB_WIDTH_4X;  /* TODO */
+       props->active_speed     = IB_SPEED_QDR; /* TODO */
+
+       return 0;
+}
+
+/* Encode @gid/@attr into the roce_addr_layout at @mlx5_addr: source MAC,
+ * VLAN (for a VLAN netdev), RoCE version, and L3 type/address. A NULL @gid
+ * leaves @mlx5_addr untouched.
+ */
+static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
+                                    const struct ib_gid_attr *attr,
+                                    void *mlx5_addr)
+{
+#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
+       char *l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
+                                    source_l3_address);
+       void *mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
+                                source_mac_47_32);
+       bool is_v4;
+
+       if (!gid)
+               return;
+
+       ether_addr_copy(mac, attr->ndev->dev_addr);
+
+       if (is_vlan_dev(attr->ndev)) {
+               MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
+               MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev));
+       }
+
+       if (attr->gid_type == IB_GID_TYPE_IB)
+               MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
+       else if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+               MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
+       else
+               WARN_ON(true);
+
+       /* An IB-type GID is copied whole and gets no L3 type. */
+       if (attr->gid_type == IB_GID_TYPE_IB) {
+               memcpy(l3_addr, gid, sizeof(*gid));
+               return;
+       }
+
+       is_v4 = ipv6_addr_v4mapped((void *)gid);
+       if (is_v4) {
+               MLX5_SET_RA(mlx5_addr, roce_l3_type, MLX5_ROCE_L3_TYPE_IPV4);
+               memcpy(&l3_addr[12], &gid->raw[12], 4);
+       } else {
+               MLX5_SET_RA(mlx5_addr, roce_l3_type, MLX5_ROCE_L3_TYPE_IPV6);
+               memcpy(l3_addr, gid, sizeof(*gid));
+       }
+}
+
+/* Execute SET_ROCE_ADDRESS for entry @index with the address built from
+ * @gid/@attr. Returns -EINVAL unless the link layer is Ethernet.
+ */
+static int set_roce_addr(struct ib_device *device, u8 port_num,
+                        unsigned int index, const union ib_gid *gid,
+                        const struct ib_gid_attr *attr)
+{
+       struct mlx5_ib_dev *dev = to_mdev(device);
+       u32 out[MLX5_ST_SZ_DW(set_roce_address_out)];
+       u32 in[MLX5_ST_SZ_DW(set_roce_address_in)];
+       void *addr;
+
+       if (mlx5_ib_port_link_layer(device, port_num) != IB_LINK_LAYER_ETHERNET)
+               return -EINVAL;
+
+       memset(out, 0, sizeof(out));
+       memset(in, 0, sizeof(in));
+       addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
+       ib_gid_to_mlx5_roce_addr(gid, attr, addr);
+       MLX5_SET(set_roce_address_in, in, roce_address_index, index);
+       MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
+
+       return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
+                          unsigned int index, const union ib_gid *gid,
+                          const struct ib_gid_attr *attr,
+                          __always_unused void **context)
+{      /* Thin wrapper: forwards to set_roce_addr(); @context is unused. */
+       return set_roce_addr(device, port_num, index, gid, attr);
+}
+
+static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
+                          unsigned int index, __always_unused void **context)
+{      /* Forwards to set_roce_addr() with NULL gid/attr; @context is unused. */
+       return set_roce_addr(device, port_num, index, NULL, NULL);
+}
+
+/* UDP source port to use with the GID at @index: the r_roce_min_src_udp_port
+ * capability when the cached GID has a netdev and is of type ROCE_UDP_ENCAP,
+ * and 0 otherwise (including when the cache lookup fails).
+ */
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
+                              int index)
+{
+       struct ib_gid_attr gid_attr;
+       union ib_gid cached_gid;
+
+       if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &cached_gid,
+                             &gid_attr) || !gid_attr.ndev)
+               return 0;
+       dev_put(gid_attr.ndev); /* only the GID type is needed from here on */
+
+       if (gid_attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+               return 0;
+       return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
+}
+
 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
 {
        return !dev->mdev->issi;
@@ -97,13 +297,35 @@ static int mlx5_get_vport_access_method(struct ib_device *ibdev)
        if (mlx5_use_mad_ifc(to_mdev(ibdev)))
                return MLX5_VPORT_ACCESS_METHOD_MAD;
 
-       if (mlx5_ib_port_link_layer(ibdev) ==
+       if (mlx5_ib_port_link_layer(ibdev, 1) ==
            IB_LINK_LAYER_ETHERNET)
                return MLX5_VPORT_ACCESS_METHOD_NIC;
 
        return MLX5_VPORT_ACCESS_METHOD_HCA;
 }
 
+static void get_atomic_caps(struct mlx5_ib_dev *dev,
+                           struct ib_device_attr *props)
+{
+       u8 tmp;
+       u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
+       u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
+       u8 atomic_req_8B_endianness_mode =
+               MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode);
+
+       /* Check if HW supports 8-byte standard atomic operations and is
+        * capable of responding in host endianness
+        */
+       tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
+       if (((atomic_operations & tmp) == tmp) &&
+           (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) &&
+           (atomic_req_8B_endianness_mode)) {
+               props->atomic_cap = IB_ATOMIC_HCA;
+       } else {
+               props->atomic_cap = IB_ATOMIC_NONE;
+       }
+}
+
 static int mlx5_query_system_image_guid(struct ib_device *ibdev,
                                        __be64 *sys_image_guid)
 {
@@ -119,13 +341,21 @@ static int mlx5_query_system_image_guid(struct ib_device *ibdev,
 
        case MLX5_VPORT_ACCESS_METHOD_HCA:
                err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
-               if (!err)
-                       *sys_image_guid = cpu_to_be64(tmp);
-               return err;
+               break;
+
+       case MLX5_VPORT_ACCESS_METHOD_NIC:
+               err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
+               break;
 
        default:
                return -EINVAL;
        }
+
+       if (!err)
+               *sys_image_guid = cpu_to_be64(tmp);
+
+       return err;
+
 }
 
 static int mlx5_query_max_pkeys(struct ib_device *ibdev,
@@ -179,13 +409,20 @@ static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
 
        case MLX5_VPORT_ACCESS_METHOD_HCA:
                err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
-               if (!err)
-                       *node_guid = cpu_to_be64(tmp);
-               return err;
+               break;
+
+       case MLX5_VPORT_ACCESS_METHOD_NIC:
+               err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
+               break;
 
        default:
                return -EINVAL;
        }
+
+       if (!err)
+               *node_guid = cpu_to_be64(tmp);
+
+       return err;
 }
 
 struct mlx5_reg_node_desc {
@@ -263,6 +500,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
        if (MLX5_CAP_GEN(mdev, block_lb_mc))
                props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
 
+       if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+           (MLX5_CAP_ETH(dev->mdev, csum_cap)))
+                       props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+
        props->vendor_part_id      = mdev->pdev->device;
        props->hw_ver              = mdev->pdev->revision;
 
@@ -278,7 +519,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
        props->max_sge = min(max_rq_sg, max_sq_sg);
        props->max_sge_rd = props->max_sge;
        props->max_cq              = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
-       props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_eq_sz)) - 1;
+       props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
        props->max_mr              = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
        props->max_pd              = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
        props->max_qp_rd_atom      = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
@@ -289,13 +530,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
        props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
        props->max_srq_sge         = max_rq_sg - 1;
        props->max_fast_reg_page_list_len = (unsigned int)-1;
-       props->atomic_cap          = IB_ATOMIC_NONE;
+       get_atomic_caps(dev, props);
        props->masked_atomic_cap   = IB_ATOMIC_NONE;
        props->max_mcast_grp       = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
        props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
        props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
                                           props->max_mcast_grp;
        props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
+       props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
+       props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        if (MLX5_CAP_GEN(mdev, pg))
@@ -303,6 +546,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
        props->odp_caps = dev->odp_caps;
 #endif
 
+       if (MLX5_CAP_GEN(mdev, cd))
+               props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
+
        return 0;
 }
 
@@ -483,6 +729,9 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
        case MLX5_VPORT_ACCESS_METHOD_HCA:
                return mlx5_query_hca_port(ibdev, port, props);
 
+       case MLX5_VPORT_ACCESS_METHOD_NIC:
+               return mlx5_query_port_roce(ibdev, port, props);
+
        default:
                return -EINVAL;
        }
@@ -583,8 +832,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
                                                  struct ib_udata *udata)
 {
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
-       struct mlx5_ib_alloc_ucontext_req_v2 req;
-       struct mlx5_ib_alloc_ucontext_resp resp;
+       struct mlx5_ib_alloc_ucontext_req_v2 req = {};
+       struct mlx5_ib_alloc_ucontext_resp resp = {};
        struct mlx5_ib_ucontext *context;
        struct mlx5_uuar_info *uuari;
        struct mlx5_uar *uars;
@@ -595,24 +844,28 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
        int err;
        int i;
        size_t reqlen;
+       size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
+                                    max_cqe_version);
 
        if (!dev->ib_active)
                return ERR_PTR(-EAGAIN);
 
-       memset(&req, 0, sizeof(req));
+       if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr))
+               return ERR_PTR(-EINVAL);
+
        reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
        if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
                ver = 0;
-       else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
+       else if (reqlen >= min_req_v2)
                ver = 2;
        else
                return ERR_PTR(-EINVAL);
 
-       err = ib_copy_from_udata(&req, udata, reqlen);
+       err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req)));
        if (err)
                return ERR_PTR(err);
 
-       if (req.flags || req.reserved)
+       if (req.flags)
                return ERR_PTR(-EINVAL);
 
        if (req.total_num_uuars > MLX5_MAX_UUARS)
@@ -621,6 +874,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
        if (req.total_num_uuars == 0)
                return ERR_PTR(-EINVAL);
 
+       if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
+               return ERR_PTR(-EOPNOTSUPP);
+
+       if (reqlen > sizeof(req) &&
+           !ib_is_udata_cleared(udata, sizeof(req),
+                                reqlen - sizeof(req)))
+               return ERR_PTR(-EOPNOTSUPP);
+
        req.total_num_uuars = ALIGN(req.total_num_uuars,
                                    MLX5_NON_FP_BF_REGS_PER_PAGE);
        if (req.num_low_latency_uuars > req.total_num_uuars - 1)
@@ -636,6 +897,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
        resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
        resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
        resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+       resp.cqe_version = min_t(__u8,
+                                (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
+                                req.max_cqe_version);
+       resp.response_length = min(offsetof(typeof(resp), response_length) +
+                                  sizeof(resp.response_length), udata->outlen);
 
        context = kzalloc(sizeof(*context), GFP_KERNEL);
        if (!context)
@@ -681,22 +947,49 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
        context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
 #endif
 
+       if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
+               err = mlx5_core_alloc_transport_domain(dev->mdev,
+                                                      &context->tdn);
+               if (err)
+                       goto out_uars;
+       }
+
        INIT_LIST_HEAD(&context->db_page_list);
        mutex_init(&context->db_page_mutex);
 
        resp.tot_uuars = req.total_num_uuars;
        resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
-       err = ib_copy_to_udata(udata, &resp,
-                              sizeof(resp) - sizeof(resp.reserved));
+
+       if (field_avail(typeof(resp), cqe_version, udata->outlen))
+               resp.response_length += sizeof(resp.cqe_version);
+
+       if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
+               resp.comp_mask |=
+                       MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
+               resp.hca_core_clock_offset =
+                       offsetof(struct mlx5_init_seg, internal_timer_h) %
+                       PAGE_SIZE;
+               resp.response_length += sizeof(resp.hca_core_clock_offset) +
+                                       sizeof(resp.reserved2) +
+                                       sizeof(resp.reserved3);
+       }
+
+       err = ib_copy_to_udata(udata, &resp, resp.response_length);
        if (err)
-               goto out_uars;
+               goto out_td;
 
        uuari->ver = ver;
        uuari->num_low_latency_uuars = req.num_low_latency_uuars;
        uuari->uars = uars;
        uuari->num_uars = num_uars;
+       context->cqe_version = resp.cqe_version;
+
        return &context->ibucontext;
 
+out_td:
+       if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+               mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+
 out_uars:
        for (i--; i >= 0; i--)
                mlx5_cmd_free_uar(dev->mdev, uars[i].index);
@@ -721,6 +1014,9 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
        struct mlx5_uuar_info *uuari = &context->uuari;
        int i;
 
+       if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+               mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+
        for (i = 0; i < uuari->num_uars; i++) {
                if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
                        mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
@@ -790,6 +1086,30 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
        case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
                return -ENOSYS;
 
+       case MLX5_IB_MMAP_CORE_CLOCK:
+               if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+                       return -EINVAL;
+
+               if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+                       return -EPERM;
+
+               /* Don't expose to user-space information it shouldn't have */
+               if (PAGE_SIZE > 4096)
+                       return -EOPNOTSUPP;
+
+               vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+               pfn = (dev->mdev->iseg_base +
+                      offsetof(struct mlx5_init_seg, internal_timer_h)) >>
+                       PAGE_SHIFT;
+               if (io_remap_pfn_range(vma, vma->vm_start, pfn,
+                                      PAGE_SIZE, vma->vm_page_prot))
+                       return -EAGAIN;
+
+               mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n",
+                           vma->vm_start,
+                           (unsigned long long)pfn << PAGE_SHIFT);
+               break;
+
        default:
                return -EINVAL;
        }
@@ -1758,6 +2078,32 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr)
        mlx5_ib_dealloc_pd(devr->p0);
 }
 
+static u32 get_core_cap_flags(struct ib_device *ibdev)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibdev);
+       enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
+       u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
+       u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
+       u32 ret = 0;
+
+       if (ll == IB_LINK_LAYER_INFINIBAND)
+               return RDMA_CORE_PORT_IBA_IB;
+
+       if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
+               return 0;
+
+       if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
+               return 0;
+
+       if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
+               ret |= RDMA_CORE_PORT_IBA_ROCE;
+
+       if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
+               ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+       return ret;
+}
+
 static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
                               struct ib_port_immutable *immutable)
 {
@@ -1770,20 +2116,50 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
 
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+       immutable->core_cap_flags = get_core_cap_flags(ibdev);
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
        return 0;
 }
 
+static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
+{
+       int err;
+
+       dev->roce.nb.notifier_call = mlx5_netdev_event;
+       err = register_netdevice_notifier(&dev->roce.nb);
+       if (err)
+               return err;
+
+       err = mlx5_nic_vport_enable_roce(dev->mdev);
+       if (err)
+               goto err_unregister_netdevice_notifier;
+
+       return 0;
+
+err_unregister_netdevice_notifier:
+       unregister_netdevice_notifier(&dev->roce.nb);
+       return err;
+}
+
+static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
+{
+       mlx5_nic_vport_disable_roce(dev->mdev);
+       unregister_netdevice_notifier(&dev->roce.nb);
+}
+
 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 {
        struct mlx5_ib_dev *dev;
+       enum rdma_link_layer ll;
+       int port_type_cap;
        int err;
        int i;
 
-       /* don't create IB instance over Eth ports, no RoCE yet! */
-       if (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH)
+       port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+       ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+       if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce))
                return NULL;
 
        printk_once(KERN_INFO "%s", mlx5_version);
@@ -1794,6 +2170,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 
        dev->mdev = mdev;
 
+       rwlock_init(&dev->roce.netdev_lock);
        err = get_port_caps(dev);
        if (err)
                goto err_dealloc;
@@ -1839,11 +2216,18 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
                (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)         |
                (1ull << IB_USER_VERBS_CMD_OPEN_QP);
        dev->ib_dev.uverbs_ex_cmd_mask =
-               (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
+               (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE)     |
+               (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ)        |
+               (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
 
        dev->ib_dev.query_device        = mlx5_ib_query_device;
        dev->ib_dev.query_port          = mlx5_ib_query_port;
+       dev->ib_dev.get_link_layer      = mlx5_ib_port_link_layer;
+       if (ll == IB_LINK_LAYER_ETHERNET)
+               dev->ib_dev.get_netdev  = mlx5_ib_get_netdev;
        dev->ib_dev.query_gid           = mlx5_ib_query_gid;
+       dev->ib_dev.add_gid             = mlx5_ib_add_gid;
+       dev->ib_dev.del_gid             = mlx5_ib_del_gid;
        dev->ib_dev.query_pkey          = mlx5_ib_query_pkey;
        dev->ib_dev.modify_device       = mlx5_ib_modify_device;
        dev->ib_dev.modify_port         = mlx5_ib_modify_port;
@@ -1893,7 +2277,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
                        (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
        }
 
-       if (mlx5_ib_port_link_layer(&dev->ib_dev) ==
+       if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
            IB_LINK_LAYER_ETHERNET) {
                dev->ib_dev.create_flow = mlx5_ib_create_flow;
                dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
@@ -1908,9 +2292,15 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
        mutex_init(&dev->flow_db.lock);
        mutex_init(&dev->cap_mask_mutex);
 
+       if (ll == IB_LINK_LAYER_ETHERNET) {
+               err = mlx5_enable_roce(dev);
+               if (err)
+                       goto err_dealloc;
+       }
+
        err = create_dev_resources(&dev->devr);
        if (err)
-               goto err_dealloc;
+               goto err_disable_roce;
 
        err = mlx5_ib_odp_init_one(dev);
        if (err)
@@ -1947,6 +2337,10 @@ err_odp:
 err_rsrc:
        destroy_dev_resources(&dev->devr);
 
+err_disable_roce:
+       if (ll == IB_LINK_LAYER_ETHERNET)
+               mlx5_disable_roce(dev);
+
 err_dealloc:
        ib_dealloc_device((struct ib_device *)dev);
 
@@ -1956,11 +2350,14 @@ err_dealloc:
 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
 {
        struct mlx5_ib_dev *dev = context;
+       enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
 
        ib_unregister_device(&dev->ib_dev);
        destroy_umrc_res(dev);
        mlx5_ib_odp_remove_one(dev);
        destroy_dev_resources(&dev->devr);
+       if (ll == IB_LINK_LAYER_ETHERNET)
+               mlx5_disable_roce(dev);
        ib_dealloc_device(&dev->ib_dev);
 }
 
index 1474ccc..d2b9737 100644 (file)
@@ -42,6 +42,7 @@
 #include <linux/mlx5/qp.h>
 #include <linux/mlx5/srq.h>
 #include <linux/types.h>
+#include <linux/mlx5/transobj.h>
 
 #define mlx5_ib_dbg(dev, format, arg...)                               \
 pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,   \
@@ -55,6 +56,11 @@ pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,   \
 pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,    \
        __LINE__, current->pid, ##arg)
 
+#define field_avail(type, fld, sz) (offsetof(type, fld) +              \
+                                   sizeof(((type *)0)->fld) <= (sz))
+#define MLX5_IB_DEFAULT_UIDX 0xffffff
+#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
+
 enum {
        MLX5_IB_MMAP_CMD_SHIFT  = 8,
        MLX5_IB_MMAP_CMD_MASK   = 0xff,
@@ -62,7 +68,9 @@ enum {
 
 enum mlx5_ib_mmap_cmd {
        MLX5_IB_MMAP_REGULAR_PAGE               = 0,
-       MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES       = 1, /* always last */
+       MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES       = 1,
+       /* 5 is chosen in order to be compatible with old versions of libmlx5 */
+       MLX5_IB_MMAP_CORE_CLOCK                 = 5,
 };
 
 enum {
@@ -85,6 +93,15 @@ enum mlx5_ib_mad_ifc_flags {
        MLX5_MAD_IFC_NET_VIEW           = 4,
 };
 
+enum {
+       MLX5_CROSS_CHANNEL_UUAR         = 0,
+};
+
+enum {
+       MLX5_CQE_VERSION_V0,
+       MLX5_CQE_VERSION_V1,
+};
+
 struct mlx5_ib_ucontext {
        struct ib_ucontext      ibucontext;
        struct list_head        db_page_list;
@@ -93,6 +110,9 @@ struct mlx5_ib_ucontext {
         */
        struct mutex            db_page_mutex;
        struct mlx5_uuar_info   uuari;
+       u8                      cqe_version;
+       /* Transport Domain number */
+       u32                     tdn;
 };
 
 static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -201,47 +221,70 @@ struct mlx5_ib_pfault {
        struct mlx5_pagefault   mpfault;
 };
 
+struct mlx5_ib_ubuffer {
+       struct ib_umem         *umem;
+       int                     buf_size;
+       u64                     buf_addr;
+};
+
+struct mlx5_ib_qp_base {
+       struct mlx5_ib_qp       *container_mibqp;
+       struct mlx5_core_qp     mqp;
+       struct mlx5_ib_ubuffer  ubuffer;
+};
+
+struct mlx5_ib_qp_trans {
+       struct mlx5_ib_qp_base  base;
+       u16                     xrcdn;
+       u8                      alt_port;
+       u8                      atomic_rd_en;
+       u8                      resp_depth;
+};
+
 struct mlx5_ib_rq {
+       struct mlx5_ib_qp_base base;
+       struct mlx5_ib_wq       *rq;
+       struct mlx5_ib_ubuffer  ubuffer;
+       struct mlx5_db          *doorbell;
        u32                     tirn;
+       u8                      state;
+};
+
+struct mlx5_ib_sq {
+       struct mlx5_ib_qp_base base;
+       struct mlx5_ib_wq       *sq;
+       struct mlx5_ib_ubuffer  ubuffer;
+       struct mlx5_db          *doorbell;
+       u32                     tisn;
+       u8                      state;
 };
 
 struct mlx5_ib_raw_packet_qp {
+       struct mlx5_ib_sq sq;
        struct mlx5_ib_rq rq;
 };
 
 struct mlx5_ib_qp {
        struct ib_qp            ibqp;
        union {
-               struct mlx5_core_qp             mqp;
-               struct mlx5_ib_raw_packet_qp    raw_packet_qp;
+               struct mlx5_ib_qp_trans trans_qp;
+               struct mlx5_ib_raw_packet_qp raw_packet_qp;
        };
-
        struct mlx5_buf         buf;
 
        struct mlx5_db          db;
        struct mlx5_ib_wq       rq;
 
-       u32                     doorbell_qpn;
        u8                      sq_signal_bits;
        u8                      fm_cache;
-       int                     sq_max_wqes_per_wr;
-       int                     sq_spare_wqes;
        struct mlx5_ib_wq       sq;
 
-       struct ib_umem         *umem;
-       int                     buf_size;
-
        /* serialize qp state modifications
         */
        struct mutex            mutex;
-       u16                     xrcdn;
        u32                     flags;
        u8                      port;
-       u8                      alt_port;
-       u8                      atomic_rd_en;
-       u8                      resp_depth;
        u8                      state;
-       int                     mlx_type;
        int                     wq_sig;
        int                     scat_cqe;
        int                     max_inline_data;
@@ -284,6 +327,9 @@ struct mlx5_ib_cq_buf {
 enum mlx5_ib_qp_flags {
        MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK     = 1 << 0,
        MLX5_IB_QP_SIGNATURE_HANDLING           = 1 << 1,
+       MLX5_IB_QP_CROSS_CHANNEL                = 1 << 2,
+       MLX5_IB_QP_MANAGED_SEND                 = 1 << 3,
+       MLX5_IB_QP_MANAGED_RECV                 = 1 << 4,
 };
 
 struct mlx5_umr_wr {
@@ -326,6 +372,7 @@ struct mlx5_ib_cq {
        struct mlx5_ib_cq_buf  *resize_buf;
        struct ib_umem         *resize_umem;
        int                     cqe_size;
+       u32                     create_flags;
 };
 
 struct mlx5_ib_srq {
@@ -449,9 +496,19 @@ struct mlx5_ib_resources {
        struct ib_srq   *s1;
 };
 
+struct mlx5_roce {
+       /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL
+        * netdev pointer
+        */
+       rwlock_t                netdev_lock;
+       struct net_device       *netdev;
+       struct notifier_block   nb;
+};
+
 struct mlx5_ib_dev {
        struct ib_device                ib_dev;
        struct mlx5_core_dev            *mdev;
+       struct mlx5_roce                roce;
        MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
        int                             num_ports;
        /* serialize update of capability mask
@@ -498,7 +555,7 @@ static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
 
 static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
 {
-       return container_of(mqp, struct mlx5_ib_qp, mqp);
+       return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp;
 }
 
 static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr)
@@ -550,8 +607,6 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
 int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
                 u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
                 const void *in_mad, void *response_mad);
-struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
-                          struct mlx5_ib_ah *ah);
 struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
 int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
 int mlx5_ib_destroy_ah(struct ib_ah *ah);
@@ -578,7 +633,8 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                      struct ib_recv_wr **bad_wr);
 void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
 int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
-                         void *buffer, u32 length);
+                         void *buffer, u32 length,
+                         struct mlx5_ib_qp_base *base);
 struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
                                const struct ib_cq_init_attr *attr,
                                struct ib_ucontext *context,
@@ -680,6 +736,9 @@ static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp)  {}
 
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
+                              int index);
+
 static inline void init_query_mad(struct ib_smp *mad)
 {
        mad->base_version  = 1;
@@ -705,4 +764,28 @@ static inline int is_qp1(enum ib_qp_type qp_type)
 #define MLX5_MAX_UMR_SHIFT 16
 #define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
 
+static inline u32 check_cq_create_flags(u32 flags)
+{
+       /*
+        * Returns a non-zero value if any unsupported CQ
+        * create flag is set, otherwise returns zero.
+        */
+       return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN |
+                         IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
+}
+
+static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx,
+                                    u32 *user_index)
+{
+       if (cqe_version) {
+               if ((cmd_uidx == MLX5_IB_DEFAULT_UIDX) ||
+                   (cmd_uidx & ~MLX5_USER_ASSIGNED_UIDX_MASK))
+                       return -EINVAL;
+               *user_index = cmd_uidx;
+       } else {
+               *user_index = MLX5_IB_DEFAULT_UIDX;
+       }
+
+       return 0;
+}
 #endif /* MLX5_IB_H */
index aa8391e..b8d7636 100644 (file)
@@ -153,14 +153,16 @@ static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
 
 static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
                                      struct mlx5_ib_pfault *pfault,
-                                     int error) {
+                                     int error)
+{
        struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
-       int ret = mlx5_core_page_fault_resume(dev->mdev, qp->mqp.qpn,
+       u32 qpn = qp->trans_qp.base.mqp.qpn;
+       int ret = mlx5_core_page_fault_resume(dev->mdev,
+                                             qpn,
                                              pfault->mpfault.flags,
                                              error);
        if (ret)
-               pr_err("Failed to resolve the page fault on QP 0x%x\n",
-                      qp->mqp.qpn);
+               pr_err("Failed to resolve the page fault on QP 0x%x\n", qpn);
 }
 
 /*
@@ -391,6 +393,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
 #if defined(DEBUG)
        u32 ctrl_wqe_index, ctrl_qpn;
 #endif
+       u32 qpn = qp->trans_qp.base.mqp.qpn;
 
        ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
        if (ds * MLX5_WQE_DS_UNITS > wqe_length) {
@@ -401,7 +404,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
 
        if (ds == 0) {
                mlx5_ib_err(dev, "Got WQE with zero DS. wqe_index=%x, qpn=%x\n",
-                           wqe_index, qp->mqp.qpn);
+                           wqe_index, qpn);
                return -EFAULT;
        }
 
@@ -411,16 +414,16 @@ static int mlx5_ib_mr_initiator_pfault_handler(
                        MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
        if (wqe_index != ctrl_wqe_index) {
                mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
-                           wqe_index, qp->mqp.qpn,
+                           wqe_index, qpn,
                            ctrl_wqe_index);
                return -EFAULT;
        }
 
        ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
                MLX5_WQE_CTRL_QPN_SHIFT;
-       if (qp->mqp.qpn != ctrl_qpn) {
+       if (qpn != ctrl_qpn) {
                mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
-                           wqe_index, qp->mqp.qpn,
+                           wqe_index, qpn,
                            ctrl_qpn);
                return -EFAULT;
        }
@@ -537,6 +540,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
        int resume_with_error = 0;
        u16 wqe_index = pfault->mpfault.wqe.wqe_index;
        int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR;
+       u32 qpn = qp->trans_qp.base.mqp.qpn;
 
        buffer = (char *)__get_free_page(GFP_KERNEL);
        if (!buffer) {
@@ -546,10 +550,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
        }
 
        ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
-                                   PAGE_SIZE);
+                                   PAGE_SIZE, &qp->trans_qp.base);
        if (ret < 0) {
                mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n",
-                           -ret, wqe_index, qp->mqp.qpn);
+                           -ret, wqe_index, qpn);
                resume_with_error = 1;
                goto resolve_page_fault;
        }
@@ -586,7 +590,8 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
 resolve_page_fault:
        mlx5_ib_page_fault_resume(qp, pfault, resume_with_error);
        mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n",
-                   qp->mqp.qpn, resume_with_error, pfault->mpfault.flags);
+                   qpn, resume_with_error,
+                   pfault->mpfault.flags);
 
        free_page((unsigned long)buffer);
 }
@@ -753,7 +758,7 @@ void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)
        qp->disable_page_faults = 1;
        spin_lock_init(&qp->disable_page_faults_lock);
 
-       qp->mqp.pfault_handler  = mlx5_ib_pfault_handler;
+       qp->trans_qp.base.mqp.pfault_handler = mlx5_ib_pfault_handler;
 
        for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i)
                INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action);
index 307bdbc..9116bc3 100644 (file)
@@ -32,6 +32,8 @@
 
 #include <linux/module.h>
 #include <rdma/ib_umem.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_user_verbs.h>
 #include "mlx5_ib.h"
 #include "user.h"
 
@@ -114,14 +116,15 @@ void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
  * Return: the number of bytes copied, or an error code.
  */
 int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
-                         void *buffer, u32 length)
+                         void *buffer, u32 length,
+                         struct mlx5_ib_qp_base *base)
 {
        struct ib_device *ibdev = qp->ibqp.device;
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq;
        size_t offset;
        size_t wq_end;
-       struct ib_umem *umem = qp->umem;
+       struct ib_umem *umem = base->ubuffer.umem;
        u32 first_copy_length;
        int wqe_length;
        int ret;
@@ -172,8 +175,10 @@ static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
        struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
        struct ib_event event;
 
-       if (type == MLX5_EVENT_TYPE_PATH_MIG)
-               to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
+       if (type == MLX5_EVENT_TYPE_PATH_MIG) {
+               /* This event is only valid for trans_qps */
+               to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port;
+       }
 
        if (ibqp->event_handler) {
                event.device     = ibqp->device;
@@ -366,7 +371,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
 
 static int set_user_buf_size(struct mlx5_ib_dev *dev,
                            struct mlx5_ib_qp *qp,
-                           struct mlx5_ib_create_qp *ucmd)
+                           struct mlx5_ib_create_qp *ucmd,
+                           struct mlx5_ib_qp_base *base,
+                           struct ib_qp_init_attr *attr)
 {
        int desc_sz = 1 << qp->sq.wqe_shift;
 
@@ -391,8 +398,13 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
                return -EINVAL;
        }
 
-       qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
-               (qp->sq.wqe_cnt << 6);
+       if (attr->qp_type == IB_QPT_RAW_PACKET) {
+               base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+               qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
+       } else {
+               base->ubuffer.buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+                                        (qp->sq.wqe_cnt << 6);
+       }
 
        return 0;
 }
@@ -578,8 +590,8 @@ static int to_mlx5_st(enum ib_qp_type type)
        case IB_QPT_SMI:                return MLX5_QP_ST_QP0;
        case IB_QPT_GSI:                return MLX5_QP_ST_QP1;
        case IB_QPT_RAW_IPV6:           return MLX5_QP_ST_RAW_IPV6;
-       case IB_QPT_RAW_ETHERTYPE:      return MLX5_QP_ST_RAW_ETHERTYPE;
        case IB_QPT_RAW_PACKET:
+       case IB_QPT_RAW_ETHERTYPE:      return MLX5_QP_ST_RAW_ETHERTYPE;
        case IB_QPT_MAX:
        default:                return -EINVAL;
        }
@@ -590,13 +602,51 @@ static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
        return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
 }
 
+static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
+                           struct ib_pd *pd,
+                           unsigned long addr, size_t size,
+                           struct ib_umem **umem,
+                           int *npages, int *page_shift, int *ncont,
+                           u32 *offset)
+{
+       int err;
+
+       *umem = ib_umem_get(pd->uobject->context, addr, size, 0, 0);
+       if (IS_ERR(*umem)) {
+               mlx5_ib_dbg(dev, "umem_get failed\n");
+               return PTR_ERR(*umem);
+       }
+
+       mlx5_ib_cont_pages(*umem, addr, npages, page_shift, ncont, NULL);
+
+       err = mlx5_ib_get_buf_offset(addr, *page_shift, offset);
+       if (err) {
+               mlx5_ib_warn(dev, "bad offset\n");
+               goto err_umem;
+       }
+
+       mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n",
+                   addr, size, *npages, *page_shift, *ncont, *offset);
+
+       return 0;
+
+err_umem:
+       ib_umem_release(*umem);
+       *umem = NULL;
+
+       return err;
+}
+
 static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                          struct mlx5_ib_qp *qp, struct ib_udata *udata,
+                         struct ib_qp_init_attr *attr,
                          struct mlx5_create_qp_mbox_in **in,
-                         struct mlx5_ib_create_qp_resp *resp, int *inlen)
+                         struct mlx5_ib_create_qp_resp *resp, int *inlen,
+                         struct mlx5_ib_qp_base *base)
 {
        struct mlx5_ib_ucontext *context;
        struct mlx5_ib_create_qp ucmd;
+       struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
        int page_shift = 0;
        int uar_index;
        int npages;
@@ -615,18 +665,23 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        /*
         * TBD: should come from the verbs when we have the API
         */
-       uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
-       if (uuarn < 0) {
-               mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
-               mlx5_ib_dbg(dev, "reverting to medium latency\n");
-               uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
+       if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+               /* In CROSS_CHANNEL CQ and QP must use the same UAR */
+               uuarn = MLX5_CROSS_CHANNEL_UUAR;
+       else {
+               uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
                if (uuarn < 0) {
-                       mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
-                       mlx5_ib_dbg(dev, "reverting to high latency\n");
-                       uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+                       mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
+                       mlx5_ib_dbg(dev, "reverting to medium latency\n");
+                       uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
                        if (uuarn < 0) {
-                               mlx5_ib_warn(dev, "uuar allocation failed\n");
-                               return uuarn;
+                               mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
+                               mlx5_ib_dbg(dev, "reverting to high latency\n");
+                               uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+                               if (uuarn < 0) {
+                                       mlx5_ib_warn(dev, "uuar allocation failed\n");
+                                       return uuarn;
+                               }
                        }
                }
        }
@@ -638,32 +693,20 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
        qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
 
-       err = set_user_buf_size(dev, qp, &ucmd);
+       err = set_user_buf_size(dev, qp, &ucmd, base, attr);
        if (err)
                goto err_uuar;
 
-       if (ucmd.buf_addr && qp->buf_size) {
-               qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
-                                      qp->buf_size, 0, 0);
-               if (IS_ERR(qp->umem)) {
-                       mlx5_ib_dbg(dev, "umem_get failed\n");
-                       err = PTR_ERR(qp->umem);
+       if (ucmd.buf_addr && ubuffer->buf_size) {
+               ubuffer->buf_addr = ucmd.buf_addr;
+               err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr,
+                                      ubuffer->buf_size,
+                                      &ubuffer->umem, &npages, &page_shift,
+                                      &ncont, &offset);
+               if (err)
                        goto err_uuar;
-               }
        } else {
-               qp->umem = NULL;
-       }
-
-       if (qp->umem) {
-               mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
-                                  &ncont, NULL);
-               err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
-               if (err) {
-                       mlx5_ib_warn(dev, "bad offset\n");
-                       goto err_umem;
-               }
-               mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
-                           ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
+               ubuffer->umem = NULL;
        }
 
        *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
@@ -672,8 +715,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                err = -ENOMEM;
                goto err_umem;
        }
-       if (qp->umem)
-               mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+       if (ubuffer->umem)
+               mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift,
+                                    (*in)->pas, 0);
        (*in)->ctx.log_pg_sz_remote_qpn =
                cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
        (*in)->ctx.params2 = cpu_to_be32(offset << 6);
@@ -704,29 +748,31 @@ err_free:
        kvfree(*in);
 
 err_umem:
-       if (qp->umem)
-               ib_umem_release(qp->umem);
+       if (ubuffer->umem)
+               ib_umem_release(ubuffer->umem);
 
 err_uuar:
        free_uuar(&context->uuari, uuarn);
        return err;
 }
 
-static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
+static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp,
+                           struct mlx5_ib_qp_base *base)
 {
        struct mlx5_ib_ucontext *context;
 
        context = to_mucontext(pd->uobject->context);
        mlx5_ib_db_unmap_user(context, &qp->db);
-       if (qp->umem)
-               ib_umem_release(qp->umem);
+       if (base->ubuffer.umem)
+               ib_umem_release(base->ubuffer.umem);
        free_uuar(&context->uuari, qp->uuarn);
 }
 
 static int create_kernel_qp(struct mlx5_ib_dev *dev,
                            struct ib_qp_init_attr *init_attr,
                            struct mlx5_ib_qp *qp,
-                           struct mlx5_create_qp_mbox_in **in, int *inlen)
+                           struct mlx5_create_qp_mbox_in **in, int *inlen,
+                           struct mlx5_ib_qp_base *base)
 {
        enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
        struct mlx5_uuar_info *uuari;
@@ -758,9 +804,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
 
        qp->rq.offset = 0;
        qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
-       qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
+       base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
 
-       err = mlx5_buf_alloc(dev->mdev, qp->buf_size, &qp->buf);
+       err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf);
        if (err) {
                mlx5_ib_dbg(dev, "err %d\n", err);
                goto err_uuar;
@@ -853,19 +899,304 @@ static int is_connected(enum ib_qp_type qp_type)
        return 0;
 }
 
+static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
+                                   struct mlx5_ib_sq *sq, u32 tdn)
+{
+       u32 in[MLX5_ST_SZ_DW(create_tis_in)];
+       void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+       memset(in, 0, sizeof(in));
+
+       MLX5_SET(tisc, tisc, transport_domain, tdn);
+
+       return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
+}
+
+static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
+                                     struct mlx5_ib_sq *sq)
+{
+       mlx5_core_destroy_tis(dev->mdev, sq->tisn);
+}
+
+static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+                                  struct mlx5_ib_sq *sq, void *qpin,
+                                  struct ib_pd *pd)
+{
+       struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer;
+       __be64 *pas;
+       void *in;
+       void *sqc;
+       void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
+       void *wq;
+       int inlen;
+       int err;
+       int page_shift = 0;
+       int npages;
+       int ncont = 0;
+       u32 offset = 0;
+
+       err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, ubuffer->buf_size,
+                              &sq->ubuffer.umem, &npages, &page_shift,
+                              &ncont, &offset);
+       if (err)
+               return err;
+
+       inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont;
+       in = mlx5_vzalloc(inlen);
+       if (!in) {
+               err = -ENOMEM;
+               goto err_umem;
+       }
+
+       sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+       MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+       MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+       MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index));
+       MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd));
+       MLX5_SET(sqc, sqc, tis_lst_sz, 1);
+       MLX5_SET(sqc, sqc, tis_num_0, sq->tisn);
+
+       wq = MLX5_ADDR_OF(sqc, sqc, wq);
+       MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+       MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
+       MLX5_SET(wq, wq, uar_page, MLX5_GET(qpc, qpc, uar_page));
+       MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
+       MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+       MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size));
+       MLX5_SET(wq, wq, log_wq_pg_sz,  page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+       MLX5_SET(wq, wq, page_offset, offset);
+
+       pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
+       mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0);
+
+       err = mlx5_core_create_sq_tracked(dev->mdev, in, inlen, &sq->base.mqp);
+
+       kvfree(in);
+
+       if (err)
+               goto err_umem;
+
+       return 0;
+
+err_umem:
+       ib_umem_release(sq->ubuffer.umem);
+       sq->ubuffer.umem = NULL;
+
+       return err;
+}
+
+static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+                                    struct mlx5_ib_sq *sq)
+{
+       mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+       ib_umem_release(sq->ubuffer.umem);
+}
+
+static int get_rq_pas_size(void *qpc)
+{
+       u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12;
+       u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride);
+       u32 log_rq_size   = MLX5_GET(qpc, qpc, log_rq_size);
+       u32 page_offset   = MLX5_GET(qpc, qpc, page_offset);
+       u32 po_quanta     = 1 << (log_page_size - 6);
+       u32 rq_sz         = 1 << (log_rq_size + 4 + log_rq_stride);
+       u32 page_size     = 1 << log_page_size;
+       u32 rq_sz_po      = rq_sz + (page_offset * po_quanta);
+       u32 rq_num_pas    = (rq_sz_po + page_size - 1) / page_size;
+
+       return rq_num_pas * sizeof(u64);
+}
+
+static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
+                                  struct mlx5_ib_rq *rq, void *qpin)
+{
+       __be64 *pas;
+       __be64 *qp_pas;
+       void *in;
+       void *rqc;
+       void *wq;
+       void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
+       int inlen;
+       int err;
+       u32 rq_pas_size = get_rq_pas_size(qpc);
+
+       inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size;
+       in = mlx5_vzalloc(inlen);
+       if (!in)
+               return -ENOMEM;
+
+       rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+       MLX5_SET(rqc, rqc, vsd, 1);
+       MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
+       MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+       MLX5_SET(rqc, rqc, flush_in_error_en, 1);
+       MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
+       MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
+
+       wq = MLX5_ADDR_OF(rqc, rqc, wq);
+       MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+       MLX5_SET(wq, wq, end_padding_mode,
+                MLX5_GET(qpc, qpc, end_padding_mode));
+       MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset));
+       MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
+       MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
+       MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4);
+       MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size));
+       MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size));
+
+       pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
+       qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas);
+       memcpy(pas, qp_pas, rq_pas_size);
+
+       err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rq->base.mqp);
+
+       kvfree(in);
+
+       return err;
+}
+
+static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
+                                    struct mlx5_ib_rq *rq)
+{
+       mlx5_core_destroy_rq_tracked(dev->mdev, &rq->base.mqp);
+}
+
+static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+                                   struct mlx5_ib_rq *rq, u32 tdn)
+{
+       u32 *in;
+       void *tirc;
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(create_tir_in);
+       in = mlx5_vzalloc(inlen);
+       if (!in)
+               return -ENOMEM;
+
+       tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+       MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
+       MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
+       MLX5_SET(tirc, tirc, transport_domain, tdn);
+
+       err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
+
+       kvfree(in);
+
+       return err;
+}
+
+static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+                                     struct mlx5_ib_rq *rq)
+{
+       mlx5_core_destroy_tir(dev->mdev, rq->tirn);
+}
+
+static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+                               struct mlx5_create_qp_mbox_in *in,
+                               struct ib_pd *pd)
+{
+       struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+       struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+       struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+       struct ib_uobject *uobj = pd->uobject;
+       struct ib_ucontext *ucontext = uobj->context;
+       struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
+       int err;
+       u32 tdn = mucontext->tdn;
+
+       if (qp->sq.wqe_cnt) {
+               err = create_raw_packet_qp_tis(dev, sq, tdn);
+               if (err)
+                       return err;
+
+               err = create_raw_packet_qp_sq(dev, sq, in, pd);
+               if (err)
+                       goto err_destroy_tis;
+
+               sq->base.container_mibqp = qp;
+       }
+
+       if (qp->rq.wqe_cnt) {
+               err = create_raw_packet_qp_rq(dev, rq, in);
+               if (err)
+                       goto err_destroy_sq;
+
+               rq->base.container_mibqp = qp;
+
+               err = create_raw_packet_qp_tir(dev, rq, tdn);
+               if (err)
+                       goto err_destroy_rq;
+       }
+
+       qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
+                                                    rq->base.mqp.qpn;
+
+       return 0;
+
+err_destroy_rq:
+       destroy_raw_packet_qp_rq(dev, rq);
+err_destroy_sq:
+       if (!qp->sq.wqe_cnt)
+               return err;
+       destroy_raw_packet_qp_sq(dev, sq);
+err_destroy_tis:
+       destroy_raw_packet_qp_tis(dev, sq);
+
+       return err;
+}
+
+static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev,
+                                 struct mlx5_ib_qp *qp)
+{
+       struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+       struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+       struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+
+       if (qp->rq.wqe_cnt) {
+               destroy_raw_packet_qp_tir(dev, rq);
+               destroy_raw_packet_qp_rq(dev, rq);
+       }
+
+       if (qp->sq.wqe_cnt) {
+               destroy_raw_packet_qp_sq(dev, sq);
+               destroy_raw_packet_qp_tis(dev, sq);
+       }
+}
+
+static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
+                                   struct mlx5_ib_raw_packet_qp *raw_packet_qp)
+{
+       struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+       struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+
+       sq->sq = &qp->sq;
+       rq->rq = &qp->rq;
+       sq->doorbell = &qp->db;
+       rq->doorbell = &qp->db;
+}
+
 static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                            struct ib_qp_init_attr *init_attr,
                            struct ib_udata *udata, struct mlx5_ib_qp *qp)
 {
        struct mlx5_ib_resources *devr = &dev->devr;
        struct mlx5_core_dev *mdev = dev->mdev;
+       struct mlx5_ib_qp_base *base;
        struct mlx5_ib_create_qp_resp resp;
        struct mlx5_create_qp_mbox_in *in;
        struct mlx5_ib_create_qp ucmd;
        int inlen = sizeof(*in);
        int err;
+       u32 uidx = MLX5_IB_DEFAULT_UIDX;
+       void *qpc;
+
+       base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
+              &qp->raw_packet_qp.rq.base :
+              &qp->trans_qp.base;
 
-       mlx5_ib_odp_create_qp(qp);
+       if (init_attr->qp_type != IB_QPT_RAW_PACKET)
+               mlx5_ib_odp_create_qp(qp);
 
        mutex_init(&qp->mutex);
        spin_lock_init(&qp->sq.lock);
@@ -880,6 +1211,21 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                }
        }
 
+       if (init_attr->create_flags &
+                       (IB_QP_CREATE_CROSS_CHANNEL |
+                        IB_QP_CREATE_MANAGED_SEND |
+                        IB_QP_CREATE_MANAGED_RECV)) {
+               if (!MLX5_CAP_GEN(mdev, cd)) {
+                       mlx5_ib_dbg(dev, "cross-channel isn't supported\n");
+                       return -EINVAL;
+               }
+               if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL)
+                       qp->flags |= MLX5_IB_QP_CROSS_CHANNEL;
+               if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND)
+                       qp->flags |= MLX5_IB_QP_MANAGED_SEND;
+               if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV)
+                       qp->flags |= MLX5_IB_QP_MANAGED_RECV;
+       }
        if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
                qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
 
@@ -889,6 +1235,11 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                        return -EFAULT;
                }
 
+               err = get_qp_user_index(to_mucontext(pd->uobject->context),
+                                       &ucmd, udata->inlen, &uidx);
+               if (err)
+                       return err;
+
                qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
                qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
        } else {
@@ -918,11 +1269,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                                            ucmd.sq_wqe_count, max_wqes);
                                return -EINVAL;
                        }
-                       err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen);
+                       err = create_user_qp(dev, pd, qp, udata, init_attr, &in,
+                                            &resp, &inlen, base);
                        if (err)
                                mlx5_ib_dbg(dev, "err %d\n", err);
                } else {
-                       err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
+                       err = create_kernel_qp(dev, init_attr, qp, &in, &inlen,
+                                              base);
                        if (err)
                                mlx5_ib_dbg(dev, "err %d\n", err);
                }
@@ -954,6 +1307,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
                in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST);
 
+       if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+               in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_MASTER);
+       if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
+               in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_SEND);
+       if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
+               in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_RECV);
+
        if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
                int rcqe_sz;
                int scqe_sz;
@@ -1018,26 +1378,35 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 
        in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
 
-       err = mlx5_core_create_qp(dev->mdev, &qp->mqp, in, inlen);
+       if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+               qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+               /* 0xffffff means we ask to work with cqe version 0 */
+               MLX5_SET(qpc, qpc, user_index, uidx);
+       }
+
+       if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
+               qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
+               raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
+               err = create_raw_packet_qp(dev, qp, in, pd);
+       } else {
+               err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
+       }
+
        if (err) {
                mlx5_ib_dbg(dev, "create qp failed\n");
                goto err_create;
        }
 
        kvfree(in);
-       /* Hardware wants QPN written in big-endian order (after
-        * shifting) for send doorbell.  Precompute this value to save
-        * a little bit when posting sends.
-        */
-       qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
 
-       qp->mqp.event = mlx5_ib_qp_event;
+       base->container_mibqp = qp;
+       base->mqp.event = mlx5_ib_qp_event;
 
        return 0;
 
 err_create:
        if (qp->create_type == MLX5_QP_USER)
-               destroy_qp_user(pd, qp);
+               destroy_qp_user(pd, qp, base);
        else if (qp->create_type == MLX5_QP_KERNEL)
                destroy_qp_kernel(dev, qp);
 
@@ -1129,11 +1498,11 @@ static void get_cqs(struct mlx5_ib_qp *qp,
        case IB_QPT_UD:
        case IB_QPT_RAW_IPV6:
        case IB_QPT_RAW_ETHERTYPE:
+       case IB_QPT_RAW_PACKET:
                *send_cq = to_mcq(qp->ibqp.send_cq);
                *recv_cq = to_mcq(qp->ibqp.recv_cq);
                break;
 
-       case IB_QPT_RAW_PACKET:
        case IB_QPT_MAX:
        default:
                *send_cq = NULL;
@@ -1142,45 +1511,66 @@ static void get_cqs(struct mlx5_ib_qp *qp,
        }
 }
 
+static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+                               u16 operation);
+
 static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
 {
        struct mlx5_ib_cq *send_cq, *recv_cq;
+       struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
        struct mlx5_modify_qp_mbox_in *in;
        int err;
 
+       base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ?
+              &qp->raw_packet_qp.rq.base :
+              &qp->trans_qp.base;
+
        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in)
                return;
 
        if (qp->state != IB_QPS_RESET) {
-               mlx5_ib_qp_disable_pagefaults(qp);
-               if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state),
-                                       MLX5_QP_STATE_RST, in, 0, &qp->mqp))
-                       mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
-                                    qp->mqp.qpn);
+               if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
+                       mlx5_ib_qp_disable_pagefaults(qp);
+                       err = mlx5_core_qp_modify(dev->mdev,
+                                                 MLX5_CMD_OP_2RST_QP, in, 0,
+                                                 &base->mqp);
+               } else {
+                       err = modify_raw_packet_qp(dev, qp,
+                                                  MLX5_CMD_OP_2RST_QP);
+               }
+               if (err)
+                       mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n",
+                                    base->mqp.qpn);
        }
 
        get_cqs(qp, &send_cq, &recv_cq);
 
        if (qp->create_type == MLX5_QP_KERNEL) {
                mlx5_ib_lock_cqs(send_cq, recv_cq);
-               __mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+               __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
                                   qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
                if (send_cq != recv_cq)
-                       __mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+                       __mlx5_ib_cq_clean(send_cq, base->mqp.qpn,
+                                          NULL);
                mlx5_ib_unlock_cqs(send_cq, recv_cq);
        }
 
-       err = mlx5_core_destroy_qp(dev->mdev, &qp->mqp);
-       if (err)
-               mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn);
-       kfree(in);
+       if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+               destroy_raw_packet_qp(dev, qp);
+       } else {
+               err = mlx5_core_destroy_qp(dev->mdev, &base->mqp);
+               if (err)
+                       mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n",
+                                    base->mqp.qpn);
+       }
 
+       kfree(in);
 
        if (qp->create_type == MLX5_QP_KERNEL)
                destroy_qp_kernel(dev, qp);
        else if (qp->create_type == MLX5_QP_USER)
-               destroy_qp_user(&get_pd(qp)->ibpd, qp);
+               destroy_qp_user(&get_pd(qp)->ibpd, qp, base);
 }
 
 static const char *ib_qp_type_str(enum ib_qp_type type)
@@ -1225,6 +1615,16 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
 
        if (pd) {
                dev = to_mdev(pd->device);
+
+               if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
+                       if (!pd->uobject) {
+                               mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n");
+                               return ERR_PTR(-EINVAL);
+                       } else if (!to_mucontext(pd->uobject->context)->cqe_version) {
+                               mlx5_ib_dbg(dev, "Raw Packet QP is only supported for CQE version > 0\n");
+                               return ERR_PTR(-EINVAL);
+                       }
+               }
        } else {
                /* being cautious here */
                if (init_attr->qp_type != IB_QPT_XRC_TGT &&
@@ -1250,6 +1650,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
                }
 
                /* fall through */
+       case IB_QPT_RAW_PACKET:
        case IB_QPT_RC:
        case IB_QPT_UC:
        case IB_QPT_UD:
@@ -1272,19 +1673,19 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
                else if (is_qp1(init_attr->qp_type))
                        qp->ibqp.qp_num = 1;
                else
-                       qp->ibqp.qp_num = qp->mqp.qpn;
+                       qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn;
 
                mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
-                           qp->ibqp.qp_num, qp->mqp.qpn, to_mcq(init_attr->recv_cq)->mcq.cqn,
+                           qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
+                           to_mcq(init_attr->recv_cq)->mcq.cqn,
                            to_mcq(init_attr->send_cq)->mcq.cqn);
 
-               qp->xrcdn = xrcdn;
+               qp->trans_qp.xrcdn = xrcdn;
 
                break;
 
        case IB_QPT_RAW_IPV6:
        case IB_QPT_RAW_ETHERTYPE:
-       case IB_QPT_RAW_PACKET:
        case IB_QPT_MAX:
        default:
                mlx5_ib_dbg(dev, "unsupported qp type %d\n",
@@ -1318,12 +1719,12 @@ static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_att
        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
                dest_rd_atomic = attr->max_dest_rd_atomic;
        else
-               dest_rd_atomic = qp->resp_depth;
+               dest_rd_atomic = qp->trans_qp.resp_depth;
 
        if (attr_mask & IB_QP_ACCESS_FLAGS)
                access_flags = attr->qp_access_flags;
        else
-               access_flags = qp->atomic_rd_en;
+               access_flags = qp->trans_qp.atomic_rd_en;
 
        if (!dest_rd_atomic)
                access_flags &= IB_ACCESS_REMOTE_WRITE;
@@ -1360,21 +1761,42 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
        return rate + MLX5_STAT_RATE_OFFSET;
 }
 
-static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
+static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
+                                     struct mlx5_ib_sq *sq, u8 sl)
+{
+       void *in;
+       void *tisc;
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
+       in = mlx5_vzalloc(inlen);
+       if (!in)
+               return -ENOMEM;
+
+       MLX5_SET(modify_tis_in, in, bitmask.prio, 1);
+
+       tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
+       MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
+
+       err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen);
+
+       kvfree(in);
+
+       return err;
+}
+
+static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+                        const struct ib_ah_attr *ah,
                         struct mlx5_qp_path *path, u8 port, int attr_mask,
                         u32 path_flags, const struct ib_qp_attr *attr)
 {
+       enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
        int err;
 
-       path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
-       path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0;
-
        if (attr_mask & IB_QP_PKEY_INDEX)
                path->pkey_index = attr->pkey_index;
 
-       path->grh_mlid  = ah->src_path_bits & 0x7f;
-       path->rlid      = cpu_to_be16(ah->dlid);
-
        if (ah->ah_flags & IB_AH_GRH) {
                if (ah->grh.sgid_index >=
                    dev->mdev->port_caps[port - 1].gid_table_len) {
@@ -1383,7 +1805,27 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
                               dev->mdev->port_caps[port - 1].gid_table_len);
                        return -EINVAL;
                }
-               path->grh_mlid |= 1 << 7;
+       }
+
+       if (ll == IB_LINK_LAYER_ETHERNET) {
+               if (!(ah->ah_flags & IB_AH_GRH))
+                       return -EINVAL;
+               memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
+               path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
+                                                         ah->grh.sgid_index);
+               path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
+       } else {
+               path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
+               path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 :
+                                                                       0;
+               path->rlid = cpu_to_be16(ah->dlid);
+               path->grh_mlid = ah->src_path_bits & 0x7f;
+               if (ah->ah_flags & IB_AH_GRH)
+                       path->grh_mlid  |= 1 << 7;
+               path->dci_cfi_prio_sl = ah->sl & 0xf;
+       }
+
+       if (ah->ah_flags & IB_AH_GRH) {
                path->mgid_index = ah->grh.sgid_index;
                path->hop_limit  = ah->grh.hop_limit;
                path->tclass_flowlabel =
@@ -1401,7 +1843,10 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
        if (attr_mask & IB_QP_TIMEOUT)
                path->ackto_lt = attr->timeout << 3;
 
-       path->sl = ah->sl & 0xf;
+       if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
+               return modify_raw_packet_eth_prio(dev->mdev,
+                                                 &qp->raw_packet_qp.sq,
+                                                 ah->sl & 0xf);
 
        return 0;
 }
@@ -1549,12 +1994,154 @@ static int ib_mask_to_mlx5_opt(int ib_mask)
        return result;
 }
 
+static int modify_raw_packet_qp_rq(struct mlx5_core_dev *dev,
+                                  struct mlx5_ib_rq *rq, int new_state)
+{
+       void *in;
+       void *rqc;
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+       in = mlx5_vzalloc(inlen);
+       if (!in)
+               return -ENOMEM;
+
+       MLX5_SET(modify_rq_in, in, rq_state, rq->state);
+
+       rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+       MLX5_SET(rqc, rqc, state, new_state);
+
+       err = mlx5_core_modify_rq(dev, rq->base.mqp.qpn, in, inlen);
+       if (err)
+               goto out;
+
+       rq->state = new_state;
+
+out:
+       kvfree(in);
+       return err;
+}
+
+static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
+                                  struct mlx5_ib_sq *sq, int new_state)
+{
+       void *in;
+       void *sqc;
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+       in = mlx5_vzalloc(inlen);
+       if (!in)
+               return -ENOMEM;
+
+       MLX5_SET(modify_sq_in, in, sq_state, sq->state);
+
+       sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+       MLX5_SET(sqc, sqc, state, new_state);
+
+       err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
+       if (err)
+               goto out;
+
+       sq->state = new_state;
+
+out:
+       kvfree(in);
+       return err;
+}
+
+static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+                               u16 operation)
+{
+       struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+       struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+       struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+       int rq_state;
+       int sq_state;
+       int err;
+
+       switch (operation) {
+       case MLX5_CMD_OP_RST2INIT_QP:
+               rq_state = MLX5_RQC_STATE_RDY;
+               sq_state = MLX5_SQC_STATE_RDY;
+               break;
+       case MLX5_CMD_OP_2ERR_QP:
+               rq_state = MLX5_RQC_STATE_ERR;
+               sq_state = MLX5_SQC_STATE_ERR;
+               break;
+       case MLX5_CMD_OP_2RST_QP:
+               rq_state = MLX5_RQC_STATE_RST;
+               sq_state = MLX5_SQC_STATE_RST;
+               break;
+       case MLX5_CMD_OP_INIT2INIT_QP:
+       case MLX5_CMD_OP_INIT2RTR_QP:
+       case MLX5_CMD_OP_RTR2RTS_QP:
+       case MLX5_CMD_OP_RTS2RTS_QP:
+               /* Nothing to do here... */
+               return 0;
+       default:
+               WARN_ON(1);
+               return -EINVAL;
+       }
+
+       if (qp->rq.wqe_cnt) {
+               err =  modify_raw_packet_qp_rq(dev->mdev, rq, rq_state);
+               if (err)
+                       return err;
+       }
+
+       if (qp->sq.wqe_cnt)
+               return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state);
+
+       return 0;
+}
+
 static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
                               const struct ib_qp_attr *attr, int attr_mask,
                               enum ib_qp_state cur_state, enum ib_qp_state new_state)
 {
+       static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
+               [MLX5_QP_STATE_RST] = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_RST2INIT_QP,
+               },
+               [MLX5_QP_STATE_INIT]  = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_INIT2INIT_QP,
+                       [MLX5_QP_STATE_RTR]     = MLX5_CMD_OP_INIT2RTR_QP,
+               },
+               [MLX5_QP_STATE_RTR]   = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTR2RTS_QP,
+               },
+               [MLX5_QP_STATE_RTS]   = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTS2RTS_QP,
+               },
+               [MLX5_QP_STATE_SQD] = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+               },
+               [MLX5_QP_STATE_SQER] = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_SQERR2RTS_QP,
+               },
+               [MLX5_QP_STATE_ERR] = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+               }
+       };
+
        struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
        struct mlx5_ib_qp *qp = to_mqp(ibqp);
+       struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
        struct mlx5_ib_cq *send_cq, *recv_cq;
        struct mlx5_qp_context *context;
        struct mlx5_modify_qp_mbox_in *in;
@@ -1564,6 +2151,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
        int sqd_event;
        int mlx5_st;
        int err;
+       u16 op;
 
        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in)
@@ -1623,7 +2211,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
                context->pri_path.port = attr->port_num;
 
        if (attr_mask & IB_QP_AV) {
-               err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path,
+               err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path,
                                    attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
                                    attr_mask, 0, attr);
                if (err)
@@ -1634,7 +2222,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
                context->pri_path.ackto_lt |= attr->timeout << 3;
 
        if (attr_mask & IB_QP_ALT_PATH) {
-               err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
+               err = mlx5_set_path(dev, qp, &attr->alt_ah_attr,
+                                   &context->alt_path,
                                    attr->alt_port_num, attr_mask, 0, attr);
                if (err)
                        goto out;
@@ -1706,41 +2295,51 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
         * again to RTS, and may cause the driver and the device to get out of
         * sync. */
        if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
-           (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
+           (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR) &&
+           (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
                mlx5_ib_qp_disable_pagefaults(qp);
 
+       if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
+           !optab[mlx5_cur][mlx5_new])
+               goto out;
+
+       op = optab[mlx5_cur][mlx5_new];
        optpar = ib_mask_to_mlx5_opt(attr_mask);
        optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
        in->optparam = cpu_to_be32(optpar);
-       err = mlx5_core_qp_modify(dev->mdev, to_mlx5_state(cur_state),
-                                 to_mlx5_state(new_state), in, sqd_event,
-                                 &qp->mqp);
+
+       if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
+               err = modify_raw_packet_qp(dev, qp, op);
+       else
+               err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event,
+                                         &base->mqp);
        if (err)
                goto out;
 
-       if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+       if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT &&
+           (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
                mlx5_ib_qp_enable_pagefaults(qp);
 
        qp->state = new_state;
 
        if (attr_mask & IB_QP_ACCESS_FLAGS)
-               qp->atomic_rd_en = attr->qp_access_flags;
+               qp->trans_qp.atomic_rd_en = attr->qp_access_flags;
        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-               qp->resp_depth = attr->max_dest_rd_atomic;
+               qp->trans_qp.resp_depth = attr->max_dest_rd_atomic;
        if (attr_mask & IB_QP_PORT)
                qp->port = attr->port_num;
        if (attr_mask & IB_QP_ALT_PATH)
-               qp->alt_port = attr->alt_port_num;
+               qp->trans_qp.alt_port = attr->alt_port_num;
 
        /*
         * If we moved a kernel QP to RESET, clean up all old CQ
         * entries and reinitialize the QP.
         */
        if (new_state == IB_QPS_RESET && !ibqp->uobject) {
-               mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+               mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
                                 ibqp->srq ? to_msrq(ibqp->srq) : NULL);
                if (send_cq != recv_cq)
-                       mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+                       mlx5_ib_cq_clean(send_cq, base->mqp.qpn, NULL);
 
                qp->rq.head = 0;
                qp->rq.tail = 0;
@@ -1765,15 +2364,21 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        enum ib_qp_state cur_state, new_state;
        int err = -EINVAL;
        int port;
+       enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
 
        mutex_lock(&qp->mutex);
 
        cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
        new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
 
+       if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) {
+               port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+               ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
+       }
+
        if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
            !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
-                               IB_LINK_LAYER_UNSPECIFIED))
+                               ll))
                goto out;
 
        if ((attr_mask & IB_QP_PORT) &&
@@ -2570,7 +3175,7 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
 
        ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
                                             mlx5_opcode | ((u32)opmod << 24));
-       ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
+       ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
        ctrl->fm_ce_se |= fence;
        qp->fm_cache = next_fence;
        if (unlikely(qp->wq_sig))
@@ -3003,7 +3608,7 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at
            ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports))
                return;
 
-       ib_ah_attr->sl = path->sl & 0xf;
+       ib_ah_attr->sl = path->dci_cfi_prio_sl & 0xf;
 
        ib_ah_attr->dlid          = be16_to_cpu(path->rlid);
        ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
@@ -3021,39 +3626,153 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at
        }
 }
 
-int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
-                    struct ib_qp_init_attr *qp_init_attr)
+static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev,
+                                       struct mlx5_ib_sq *sq,
+                                       u8 *sq_state)
+{
+       void *out;
+       void *sqc;
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(query_sq_out);
+       out = mlx5_vzalloc(inlen);
+       if (!out)
+               return -ENOMEM;
+
+       err = mlx5_core_query_sq(dev->mdev, sq->base.mqp.qpn, out);
+       if (err)
+               goto out;
+
+       sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context);
+       *sq_state = MLX5_GET(sqc, sqc, state);
+       sq->state = *sq_state;
+
+out:
+       kvfree(out);
+       return err;
+}
+
+static int query_raw_packet_qp_rq_state(struct mlx5_ib_dev *dev,
+                                       struct mlx5_ib_rq *rq,
+                                       u8 *rq_state)
+{
+       void *out;
+       void *rqc;
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(query_rq_out);
+       out = mlx5_vzalloc(inlen);
+       if (!out)
+               return -ENOMEM;
+
+       err = mlx5_core_query_rq(dev->mdev, rq->base.mqp.qpn, out);
+       if (err)
+               goto out;
+
+       rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context);
+       *rq_state = MLX5_GET(rqc, rqc, state);
+       rq->state = *rq_state;
+
+out:
+       kvfree(out);
+       return err;
+}
+
+static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
+                                 struct mlx5_ib_qp *qp, u8 *qp_state)
+{
+       static const u8 sqrq_trans[MLX5_RQ_NUM_STATE][MLX5_SQ_NUM_STATE] = {
+               [MLX5_RQC_STATE_RST] = {
+                       [MLX5_SQC_STATE_RST]    = IB_QPS_RESET,
+                       [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE_BAD,
+                       [MLX5_SQC_STATE_ERR]    = MLX5_QP_STATE_BAD,
+                       [MLX5_SQ_STATE_NA]      = IB_QPS_RESET,
+               },
+               [MLX5_RQC_STATE_RDY] = {
+                       [MLX5_SQC_STATE_RST]    = MLX5_QP_STATE_BAD,
+                       [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE,
+                       [MLX5_SQC_STATE_ERR]    = IB_QPS_SQE,
+                       [MLX5_SQ_STATE_NA]      = MLX5_QP_STATE,
+               },
+               [MLX5_RQC_STATE_ERR] = {
+                       [MLX5_SQC_STATE_RST]    = MLX5_QP_STATE_BAD,
+                       [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE_BAD,
+                       [MLX5_SQC_STATE_ERR]    = IB_QPS_ERR,
+                       [MLX5_SQ_STATE_NA]      = IB_QPS_ERR,
+               },
+               [MLX5_RQ_STATE_NA] = {
+                       [MLX5_SQC_STATE_RST]    = IB_QPS_RESET,
+                       [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE,
+                       [MLX5_SQC_STATE_ERR]    = MLX5_QP_STATE,
+                       [MLX5_SQ_STATE_NA]      = MLX5_QP_STATE_BAD,
+               },
+       };
+
+       *qp_state = sqrq_trans[rq_state][sq_state];
+
+       if (*qp_state == MLX5_QP_STATE_BAD) {
+               WARN(1, "Buggy Raw Packet QP state, SQ 0x%x state: 0x%x, RQ 0x%x state: 0x%x",
+                    qp->raw_packet_qp.sq.base.mqp.qpn, sq_state,
+                    qp->raw_packet_qp.rq.base.mqp.qpn, rq_state);
+               return -EINVAL;
+       }
+
+       if (*qp_state == MLX5_QP_STATE)
+               *qp_state = qp->state;
+
+       return 0;
+}
+
+static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev,
+                                    struct mlx5_ib_qp *qp,
+                                    u8 *raw_packet_qp_state)
+{
+       struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+       struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+       struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+       int err;
+       u8 sq_state = MLX5_SQ_STATE_NA;
+       u8 rq_state = MLX5_RQ_STATE_NA;
+
+       if (qp->sq.wqe_cnt) {
+               err = query_raw_packet_qp_sq_state(dev, sq, &sq_state);
+               if (err)
+                       return err;
+       }
+
+       if (qp->rq.wqe_cnt) {
+               err = query_raw_packet_qp_rq_state(dev, rq, &rq_state);
+               if (err)
+                       return err;
+       }
+
+       return sqrq_state_to_qp_state(sq_state, rq_state, qp,
+                                     raw_packet_qp_state);
+}
+
+static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+                        struct ib_qp_attr *qp_attr)
 {
-       struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
-       struct mlx5_ib_qp *qp = to_mqp(ibqp);
        struct mlx5_query_qp_mbox_out *outb;
        struct mlx5_qp_context *context;
        int mlx5_state;
        int err = 0;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       /*
-        * Wait for any outstanding page faults, in case the user frees memory
-        * based upon this query's result.
-        */
-       flush_workqueue(mlx5_ib_page_fault_wq);
-#endif
-
-       mutex_lock(&qp->mutex);
        outb = kzalloc(sizeof(*outb), GFP_KERNEL);
-       if (!outb) {
-               err = -ENOMEM;
-               goto out;
-       }
+       if (!outb)
+               return -ENOMEM;
+
        context = &outb->ctx;
-       err = mlx5_core_qp_query(dev->mdev, &qp->mqp, outb, sizeof(*outb));
+       err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb,
+                                sizeof(*outb));
        if (err)
-               goto out_free;
+               goto out;
 
        mlx5_state = be32_to_cpu(context->flags) >> 28;
 
        qp->state                    = to_ib_qp_state(mlx5_state);
-       qp_attr->qp_state            = qp->state;
        qp_attr->path_mtu            = context->mtu_msgmax >> 5;
        qp_attr->path_mig_state      =
                to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
@@ -3087,6 +3806,43 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
        qp_attr->retry_cnt          = (be32_to_cpu(context->params1) >> 16) & 0x7;
        qp_attr->rnr_retry          = (be32_to_cpu(context->params1) >> 13) & 0x7;
        qp_attr->alt_timeout        = context->alt_path.ackto_lt >> 3;
+
+out:
+       kfree(outb);
+       return err;
+}
+
+int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+                    int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+       struct mlx5_ib_qp *qp = to_mqp(ibqp);
+       int err = 0;
+       u8 raw_packet_qp_state;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+       /*
+        * Wait for any outstanding page faults, in case the user frees memory
+        * based upon this query's result.
+        */
+       flush_workqueue(mlx5_ib_page_fault_wq);
+#endif
+
+       mutex_lock(&qp->mutex);
+
+       if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+               err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
+               if (err)
+                       goto out;
+               qp->state = raw_packet_qp_state;
+               qp_attr->port_num = 1;
+       } else {
+               err = query_qp_attr(dev, qp, qp_attr);
+               if (err)
+                       goto out;
+       }
+
+       qp_attr->qp_state            = qp->state;
        qp_attr->cur_qp_state        = qp_attr->qp_state;
        qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;
        qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
@@ -3110,12 +3866,16 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
        if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
                qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
 
+       if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+               qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL;
+       if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
+               qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND;
+       if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
+               qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
+
        qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
                IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
 
-out_free:
-       kfree(outb);
-
 out:
        mutex_unlock(&qp->mutex);
        return err;
index e008505..4659256 100644 (file)
@@ -78,28 +78,41 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
                           struct ib_udata *udata, int buf_size, int *inlen)
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
-       struct mlx5_ib_create_srq ucmd;
+       struct mlx5_ib_create_srq ucmd = {};
        size_t ucmdlen;
+       void *xsrqc;
        int err;
        int npages;
        int page_shift;
        int ncont;
        u32 offset;
+       u32 uidx = MLX5_IB_DEFAULT_UIDX;
+       int drv_data = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
 
-       ucmdlen =
-               (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
-                sizeof(ucmd)) ? (sizeof(ucmd) -
-                                 sizeof(ucmd.reserved)) : sizeof(ucmd);
+       if (drv_data < 0)
+               return -EINVAL;
+
+       ucmdlen = (drv_data < sizeof(ucmd)) ?
+                 drv_data : sizeof(ucmd);
 
        if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
                mlx5_ib_dbg(dev, "failed copy udata\n");
                return -EFAULT;
        }
 
-       if (ucmdlen == sizeof(ucmd) &&
-           ucmd.reserved != 0)
+       if (ucmd.reserved0 || ucmd.reserved1)
                return -EINVAL;
 
+       if (drv_data > sizeof(ucmd) &&
+           !ib_is_udata_cleared(udata, sizeof(ucmd),
+                                drv_data - sizeof(ucmd)))
+               return -EINVAL;
+
+       err = get_srq_user_index(to_mucontext(pd->uobject->context),
+                                &ucmd, udata->inlen, &uidx);
+       if (err)
+               return err;
+
        srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
 
        srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
@@ -138,6 +151,12 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
        (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
        (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
 
+       if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+               xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
+                                    xrc_srq_context_entry);
+               MLX5_SET(xrc_srqc, xsrqc, user_index, uidx);
+       }
+
        return 0;
 
 err_in:
@@ -158,6 +177,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
        struct mlx5_wqe_srq_next_seg *next;
        int page_shift;
        int npages;
+       void *xsrqc;
 
        err = mlx5_db_alloc(dev->mdev, &srq->db);
        if (err) {
@@ -204,6 +224,13 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 
        (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 
+       if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+               xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
+                                    xrc_srq_context_entry);
+               /* 0xffffff means we ask to work with cqe version 0 */
+               MLX5_SET(xrc_srqc, xsrqc, user_index, MLX5_IB_DEFAULT_UIDX);
+       }
+
        return 0;
 
 err_in:
index 76fb7b9..b94a554 100644 (file)
@@ -35,6 +35,8 @@
 
 #include <linux/types.h>
 
+#include "mlx5_ib.h"
+
 enum {
        MLX5_QP_FLAG_SIGNATURE          = 1 << 0,
        MLX5_QP_FLAG_SCATTER_CQE        = 1 << 1,
@@ -66,7 +68,15 @@ struct mlx5_ib_alloc_ucontext_req_v2 {
        __u32   total_num_uuars;
        __u32   num_low_latency_uuars;
        __u32   flags;
-       __u32   reserved;
+       __u32   comp_mask;
+       __u8    max_cqe_version;
+       __u8    reserved0;
+       __u16   reserved1;
+       __u32   reserved2;
+};
+
+enum mlx5_ib_alloc_ucontext_resp_mask {
+       MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
 };
 
 struct mlx5_ib_alloc_ucontext_resp {
@@ -80,7 +90,13 @@ struct mlx5_ib_alloc_ucontext_resp {
        __u32   max_recv_wr;
        __u32   max_srq_recv_wr;
        __u16   num_ports;
-       __u16   reserved;
+       __u16   reserved1;
+       __u32   comp_mask;
+       __u32   response_length;
+       __u8    cqe_version;
+       __u8    reserved2;
+       __u16   reserved3;
+       __u64   hca_core_clock_offset;
 };
 
 struct mlx5_ib_alloc_pd_resp {
@@ -110,7 +126,9 @@ struct mlx5_ib_create_srq {
        __u64   buf_addr;
        __u64   db_addr;
        __u32   flags;
-       __u32   reserved; /* explicit padding (optional on i386) */
+       __u32   reserved0; /* explicit padding (optional on i386) */
+       __u32   uidx;
+       __u32   reserved1;
 };
 
 struct mlx5_ib_create_srq_resp {
@@ -125,9 +143,48 @@ struct mlx5_ib_create_qp {
        __u32   rq_wqe_count;
        __u32   rq_wqe_shift;
        __u32   flags;
+       __u32   uidx;
+       __u32   reserved0;
+       __u64   sq_buf_addr;
 };
 
 struct mlx5_ib_create_qp_resp {
        __u32   uuar_index;
 };
+
+static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
+                                   struct mlx5_ib_create_qp *ucmd,
+                                   int inlen,
+                                   u32 *user_index)
+{
+       u8 cqe_version = ucontext->cqe_version;
+
+       if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) &&
+           !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+               return 0;
+
+       if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) !=
+              !!cqe_version))
+               return -EINVAL;
+
+       return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+}
+
+static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext,
+                                    struct mlx5_ib_create_srq *ucmd,
+                                    int inlen,
+                                    u32 *user_index)
+{
+       u8 cqe_version = ucontext->cqe_version;
+
+       if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) &&
+           !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+               return 0;
+
+       if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) !=
+              !!cqe_version))
+               return -EINVAL;
+
+       return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+}
 #endif /* MLX5_IB_USER_H */
index 40ba833..a6531ff 100644 (file)
@@ -608,9 +608,6 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
                        entry->opcode    = IB_WC_FETCH_ADD;
                        entry->byte_len  = MTHCA_ATOMIC_BYTE_LEN;
                        break;
-               case MTHCA_OPCODE_BIND_MW:
-                       entry->opcode    = IB_WC_BIND_MW;
-                       break;
                default:
                        entry->opcode    = MTHCA_OPCODE_INVALID;
                        break;
index dc2d48c..9866c35 100644 (file)
@@ -898,89 +898,6 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
        return &mr->ibmr;
 }
 
-static struct ib_mr *mthca_reg_phys_mr(struct ib_pd       *pd,
-                                      struct ib_phys_buf *buffer_list,
-                                      int                 num_phys_buf,
-                                      int                 acc,
-                                      u64                *iova_start)
-{
-       struct mthca_mr *mr;
-       u64 *page_list;
-       u64 total_size;
-       unsigned long mask;
-       int shift;
-       int npages;
-       int err;
-       int i, j, n;
-
-       mask = buffer_list[0].addr ^ *iova_start;
-       total_size = 0;
-       for (i = 0; i < num_phys_buf; ++i) {
-               if (i != 0)
-                       mask |= buffer_list[i].addr;
-               if (i != num_phys_buf - 1)
-                       mask |= buffer_list[i].addr + buffer_list[i].size;
-
-               total_size += buffer_list[i].size;
-       }
-
-       if (mask & ~PAGE_MASK)
-               return ERR_PTR(-EINVAL);
-
-       shift = __ffs(mask | 1 << 31);
-
-       buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
-       buffer_list[0].addr &= ~0ull << shift;
-
-       mr = kmalloc(sizeof *mr, GFP_KERNEL);
-       if (!mr)
-               return ERR_PTR(-ENOMEM);
-
-       npages = 0;
-       for (i = 0; i < num_phys_buf; ++i)
-               npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
-
-       if (!npages)
-               return &mr->ibmr;
-
-       page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
-       if (!page_list) {
-               kfree(mr);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       n = 0;
-       for (i = 0; i < num_phys_buf; ++i)
-               for (j = 0;
-                    j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
-                    ++j)
-                       page_list[n++] = buffer_list[i].addr + ((u64) j << shift);
-
-       mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) "
-                 "in PD %x; shift %d, npages %d.\n",
-                 (unsigned long long) buffer_list[0].addr,
-                 (unsigned long long) *iova_start,
-                 to_mpd(pd)->pd_num,
-                 shift, npages);
-
-       err = mthca_mr_alloc_phys(to_mdev(pd->device),
-                                 to_mpd(pd)->pd_num,
-                                 page_list, shift, npages,
-                                 *iova_start, total_size,
-                                 convert_access(acc), mr);
-
-       if (err) {
-               kfree(page_list);
-               kfree(mr);
-               return ERR_PTR(err);
-       }
-
-       kfree(page_list);
-       mr->umem = NULL;
-
-       return &mr->ibmr;
-}
-
 static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                       u64 virt, int acc, struct ib_udata *udata)
 {
@@ -1346,7 +1263,6 @@ int mthca_register_device(struct mthca_dev *dev)
        dev->ib_dev.destroy_cq           = mthca_destroy_cq;
        dev->ib_dev.poll_cq              = mthca_poll_cq;
        dev->ib_dev.get_dma_mr           = mthca_get_dma_mr;
-       dev->ib_dev.reg_phys_mr          = mthca_reg_phys_mr;
        dev->ib_dev.reg_user_mr          = mthca_reg_user_mr;
        dev->ib_dev.dereg_mr             = mthca_dereg_mr;
        dev->ib_dev.get_port_immutable   = mthca_port_immutable;
index 35fe506..96e5fb9 100644 (file)
@@ -1485,7 +1485,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
        u16 pkey;
 
        ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0,
-                         mthca_ah_grh_present(to_mah(wr->ah)), 0,
+                         mthca_ah_grh_present(to_mah(wr->ah)), 0, 0, 0,
                          &sqp->ud_header);
 
        err = mthca_read_ah(dev, to_mah(wr->ah), &sqp->ud_header);
index 8a3ad17..cb9f0f2 100644 (file)
@@ -134,7 +134,7 @@ static void record_ird_ord(struct nes_cm_node *, u16, u16);
 /* External CM API Interface */
 /* instance of function pointers for client API */
 /* set address of this instance to cm_core->cm_ops at cm_core alloc */
-static struct nes_cm_ops nes_cm_api = {
+static const struct nes_cm_ops nes_cm_api = {
        mini_cm_accelerated,
        mini_cm_listen,
        mini_cm_del_listen,
@@ -3232,7 +3232,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        int passive_state;
        struct nes_ib_device *nesibdev;
        struct ib_mr *ibmr = NULL;
-       struct ib_phys_buf ibphysbuf;
        struct nes_pd *nespd;
        u64 tagged_offset;
        u8 mpa_frame_offset = 0;
@@ -3316,21 +3315,19 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                u64temp = (unsigned long)nesqp;
                nesibdev = nesvnic->nesibdev;
                nespd = nesqp->nespd;
-               ibphysbuf.addr = nesqp->ietf_frame_pbase + mpa_frame_offset;
-               ibphysbuf.size = buff_len;
                tagged_offset = (u64)(unsigned long)*start_buff;
-               ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd,
-                                                  &ibphysbuf, 1,
-                                                  IB_ACCESS_LOCAL_WRITE,
-                                                  &tagged_offset);
-               if (!ibmr) {
+               ibmr = nes_reg_phys_mr(&nespd->ibpd,
+                               nesqp->ietf_frame_pbase + mpa_frame_offset,
+                               buff_len, IB_ACCESS_LOCAL_WRITE,
+                               &tagged_offset);
+               if (IS_ERR(ibmr)) {
                        nes_debug(NES_DBG_CM, "Unable to register memory region"
                                  "for lSMM for cm_node = %p \n",
                                  cm_node);
                        pci_free_consistent(nesdev->pcidev,
                                            nesqp->private_data_len + nesqp->ietf_frame_size,
                                            nesqp->ietf_frame, nesqp->ietf_frame_pbase);
-                       return -ENOMEM;
+                       return PTR_ERR(ibmr);
                }
 
                ibmr->pd = &nespd->ibpd;
index 32a6420..147c2c8 100644 (file)
@@ -423,7 +423,7 @@ struct nes_cm_core {
 
        struct timer_list       tcp_timer;
 
-       struct nes_cm_ops       *api;
+       const struct nes_cm_ops *api;
 
        int (*post_event)(struct nes_cm_event *event);
        atomic_t                events_posted;
index 2042c0f..6d3a169 100644 (file)
@@ -727,7 +727,7 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti
        if (action == NES_ARP_DELETE) {
                nes_debug(NES_DBG_NETDEV, "DELETE, arp_index=%d\n", arp_index);
                nesadapter->arp_table[arp_index].ip_addr = 0;
-               memset(nesadapter->arp_table[arp_index].mac_addr, 0x00, ETH_ALEN);
+               eth_zero_addr(nesadapter->arp_table[arp_index].mac_addr);
                nes_free_resource(nesadapter, nesadapter->allocated_arps, arp_index);
                return arp_index;
        }
index 137880a..8c4daf7 100644 (file)
@@ -206,80 +206,6 @@ static int nes_dealloc_mw(struct ib_mw *ibmw)
 }
 
 
-/**
- * nes_bind_mw
- */
-static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw,
-               struct ib_mw_bind *ibmw_bind)
-{
-       u64 u64temp;
-       struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
-       struct nes_device *nesdev = nesvnic->nesdev;
-       /* struct nes_mr *nesmr = to_nesmw(ibmw); */
-       struct nes_qp *nesqp = to_nesqp(ibqp);
-       struct nes_hw_qp_wqe *wqe;
-       unsigned long flags = 0;
-       u32 head;
-       u32 wqe_misc = 0;
-       u32 qsize;
-
-       if (nesqp->ibqp_state > IB_QPS_RTS)
-               return -EINVAL;
-
-       spin_lock_irqsave(&nesqp->lock, flags);
-
-       head = nesqp->hwqp.sq_head;
-       qsize = nesqp->hwqp.sq_tail;
-
-       /* Check for SQ overflow */
-       if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
-               spin_unlock_irqrestore(&nesqp->lock, flags);
-               return -ENOMEM;
-       }
-
-       wqe = &nesqp->hwqp.sq_vbase[head];
-       /* nes_debug(NES_DBG_MR, "processing sq wqe at %p, head = %u.\n", wqe, head); */
-       nes_fill_init_qp_wqe(wqe, nesqp, head);
-       u64temp = ibmw_bind->wr_id;
-       set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, u64temp);
-       wqe_misc = NES_IWARP_SQ_OP_BIND;
-
-       wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
-
-       if (ibmw_bind->send_flags & IB_SEND_SIGNALED)
-               wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
-
-       if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_WRITE)
-               wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE;
-       if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_READ)
-               wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_READ;
-
-       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_MISC_IDX, wqe_misc);
-       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX,
-                           ibmw_bind->bind_info.mr->lkey);
-       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MW_IDX, ibmw->rkey);
-       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX,
-                       ibmw_bind->bind_info.length);
-       wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0;
-       u64temp = (u64)ibmw_bind->bind_info.addr;
-       set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX, u64temp);
-
-       head++;
-       if (head >= qsize)
-               head = 0;
-
-       nesqp->hwqp.sq_head = head;
-       barrier();
-
-       nes_write32(nesdev->regs+NES_WQE_ALLOC,
-                       (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
-
-       spin_unlock_irqrestore(&nesqp->lock, flags);
-
-       return 0;
-}
-
-
 /*
  * nes_alloc_fast_mr
  */
@@ -2074,9 +2000,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
 /**
  * nes_reg_phys_mr
  */
-static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
-               struct ib_phys_buf *buffer_list, int num_phys_buf, int acc,
-               u64 * iova_start)
+struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size,
+               int acc, u64 *iova_start)
 {
        u64 region_length;
        struct nes_pd *nespd = to_nespd(ib_pd);
@@ -2088,13 +2013,10 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
        struct nes_vpbl vpbl;
        struct nes_root_vpbl root_vpbl;
        u32 stag;
-       u32 i;
        unsigned long mask;
        u32 stag_index = 0;
        u32 next_stag_index = 0;
        u32 driver_key = 0;
-       u32 root_pbl_index = 0;
-       u32 cur_pbl_index = 0;
        int err = 0;
        int ret = 0;
        u16 pbl_count = 0;
@@ -2113,11 +2035,8 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
 
        next_stag_index >>= 8;
        next_stag_index %= nesadapter->max_mr;
-       if (num_phys_buf > (1024*512)) {
-               return ERR_PTR(-E2BIG);
-       }
 
-       if ((buffer_list[0].addr ^ *iova_start) & ~PAGE_MASK)
+       if ((addr ^ *iova_start) & ~PAGE_MASK)
                return ERR_PTR(-EINVAL);
 
        err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr,
@@ -2132,84 +2051,33 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
                return ERR_PTR(-ENOMEM);
        }
 
-       for (i = 0; i < num_phys_buf; i++) {
+       /* Allocate a 4K buffer for the PBL */
+       vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
+                       &vpbl.pbl_pbase);
+       nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
+                       vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
+       if (!vpbl.pbl_vbase) {
+               nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+               ibmr = ERR_PTR(-ENOMEM);
+               kfree(nesmr);
+               goto reg_phys_err;
+       }
 
-               if ((i & 0x01FF) == 0) {
-                       if (root_pbl_index == 1) {
-                               /* Allocate the root PBL */
-                               root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192,
-                                               &root_vpbl.pbl_pbase);
-                               nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
-                                               root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
-                               if (!root_vpbl.pbl_vbase) {
-                                       pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
-                                                       vpbl.pbl_pbase);
-                                       nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
-                                       kfree(nesmr);
-                                       return ERR_PTR(-ENOMEM);
-                               }
-                               root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, GFP_KERNEL);
-                               if (!root_vpbl.leaf_vpbl) {
-                                       pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
-                                                       root_vpbl.pbl_pbase);
-                                       pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
-                                                       vpbl.pbl_pbase);
-                                       nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
-                                       kfree(nesmr);
-                                       return ERR_PTR(-ENOMEM);
-                               }
-                               root_vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase);
-                               root_vpbl.pbl_vbase[0].pa_high =
-                                               cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
-                               root_vpbl.leaf_vpbl[0] = vpbl;
-                       }
-                       /* Allocate a 4K buffer for the PBL */
-                       vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
-                                       &vpbl.pbl_pbase);
-                       nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
-                                       vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
-                       if (!vpbl.pbl_vbase) {
-                               nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
-                               ibmr = ERR_PTR(-ENOMEM);
-                               kfree(nesmr);
-                               goto reg_phys_err;
-                       }
-                       /* Fill in the root table */
-                       if (1 <= root_pbl_index) {
-                               root_vpbl.pbl_vbase[root_pbl_index].pa_low =
-                                               cpu_to_le32((u32)vpbl.pbl_pbase);
-                               root_vpbl.pbl_vbase[root_pbl_index].pa_high =
-                                               cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
-                               root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
-                       }
-                       root_pbl_index++;
-                       cur_pbl_index = 0;
-               }
 
-               mask = !buffer_list[i].size;
-               if (i != 0)
-                       mask |= buffer_list[i].addr;
-               if (i != num_phys_buf - 1)
-                       mask |= buffer_list[i].addr + buffer_list[i].size;
-
-               if (mask & ~PAGE_MASK) {
-                       nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
-                       nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n");
-                       ibmr = ERR_PTR(-EINVAL);
-                       kfree(nesmr);
-                       goto reg_phys_err;
-               }
+       mask = !size;
 
-               region_length += buffer_list[i].size;
-               if ((i != 0) && (single_page)) {
-                       if ((buffer_list[i-1].addr+PAGE_SIZE) != buffer_list[i].addr)
-                               single_page = 0;
-               }
-               vpbl.pbl_vbase[cur_pbl_index].pa_low = cpu_to_le32((u32)buffer_list[i].addr & PAGE_MASK);
-               vpbl.pbl_vbase[cur_pbl_index++].pa_high =
-                               cpu_to_le32((u32)((((u64)buffer_list[i].addr) >> 32)));
+       if (mask & ~PAGE_MASK) {
+               nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+               nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n");
+               ibmr = ERR_PTR(-EINVAL);
+               kfree(nesmr);
+               goto reg_phys_err;
        }
 
+       region_length += size;
+       vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)addr & PAGE_MASK);
+       vpbl.pbl_vbase[0].pa_high = cpu_to_le32((u32)((((u64)addr) >> 32)));
+
        stag = stag_index << 8;
        stag |= driver_key;
        stag += (u32)stag_key;
@@ -2219,17 +2087,15 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
                        stag, (unsigned long)*iova_start, (unsigned long)region_length, stag_index);
 
        /* Make the leaf PBL the root if only one PBL */
-       if (root_pbl_index == 1) {
-               root_vpbl.pbl_pbase = vpbl.pbl_pbase;
-       }
+       root_vpbl.pbl_pbase = vpbl.pbl_pbase;
 
        if (single_page) {
                pbl_count = 0;
        } else {
-               pbl_count = root_pbl_index;
+               pbl_count = 1;
        }
        ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl,
-                       buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start,
+                       addr, pbl_count, 1, acc, iova_start,
                        &nesmr->pbls_used, &nesmr->pbl_4k);
 
        if (ret == 0) {
@@ -2242,21 +2108,9 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
                ibmr = ERR_PTR(-ENOMEM);
        }
 
-       reg_phys_err:
-       /* free the resources */
-       if (root_pbl_index == 1) {
-               /* single PBL case */
-               pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
-       } else {
-               for (i=0; i<root_pbl_index; i++) {
-                       pci_free_consistent(nesdev->pcidev, 4096, root_vpbl.leaf_vpbl[i].pbl_vbase,
-                                       root_vpbl.leaf_vpbl[i].pbl_pbase);
-               }
-               kfree(root_vpbl.leaf_vpbl);
-               pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
-                               root_vpbl.pbl_pbase);
-       }
-
+reg_phys_err:
+       /* single PBL case */
+       pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
        return ibmr;
 }
 
@@ -2266,17 +2120,13 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
  */
 static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc)
 {
-       struct ib_phys_buf bl;
        u64 kva = 0;
 
        nes_debug(NES_DBG_MR, "\n");
 
-       bl.size = (u64)0xffffffffffULL;
-       bl.addr = 0;
-       return nes_reg_phys_mr(pd, &bl, 1, acc, &kva);
+       return nes_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva);
 }
 
-
 /**
  * nes_reg_user_mr
  */
@@ -3888,12 +3738,10 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
        nesibdev->ibdev.destroy_cq = nes_destroy_cq;
        nesibdev->ibdev.poll_cq = nes_poll_cq;
        nesibdev->ibdev.get_dma_mr = nes_get_dma_mr;
-       nesibdev->ibdev.reg_phys_mr = nes_reg_phys_mr;
        nesibdev->ibdev.reg_user_mr = nes_reg_user_mr;
        nesibdev->ibdev.dereg_mr = nes_dereg_mr;
        nesibdev->ibdev.alloc_mw = nes_alloc_mw;
        nesibdev->ibdev.dealloc_mw = nes_dealloc_mw;
-       nesibdev->ibdev.bind_mw = nes_bind_mw;
 
        nesibdev->ibdev.alloc_mr = nes_alloc_mr;
        nesibdev->ibdev.map_mr_sg = nes_map_mr_sg;
index a204b67..7029088 100644 (file)
@@ -190,4 +190,8 @@ struct nes_qp {
        u8                    pau_state;
        __u64                 nesuqp_addr;
 };
+
+struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
+               u64 addr, u64 size, int acc, u64 *iova_start);
+
 #endif                 /* NES_VERBS_H */
index 9820074..3790771 100644 (file)
@@ -152,9 +152,10 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
        if ((pd->uctx) &&
            (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) &&
            (!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) {
-               status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid,
-                                                   attr->dmac, &vlan_tag,
-                                                   sgid_attr.ndev->ifindex);
+               status = rdma_addr_find_l2_eth_by_grh(&sgid, &attr->grh.dgid,
+                                                     attr->dmac, &vlan_tag,
+                                                     &sgid_attr.ndev->ifindex,
+                                                     NULL);
                if (status) {
                        pr_err("%s(): Failed to resolve dmac from gid." 
                                "status = %d\n", __func__, status);
index 3afb40b..5738493 100644 (file)
@@ -175,7 +175,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
        dev->ibdev.req_notify_cq = ocrdma_arm_cq;
 
        dev->ibdev.get_dma_mr = ocrdma_get_dma_mr;
-       dev->ibdev.reg_phys_mr = ocrdma_reg_kernel_mr;
        dev->ibdev.dereg_mr = ocrdma_dereg_mr;
        dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
 
index 76e96f9..d4c687b 100644 (file)
@@ -3066,169 +3066,6 @@ pl_err:
        return ERR_PTR(-ENOMEM);
 }
 
-#define MAX_KERNEL_PBE_SIZE 65536
-static inline int count_kernel_pbes(struct ib_phys_buf *buf_list,
-                                   int buf_cnt, u32 *pbe_size)
-{
-       u64 total_size = 0;
-       u64 buf_size = 0;
-       int i;
-       *pbe_size = roundup(buf_list[0].size, PAGE_SIZE);
-       *pbe_size = roundup_pow_of_two(*pbe_size);
-
-       /* find the smallest PBE size that we can have */
-       for (i = 0; i < buf_cnt; i++) {
-               /* first addr may not be page aligned, so ignore checking */
-               if ((i != 0) && ((buf_list[i].addr & ~PAGE_MASK) ||
-                                (buf_list[i].size & ~PAGE_MASK))) {
-                       return 0;
-               }
-
-               /* if configured PBE size is greater then the chosen one,
-                * reduce the PBE size.
-                */
-               buf_size = roundup(buf_list[i].size, PAGE_SIZE);
-               /* pbe_size has to be even multiple of 4K 1,2,4,8...*/
-               buf_size = roundup_pow_of_two(buf_size);
-               if (*pbe_size > buf_size)
-                       *pbe_size = buf_size;
-
-               total_size += buf_size;
-       }
-       *pbe_size = *pbe_size > MAX_KERNEL_PBE_SIZE ?
-           (MAX_KERNEL_PBE_SIZE) : (*pbe_size);
-
-       /* num_pbes = total_size / (*pbe_size);  this is implemented below. */
-
-       return total_size >> ilog2(*pbe_size);
-}
-
-static void build_kernel_pbes(struct ib_phys_buf *buf_list, int ib_buf_cnt,
-                             u32 pbe_size, struct ocrdma_pbl *pbl_tbl,
-                             struct ocrdma_hw_mr *hwmr)
-{
-       int i;
-       int idx;
-       int pbes_per_buf = 0;
-       u64 buf_addr = 0;
-       int num_pbes;
-       struct ocrdma_pbe *pbe;
-       int total_num_pbes = 0;
-
-       if (!hwmr->num_pbes)
-               return;
-
-       pbe = (struct ocrdma_pbe *)pbl_tbl->va;
-       num_pbes = 0;
-
-       /* go through the OS phy regions & fill hw pbe entries into pbls. */
-       for (i = 0; i < ib_buf_cnt; i++) {
-               buf_addr = buf_list[i].addr;
-               pbes_per_buf =
-                   roundup_pow_of_two(roundup(buf_list[i].size, PAGE_SIZE)) /
-                   pbe_size;
-               hwmr->len += buf_list[i].size;
-               /* number of pbes can be more for one OS buf, when
-                * buffers are of different sizes.
-                * split the ib_buf to one or more pbes.
-                */
-               for (idx = 0; idx < pbes_per_buf; idx++) {
-                       /* we program always page aligned addresses,
-                        * first unaligned address is taken care by fbo.
-                        */
-                       if (i == 0) {
-                               /* for non zero fbo, assign the
-                                * start of the page.
-                                */
-                               pbe->pa_lo =
-                                   cpu_to_le32((u32) (buf_addr & PAGE_MASK));
-                               pbe->pa_hi =
-                                   cpu_to_le32((u32) upper_32_bits(buf_addr));
-                       } else {
-                               pbe->pa_lo =
-                                   cpu_to_le32((u32) (buf_addr & 0xffffffff));
-                               pbe->pa_hi =
-                                   cpu_to_le32((u32) upper_32_bits(buf_addr));
-                       }
-                       buf_addr += pbe_size;
-                       num_pbes += 1;
-                       total_num_pbes += 1;
-                       pbe++;
-
-                       if (total_num_pbes == hwmr->num_pbes)
-                               goto mr_tbl_done;
-                       /* if the pbl is full storing the pbes,
-                        * move to next pbl.
-                        */
-                       if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
-                               pbl_tbl++;
-                               pbe = (struct ocrdma_pbe *)pbl_tbl->va;
-                               num_pbes = 0;
-                       }
-               }
-       }
-mr_tbl_done:
-       return;
-}
-
-struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *ibpd,
-                                  struct ib_phys_buf *buf_list,
-                                  int buf_cnt, int acc, u64 *iova_start)
-{
-       int status = -ENOMEM;
-       struct ocrdma_mr *mr;
-       struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
-       struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
-       u32 num_pbes;
-       u32 pbe_size = 0;
-
-       if ((acc & IB_ACCESS_REMOTE_WRITE) && !(acc & IB_ACCESS_LOCAL_WRITE))
-               return ERR_PTR(-EINVAL);
-
-       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
-       if (!mr)
-               return ERR_PTR(status);
-
-       num_pbes = count_kernel_pbes(buf_list, buf_cnt, &pbe_size);
-       if (num_pbes == 0) {
-               status = -EINVAL;
-               goto pbl_err;
-       }
-       status = ocrdma_get_pbl_info(dev, mr, num_pbes);
-       if (status)
-               goto pbl_err;
-
-       mr->hwmr.pbe_size = pbe_size;
-       mr->hwmr.fbo = *iova_start - (buf_list[0].addr & PAGE_MASK);
-       mr->hwmr.va = *iova_start;
-       mr->hwmr.local_rd = 1;
-       mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
-       mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
-       mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
-       mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
-       mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
-
-       status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
-       if (status)
-               goto pbl_err;
-       build_kernel_pbes(buf_list, buf_cnt, pbe_size, mr->hwmr.pbl_table,
-                         &mr->hwmr);
-       status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
-       if (status)
-               goto mbx_err;
-
-       mr->ibmr.lkey = mr->hwmr.lkey;
-       if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
-               mr->ibmr.rkey = mr->hwmr.lkey;
-       return &mr->ibmr;
-
-mbx_err:
-       ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
-pbl_err:
-       kfree(mr);
-       return ERR_PTR(status);
-}
-
 static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr)
 {
        struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
index a2f3b4d..8b517fd 100644 (file)
@@ -117,9 +117,6 @@ int ocrdma_post_srq_recv(struct ib_srq *, struct ib_recv_wr *,
 
 int ocrdma_dereg_mr(struct ib_mr *);
 struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc);
-struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *,
-                                  struct ib_phys_buf *buffer_list,
-                                  int num_phys_buf, int acc, u64 *iova_start);
 struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
                                 u64 virt, int acc, struct ib_udata *);
 struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd,
index 13ef22b..fcdf379 100644 (file)
@@ -89,14 +89,14 @@ static int create_file(const char *name, umode_t mode,
 {
        int error;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        *dentry = lookup_one_len(name, parent, strlen(name));
        if (!IS_ERR(*dentry))
                error = qibfs_mknod(d_inode(parent), *dentry,
                                    mode, fops, data);
        else
                error = PTR_ERR(*dentry);
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
 
        return error;
 }
@@ -481,7 +481,7 @@ static int remove_device_files(struct super_block *sb,
        int ret, i;
 
        root = dget(sb->s_root);
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
        snprintf(unit, sizeof(unit), "%u", dd->unit);
        dir = lookup_one_len(unit, root, strlen(unit));
 
@@ -491,7 +491,7 @@ static int remove_device_files(struct super_block *sb,
                goto bail;
        }
 
-       mutex_lock(&d_inode(dir)->i_mutex);
+       inode_lock(d_inode(dir));
        remove_file(dir, "counters");
        remove_file(dir, "counter_names");
        remove_file(dir, "portcounter_names");
@@ -506,13 +506,13 @@ static int remove_device_files(struct super_block *sb,
                }
        }
        remove_file(dir, "flash");
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        ret = simple_rmdir(d_inode(root), dir);
        d_delete(dir);
        dput(dir);
 
 bail:
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
        dput(root);
        return ret;
 }
index 294f5c7..5f53304 100644 (file)
@@ -150,10 +150,7 @@ static struct qib_mr *alloc_mr(int count, struct ib_pd *pd)
        rval = init_qib_mregion(&mr->mr, pd, count);
        if (rval)
                goto bail;
-       /*
-        * ib_reg_phys_mr() will initialize mr->ibmr except for
-        * lkey and rkey.
-        */
+
        rval = qib_alloc_lkey(&mr->mr, 0);
        if (rval)
                goto bail_mregion;
@@ -170,52 +167,6 @@ bail:
        goto done;
 }
 
-/**
- * qib_reg_phys_mr - register a physical memory region
- * @pd: protection domain for this memory region
- * @buffer_list: pointer to the list of physical buffers to register
- * @num_phys_buf: the number of physical buffers to register
- * @iova_start: the starting address passed over IB which maps to this MR
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
-                             struct ib_phys_buf *buffer_list,
-                             int num_phys_buf, int acc, u64 *iova_start)
-{
-       struct qib_mr *mr;
-       int n, m, i;
-       struct ib_mr *ret;
-
-       mr = alloc_mr(num_phys_buf, pd);
-       if (IS_ERR(mr)) {
-               ret = (struct ib_mr *)mr;
-               goto bail;
-       }
-
-       mr->mr.user_base = *iova_start;
-       mr->mr.iova = *iova_start;
-       mr->mr.access_flags = acc;
-
-       m = 0;
-       n = 0;
-       for (i = 0; i < num_phys_buf; i++) {
-               mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
-               mr->mr.map[m]->segs[n].length = buffer_list[i].size;
-               mr->mr.length += buffer_list[i].size;
-               n++;
-               if (n == QIB_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-
-       ret = &mr->ibmr;
-
-bail:
-       return ret;
-}
-
 /**
  * qib_reg_user_mr - register a userspace memory region
  * @pd: protection domain for this memory region
index 40f85bb..3eff35c 100644 (file)
@@ -100,9 +100,10 @@ static u32 credit_table[31] = {
        32768                   /* 1E */
 };
 
-static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map)
+static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map,
+                        gfp_t gfp)
 {
-       unsigned long page = get_zeroed_page(GFP_KERNEL);
+       unsigned long page = get_zeroed_page(gfp);
 
        /*
         * Free the page if someone raced with us installing it.
@@ -121,7 +122,7 @@ static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map)
  * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
  */
 static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
-                    enum ib_qp_type type, u8 port)
+                    enum ib_qp_type type, u8 port, gfp_t gfp)
 {
        u32 i, offset, max_scan, qpn;
        struct qpn_map *map;
@@ -151,7 +152,7 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
        max_scan = qpt->nmaps - !offset;
        for (i = 0;;) {
                if (unlikely(!map->page)) {
-                       get_map_page(qpt, map);
+                       get_map_page(qpt, map, gfp);
                        if (unlikely(!map->page))
                                break;
                }
@@ -983,13 +984,21 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
        size_t sz;
        size_t sg_list_sz;
        struct ib_qp *ret;
+       gfp_t gfp;
+
 
        if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
            init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
-           init_attr->create_flags) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
+           init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
+               return ERR_PTR(-EINVAL);
+
+       /* GFP_NOIO is applicable in RC QPs only */
+       if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
+           init_attr->qp_type != IB_QPT_RC)
+               return ERR_PTR(-EINVAL);
+
+       gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
+                       GFP_NOIO : GFP_KERNEL;
 
        /* Check receive queue parameters if no SRQ is specified. */
        if (!init_attr->srq) {
@@ -1021,7 +1030,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
                sz = sizeof(struct qib_sge) *
                        init_attr->cap.max_send_sge +
                        sizeof(struct qib_swqe);
-               swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
+               swq = __vmalloc((init_attr->cap.max_send_wr + 1) * sz,
+                               gfp, PAGE_KERNEL);
                if (swq == NULL) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail;
@@ -1037,13 +1047,13 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
                } else if (init_attr->cap.max_recv_sge > 1)
                        sg_list_sz = sizeof(*qp->r_sg_list) *
                                (init_attr->cap.max_recv_sge - 1);
-               qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
+               qp = kzalloc(sz + sg_list_sz, gfp);
                if (!qp) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail_swq;
                }
                RCU_INIT_POINTER(qp->next, NULL);
-               qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL);
+               qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), gfp);
                if (!qp->s_hdr) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail_qp;
@@ -1058,8 +1068,16 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
                        qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
                        sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
                                sizeof(struct qib_rwqe);
-                       qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) +
-                                                  qp->r_rq.size * sz);
+                       if (gfp != GFP_NOIO)
+                               qp->r_rq.wq = vmalloc_user(
+                                               sizeof(struct qib_rwq) +
+                                               qp->r_rq.size * sz);
+                       else
+                               qp->r_rq.wq = __vmalloc(
+                                               sizeof(struct qib_rwq) +
+                                               qp->r_rq.size * sz,
+                                               gfp, PAGE_KERNEL);
+
                        if (!qp->r_rq.wq) {
                                ret = ERR_PTR(-ENOMEM);
                                goto bail_qp;
@@ -1090,7 +1108,7 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
                dev = to_idev(ibpd->device);
                dd = dd_from_dev(dev);
                err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type,
-                               init_attr->port_num);
+                               init_attr->port_num, gfp);
                if (err < 0) {
                        ret = ERR_PTR(err);
                        vfree(qp->r_rq.wq);
index de6cb6f..baf1e42 100644 (file)
@@ -346,6 +346,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
        unsigned long flags;
        struct qib_lkey_table *rkt;
        struct qib_pd *pd;
+       int avoid_schedule = 0;
 
        spin_lock_irqsave(&qp->s_lock, flags);
 
@@ -438,11 +439,15 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
            qp->ibqp.qp_type == IB_QPT_RC) {
                if (wqe->length > 0x80000000U)
                        goto bail_inval_free;
+               if (wqe->length <= qp->pmtu)
+                       avoid_schedule = 1;
        } else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport +
-                                 qp->port_num - 1)->ibmtu)
+                                 qp->port_num - 1)->ibmtu) {
                goto bail_inval_free;
-       else
+       } else {
                atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount);
+               avoid_schedule = 1;
+       }
        wqe->ssn = qp->s_ssn++;
        qp->s_head = next;
 
@@ -458,7 +463,7 @@ bail_inval_free:
 bail_inval:
        ret = -EINVAL;
 bail:
-       if (!ret && !wr->next &&
+       if (!ret && !wr->next && !avoid_schedule &&
         !qib_sdma_empty(
           dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) {
                qib_schedule_send(qp);
@@ -2256,7 +2261,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
        ibdev->poll_cq = qib_poll_cq;
        ibdev->req_notify_cq = qib_req_notify_cq;
        ibdev->get_dma_mr = qib_get_dma_mr;
-       ibdev->reg_phys_mr = qib_reg_phys_mr;
        ibdev->reg_user_mr = qib_reg_user_mr;
        ibdev->dereg_mr = qib_dereg_mr;
        ibdev->alloc_mr = qib_alloc_mr;
index bc803f3..6c5e777 100644 (file)
@@ -1032,10 +1032,6 @@ int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
 
 struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc);
 
-struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
-                             struct ib_phys_buf *buffer_list,
-                             int num_phys_buf, int acc, u64 *iova_start);
-
 struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                              u64 virt_addr, int mr_access_flags,
                              struct ib_udata *udata);
index f8ea069..b2fb528 100644 (file)
@@ -286,15 +286,13 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
        struct qib_ibdev *dev = to_idev(ibqp->device);
        struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num);
        struct qib_mcast *mcast = NULL;
-       struct qib_mcast_qp *p, *tmp;
+       struct qib_mcast_qp *p, *tmp, *delp = NULL;
        struct rb_node *n;
        int last = 0;
        int ret;
 
-       if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
-               ret = -EINVAL;
-               goto bail;
-       }
+       if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
+               return -EINVAL;
 
        spin_lock_irq(&ibp->lock);
 
@@ -303,8 +301,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
        while (1) {
                if (n == NULL) {
                        spin_unlock_irq(&ibp->lock);
-                       ret = -EINVAL;
-                       goto bail;
+                       return -EINVAL;
                }
 
                mcast = rb_entry(n, struct qib_mcast, rb_node);
@@ -328,6 +325,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
                 */
                list_del_rcu(&p->list);
                mcast->n_attached--;
+               delp = p;
 
                /* If this was the last attached QP, remove the GID too. */
                if (list_empty(&mcast->qp_list)) {
@@ -338,15 +336,16 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
        }
 
        spin_unlock_irq(&ibp->lock);
+       /* QP not attached */
+       if (!delp)
+               return -EINVAL;
+       /*
+        * Wait for any list walkers to finish before freeing the
+        * list element.
+        */
+       wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
+       qib_mcast_qp_free(delp);
 
-       if (p) {
-               /*
-                * Wait for any list walkers to finish before freeing the
-                * list element.
-                */
-               wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
-               qib_mcast_qp_free(p);
-       }
        if (last) {
                atomic_dec(&mcast->refcount);
                wait_event(mcast->wait, !atomic_read(&mcast->refcount));
@@ -355,11 +354,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
                dev->n_mcast_grps_allocated--;
                spin_unlock_irq(&dev->n_mcast_grps_lock);
        }
-
-       ret = 0;
-
-bail:
-       return ret;
+       return 0;
 }
 
 int qib_mcast_tree_empty(struct qib_ibport *ibp)
index 5e55b8b..92dc66c 100644 (file)
@@ -157,8 +157,9 @@ void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow)
                                                        qp_flow,
                                                        &flowinfo_ops);
        if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) {
-               usnic_err("Failed to create dbg fs entry for flow %u\n",
-                               qp_flow->flow->flow_id);
+               usnic_err("Failed to create dbg fs entry for flow %u with error %ld\n",
+                               qp_flow->flow->flow_id,
+                               PTR_ERR(qp_flow->dbgfs_dentry));
        }
 }
 
index fcea3a2..5f44b66 100644 (file)
@@ -521,7 +521,7 @@ int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
 
        if (!status) {
                qp_grp->state = new_state;
-               usnic_info("Transistioned %u from %s to %s",
+               usnic_info("Transitioned %u from %s to %s",
                qp_grp->grp_id,
                usnic_ib_qp_grp_state_to_string(old_state),
                usnic_ib_qp_grp_state_to_string(new_state));
@@ -575,7 +575,7 @@ alloc_res_chunk_list(struct usnic_vnic *vnic,
        return res_chunk_list;
 
 out_free_res:
-       for (i--; i > 0; i--)
+       for (i--; i >= 0; i--)
                usnic_vnic_put_resources(res_chunk_list[i]);
        kfree(res_chunk_list);
        return ERR_PTR(err);
index f8e3211..6cdb4d2 100644 (file)
@@ -51,7 +51,7 @@
 
 static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver)
 {
-       *fw_ver = (u64) *fw_ver_str;
+       *fw_ver = *((u64 *)fw_ver_str);
 }
 
 static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
@@ -571,20 +571,20 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
        qp_grp = to_uqp_grp(ibqp);
 
-       /* TODO: Future Support All States */
        mutex_lock(&qp_grp->vf->pf->usdev_lock);
-       if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT) {
-               status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_INIT, NULL);
-       } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTR) {
-               status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTR, NULL);
-       } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTS) {
-               status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTS, NULL);
+       if ((attr_mask & IB_QP_PORT) && attr->port_num != 1) {
+               /* usnic devices only have one port */
+               status = -EINVAL;
+               goto out_unlock;
+       }
+       if (attr_mask & IB_QP_STATE) {
+               status = usnic_ib_qp_grp_modify(qp_grp, attr->qp_state, NULL);
        } else {
-               usnic_err("Unexpected combination mask: %u state: %u\n",
-                               attr_mask & IB_QP_STATE, attr->qp_state);
+               usnic_err("Unhandled request, attr_mask=0x%x\n", attr_mask);
                status = -EINVAL;
        }
 
+out_unlock:
        mutex_unlock(&qp_grp->vf->pf->usdev_lock);
        return status;
 }
@@ -625,8 +625,8 @@ struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
                        virt_addr, length);
 
        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
-       if (IS_ERR_OR_NULL(mr))
-               return ERR_PTR(mr ? PTR_ERR(mr) : -ENOMEM);
+       if (!mr)
+               return ERR_PTR(-ENOMEM);
 
        mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length,
                                        access_flags, 0);
index 414eaa5..0d9d2e6 100644 (file)
@@ -43,8 +43,6 @@ int usnic_ib_query_device(struct ib_device *ibdev,
                          struct ib_udata *uhw);
 int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
                                struct ib_port_attr *props);
-enum rdma_protocol_type
-usnic_ib_query_protocol(struct ib_device *device, u8 port_num);
 int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
                                int qp_attr_mask,
                                struct ib_qp_init_attr *qp_init_attr);
index 66de93f..8875107 100644 (file)
@@ -237,7 +237,7 @@ usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
        struct usnic_vnic_res *res;
        int i;
 
-       if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 1 || !owner)
+       if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 0 || !owner)
                return ERR_PTR(-EINVAL);
 
        ret = kzalloc(sizeof(*ret), GFP_ATOMIC);
@@ -247,26 +247,28 @@ usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
                return ERR_PTR(-ENOMEM);
        }
 
-       ret->res = kzalloc(sizeof(*(ret->res))*cnt, GFP_ATOMIC);
-       if (!ret->res) {
-               usnic_err("Failed to allocate resources for %s. Out of memory\n",
-                               usnic_vnic_pci_name(vnic));
-               kfree(ret);
-               return ERR_PTR(-ENOMEM);
-       }
+       if (cnt > 0) {
+               ret->res = kcalloc(cnt, sizeof(*(ret->res)), GFP_ATOMIC);
+               if (!ret->res) {
+                       usnic_err("Failed to allocate resources for %s. Out of memory\n",
+                                       usnic_vnic_pci_name(vnic));
+                       kfree(ret);
+                       return ERR_PTR(-ENOMEM);
+               }
 
-       spin_lock(&vnic->res_lock);
-       src = &vnic->chunks[type];
-       for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
-               res = src->res[i];
-               if (!res->owner) {
-                       src->free_cnt--;
-                       res->owner = owner;
-                       ret->res[ret->cnt++] = res;
+               spin_lock(&vnic->res_lock);
+               src = &vnic->chunks[type];
+               for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
+                       res = src->res[i];
+                       if (!res->owner) {
+                               src->free_cnt--;
+                               res->owner = owner;
+                               ret->res[ret->cnt++] = res;
+                       }
                }
-       }
 
-       spin_unlock(&vnic->res_lock);
+               spin_unlock(&vnic->res_lock);
+       }
        ret->type = type;
        ret->vnic = vnic;
        WARN_ON(ret->cnt != cnt);
@@ -281,14 +283,16 @@ void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk)
        int i;
        struct usnic_vnic *vnic = chunk->vnic;
 
-       spin_lock(&vnic->res_lock);
-       while ((i = --chunk->cnt) >= 0) {
-               res = chunk->res[i];
-               chunk->res[i] = NULL;
-               res->owner = NULL;
-               vnic->chunks[res->type].free_cnt++;
+       if (chunk->cnt > 0) {
+               spin_lock(&vnic->res_lock);
+               while ((i = --chunk->cnt) >= 0) {
+                       res = chunk->res[i];
+                       chunk->res[i] = NULL;
+                       res->owner = NULL;
+                       vnic->chunks[res->type].free_cnt++;
+               }
+               spin_unlock(&vnic->res_lock);
        }
-       spin_unlock(&vnic->res_lock);
 
        kfree(chunk->res);
        kfree(chunk);
index 3ede103..a6f3eab 100644 (file)
@@ -495,7 +495,6 @@ void ipoib_dev_cleanup(struct net_device *dev);
 void ipoib_mcast_join_task(struct work_struct *work);
 void ipoib_mcast_carrier_on_task(struct work_struct *work);
 void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
-void ipoib_mcast_free(struct ipoib_mcast *mc);
 
 void ipoib_mcast_restart_task(struct work_struct *work);
 int ipoib_mcast_start_thread(struct net_device *dev);
@@ -549,8 +548,9 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
 
 int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
                       union ib_gid *mgid, int set_qkey);
-int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast);
-struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid);
+void ipoib_mcast_remove_list(struct list_head *remove_list);
+void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
+                               struct list_head *remove_list);
 
 int ipoib_init_qp(struct net_device *dev);
 int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
index 3ae9726..917e46e 100644 (file)
@@ -70,7 +70,6 @@ static struct ib_qp_attr ipoib_cm_err_attr = {
 #define IPOIB_CM_RX_DRAIN_WRID 0xffffffff
 
 static struct ib_send_wr ipoib_cm_rx_drain_wr = {
-       .wr_id = IPOIB_CM_RX_DRAIN_WRID,
        .opcode = IB_WR_SEND,
 };
 
@@ -223,6 +222,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
         * error" WC will be immediately generated for each WR we post.
         */
        p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
+       ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID;
        if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
                ipoib_warn(priv, "failed to post drain wr\n");
 
@@ -1522,8 +1522,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
 int ipoib_cm_dev_init(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
-       int i, ret;
-       struct ib_device_attr attr;
+       int max_srq_sge, i;
 
        INIT_LIST_HEAD(&priv->cm.passive_ids);
        INIT_LIST_HEAD(&priv->cm.reap_list);
@@ -1540,19 +1539,13 @@ int ipoib_cm_dev_init(struct net_device *dev)
 
        skb_queue_head_init(&priv->cm.skb_queue);
 
-       ret = ib_query_device(priv->ca, &attr);
-       if (ret) {
-               printk(KERN_WARNING "ib_query_device() failed with %d\n", ret);
-               return ret;
-       }
-
-       ipoib_dbg(priv, "max_srq_sge=%d\n", attr.max_srq_sge);
+       ipoib_dbg(priv, "max_srq_sge=%d\n", priv->ca->attrs.max_srq_sge);
 
-       attr.max_srq_sge = min_t(int, IPOIB_CM_RX_SG, attr.max_srq_sge);
-       ipoib_cm_create_srq(dev, attr.max_srq_sge);
+       max_srq_sge = min_t(int, IPOIB_CM_RX_SG, priv->ca->attrs.max_srq_sge);
+       ipoib_cm_create_srq(dev, max_srq_sge);
        if (ipoib_cm_has_srq(dev)) {
-               priv->cm.max_cm_mtu = attr.max_srq_sge * PAGE_SIZE - 0x10;
-               priv->cm.num_frags  = attr.max_srq_sge;
+               priv->cm.max_cm_mtu = max_srq_sge * PAGE_SIZE - 0x10;
+               priv->cm.num_frags  = max_srq_sge;
                ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n",
                          priv->cm.max_cm_mtu, priv->cm.num_frags);
        } else {
index 078cadd..a53fa5f 100644 (file)
@@ -40,15 +40,11 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
                              struct ethtool_drvinfo *drvinfo)
 {
        struct ipoib_dev_priv *priv = netdev_priv(netdev);
-       struct ib_device_attr *attr;
-
-       attr = kmalloc(sizeof(*attr), GFP_KERNEL);
-       if (attr && !ib_query_device(priv->ca, attr))
-               snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
-                        "%d.%d.%d", (int)(attr->fw_ver >> 32),
-                        (int)(attr->fw_ver >> 16) & 0xffff,
-                        (int)attr->fw_ver & 0xffff);
-       kfree(attr);
+
+       snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+                "%d.%d.%d", (int)(priv->ca->attrs.fw_ver >> 32),
+                (int)(priv->ca->attrs.fw_ver >> 16) & 0xffff,
+                (int)priv->ca->attrs.fw_ver & 0xffff);
 
        strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device),
                sizeof(drvinfo->bus_info));
index 7d32818..25509bb 100644 (file)
@@ -1150,8 +1150,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
        unsigned long flags;
        int i;
        LIST_HEAD(remove_list);
-       struct ipoib_mcast *mcast, *tmcast;
-       struct net_device *dev = priv->dev;
 
        if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
                return;
@@ -1179,18 +1177,8 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
                                                          lockdep_is_held(&priv->lock))) != NULL) {
                        /* was the neigh idle for two GC periods */
                        if (time_after(neigh_obsolete, neigh->alive)) {
-                               u8 *mgid = neigh->daddr + 4;
 
-                               /* Is this multicast ? */
-                               if (*mgid == 0xff) {
-                                       mcast = __ipoib_mcast_find(dev, mgid);
-
-                                       if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
-                                               list_del(&mcast->list);
-                                               rb_erase(&mcast->rb_node, &priv->multicast_tree);
-                                               list_add_tail(&mcast->list, &remove_list);
-                                       }
-                               }
+                               ipoib_check_and_add_mcast_sendonly(priv, neigh->daddr + 4, &remove_list);
 
                                rcu_assign_pointer(*np,
                                                   rcu_dereference_protected(neigh->hnext,
@@ -1207,10 +1195,7 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
 
 out_unlock:
        spin_unlock_irqrestore(&priv->lock, flags);
-       list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
-               ipoib_mcast_leave(dev, mcast);
-               ipoib_mcast_free(mcast);
-       }
+       ipoib_mcast_remove_list(&remove_list);
 }
 
 static void ipoib_reap_neigh(struct work_struct *work)
@@ -1777,26 +1762,7 @@ int ipoib_add_pkey_attr(struct net_device *dev)
 
 int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
 {
-       struct ib_device_attr *device_attr;
-       int result = -ENOMEM;
-
-       device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL);
-       if (!device_attr) {
-               printk(KERN_WARNING "%s: allocation of %zu bytes failed\n",
-                      hca->name, sizeof *device_attr);
-               return result;
-       }
-
-       result = ib_query_device(hca, device_attr);
-       if (result) {
-               printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n",
-                      hca->name, result);
-               kfree(device_attr);
-               return result;
-       }
-       priv->hca_caps = device_attr->device_cap_flags;
-
-       kfree(device_attr);
+       priv->hca_caps = hca->attrs.device_cap_flags;
 
        if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
                priv->dev->hw_features = NETIF_F_SG |
index f357ca6..050dfa1 100644 (file)
@@ -106,7 +106,7 @@ static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv,
                queue_delayed_work(priv->wq, &priv->mcast_task, 0);
 }
 
-void ipoib_mcast_free(struct ipoib_mcast *mcast)
+static void ipoib_mcast_free(struct ipoib_mcast *mcast)
 {
        struct net_device *dev = mcast->dev;
        int tx_dropped = 0;
@@ -153,7 +153,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
        return mcast;
 }
 
-struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
+static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct rb_node *n = priv->multicast_tree.rb_node;
@@ -677,7 +677,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev)
        return 0;
 }
 
-int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
+static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        int ret = 0;
@@ -704,6 +704,35 @@ int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
        return 0;
 }
 
+/*
+ * Check if the multicast group is sendonly. If so remove it from the maps
+ * and add to the remove list
+ */
+void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
+                               struct list_head *remove_list)
+{
+       /* Is this multicast ? */
+       if (*mgid == 0xff) {
+               struct ipoib_mcast *mcast = __ipoib_mcast_find(priv->dev, mgid);
+
+               if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
+                       list_del(&mcast->list);
+                       rb_erase(&mcast->rb_node, &priv->multicast_tree);
+                       list_add_tail(&mcast->list, remove_list);
+               }
+       }
+}
+
+void ipoib_mcast_remove_list(struct list_head *remove_list)
+{
+       struct ipoib_mcast *mcast, *tmcast;
+
+       list_for_each_entry_safe(mcast, tmcast, remove_list, list) {
+               ipoib_mcast_leave(mcast->dev, mcast);
+               ipoib_mcast_free(mcast);
+       }
+}
+
 void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -810,10 +839,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
                if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
                        wait_for_completion(&mcast->done);
 
-       list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
-               ipoib_mcast_leave(dev, mcast);
-               ipoib_mcast_free(mcast);
-       }
+       ipoib_mcast_remove_list(&remove_list);
 }
 
 static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast)
@@ -939,10 +965,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
                if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
                        wait_for_completion(&mcast->done);
 
-       list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
-               ipoib_mcast_leave(mcast->dev, mcast);
-               ipoib_mcast_free(mcast);
-       }
+       ipoib_mcast_remove_list(&remove_list);
 
        /*
         * Double check that we are still up
index 9080161..c827c93 100644 (file)
@@ -644,7 +644,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
 
                ib_conn = &iser_conn->ib_conn;
                if (ib_conn->pi_support) {
-                       u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap;
+                       u32 sig_caps = ib_conn->device->ib_device->attrs.sig_prot_cap;
 
                        scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
                        scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
@@ -656,7 +656,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
                 * max fastreg page list length.
                 */
                shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize,
-                       ib_conn->device->dev_attr.max_fast_reg_page_list_len);
+                       ib_conn->device->ib_device->attrs.max_fast_reg_page_list_len);
                shost->max_sectors = min_t(unsigned int,
                        1024, (shost->sg_tablesize * PAGE_SIZE) >> 9);
 
@@ -1059,7 +1059,8 @@ static int __init iser_init(void)
        release_wq = alloc_workqueue("release workqueue", 0, 0);
        if (!release_wq) {
                iser_err("failed to allocate release workqueue\n");
-               return -ENOMEM;
+               err = -ENOMEM;
+               goto err_alloc_wq;
        }
 
        iscsi_iser_scsi_transport = iscsi_register_transport(
@@ -1067,12 +1068,14 @@ static int __init iser_init(void)
        if (!iscsi_iser_scsi_transport) {
                iser_err("iscsi_register_transport failed\n");
                err = -EINVAL;
-               goto register_transport_failure;
+               goto err_reg;
        }
 
        return 0;
 
-register_transport_failure:
+err_reg:
+       destroy_workqueue(release_wq);
+err_alloc_wq:
        kmem_cache_destroy(ig.desc_cache);
 
        return err;
index 8a5998e..95f0a64 100644 (file)
@@ -48,6 +48,7 @@
 #include <scsi/scsi_transport_iscsi.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
+#include <scsi/iser.h>
 
 #include <linux/interrupt.h>
 #include <linux/wait.h>
                                         - ISER_MAX_RX_MISC_PDUS) /     \
                                         (1 + ISER_INFLIGHT_DATAOUTS))
 
-#define ISER_WC_BATCH_COUNT   16
 #define ISER_SIGNAL_CMD_COUNT 32
 
-#define ISER_VER                       0x10
-#define ISER_WSV                       0x08
-#define ISER_RSV                       0x04
-
-#define ISER_FASTREG_LI_WRID           0xffffffffffffffffULL
-#define ISER_BEACON_WRID               0xfffffffffffffffeULL
-
-/**
- * struct iser_hdr - iSER header
- *
- * @flags:        flags support (zbva, remote_inv)
- * @rsvd:         reserved
- * @write_stag:   write rkey
- * @write_va:     write virtual address
- * @reaf_stag:    read rkey
- * @read_va:      read virtual address
- */
-struct iser_hdr {
-       u8      flags;
-       u8      rsvd[3];
-       __be32  write_stag;
-       __be64  write_va;
-       __be32  read_stag;
-       __be64  read_va;
-} __attribute__((packed));
-
-
-#define ISER_ZBVA_NOT_SUPPORTED                0x80
-#define ISER_SEND_W_INV_NOT_SUPPORTED  0x40
-
-struct iser_cm_hdr {
-       u8      flags;
-       u8      rsvd[3];
-} __packed;
-
 /* Constant PDU lengths calculations */
-#define ISER_HEADERS_LEN  (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
+#define ISER_HEADERS_LEN       (sizeof(struct iser_ctrl) + sizeof(struct iscsi_hdr))
 
 #define ISER_RECV_DATA_SEG_LEN 128
 #define ISER_RX_PAYLOAD_SIZE   (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
@@ -269,7 +234,7 @@ enum iser_desc_type {
 #define ISER_MAX_WRS 7
 
 /**
- * struct iser_tx_desc - iSER TX descriptor (for send wr_id)
+ * struct iser_tx_desc - iSER TX descriptor
  *
  * @iser_header:   iser header
  * @iscsi_header:  iscsi header
@@ -287,12 +252,13 @@ enum iser_desc_type {
  * @sig_attrs:     Signature attributes
  */
 struct iser_tx_desc {
-       struct iser_hdr              iser_header;
+       struct iser_ctrl             iser_header;
        struct iscsi_hdr             iscsi_header;
        enum   iser_desc_type        type;
        u64                          dma_addr;
        struct ib_sge                tx_sg[2];
        int                          num_sge;
+       struct ib_cqe                cqe;
        bool                         mapped;
        u8                           wr_idx;
        union iser_wr {
@@ -306,9 +272,10 @@ struct iser_tx_desc {
 };
 
 #define ISER_RX_PAD_SIZE       (256 - (ISER_RX_PAYLOAD_SIZE + \
-                                       sizeof(u64) + sizeof(struct ib_sge)))
+                                sizeof(u64) + sizeof(struct ib_sge) + \
+                                sizeof(struct ib_cqe)))
 /**
- * struct iser_rx_desc - iSER RX descriptor (for recv wr_id)
+ * struct iser_rx_desc - iSER RX descriptor
  *
  * @iser_header:   iser header
  * @iscsi_header:  iscsi header
@@ -318,12 +285,32 @@ struct iser_tx_desc {
  * @pad:           for sense data TODO: Modify to maximum sense length supported
  */
 struct iser_rx_desc {
-       struct iser_hdr              iser_header;
+       struct iser_ctrl             iser_header;
        struct iscsi_hdr             iscsi_header;
        char                         data[ISER_RECV_DATA_SEG_LEN];
        u64                          dma_addr;
        struct ib_sge                rx_sg;
+       struct ib_cqe                cqe;
        char                         pad[ISER_RX_PAD_SIZE];
+} __packed;
+
+/**
+ * struct iser_login_desc - iSER login descriptor
+ *
+ * @req:           pointer to login request buffer
+ * @rsp:           pointer to login response buffer
+ * @req_dma:       DMA address of login request buffer
+ * @rsp_dma:       DMA address of login response buffer
+ * @sge:           IB sge for login post recv
+ * @cqe:           completion handler
+ */
+struct iser_login_desc {
+       void                         *req;
+       void                         *rsp;
+       u64                          req_dma;
+       u64                          rsp_dma;
+       struct ib_sge                sge;
+       struct ib_cqe                cqe;
 } __attribute__((packed));
 
 struct iser_conn;
@@ -333,18 +320,12 @@ struct iscsi_iser_task;
 /**
  * struct iser_comp - iSER completion context
  *
- * @device:     pointer to device handle
  * @cq:         completion queue
- * @wcs:        work completion array
- * @tasklet:    Tasklet handle
  * @active_qps: Number of active QPs attached
  *              to completion context
  */
 struct iser_comp {
-       struct iser_device      *device;
        struct ib_cq            *cq;
-       struct ib_wc             wcs[ISER_WC_BATCH_COUNT];
-       struct tasklet_struct    tasklet;
        int                      active_qps;
 };
 
@@ -380,7 +361,6 @@ struct iser_reg_ops {
  *
  * @ib_device:     RDMA device
  * @pd:            Protection Domain for this device
- * @dev_attr:      Device attributes container
  * @mr:            Global DMA memory region
  * @event_handler: IB events handle routine
  * @ig_list:      entry in devices list
@@ -389,18 +369,19 @@ struct iser_reg_ops {
  *                 cpus and device max completion vectors
  * @comps:         Dinamically allocated array of completion handlers
  * @reg_ops:       Registration ops
+ * @remote_inv_sup: Remote invalidate is supported on this device
  */
 struct iser_device {
        struct ib_device             *ib_device;
        struct ib_pd                 *pd;
-       struct ib_device_attr        dev_attr;
        struct ib_mr                 *mr;
        struct ib_event_handler      event_handler;
        struct list_head             ig_list;
        int                          refcount;
        int                          comps_used;
        struct iser_comp             *comps;
-       struct iser_reg_ops          *reg_ops;
+       const struct iser_reg_ops    *reg_ops;
+       bool                         remote_inv_sup;
 };
 
 #define ISER_CHECK_GUARD       0xc0
@@ -475,10 +456,11 @@ struct iser_fr_pool {
  * @rx_wr:               receive work request for batch posts
  * @device:              reference to iser device
  * @comp:                iser completion context
- * @pi_support:          Indicate device T10-PI support
- * @beacon:              beacon send wr to signal all flush errors were drained
- * @flush_comp:          completes when all connection completions consumed
  * @fr_pool:             connection fast registration poool
+ * @pi_support:          Indicate device T10-PI support
+ * @last:                last send wr to signal all flush errors were drained
+ * @last_cqe:            cqe handler for last wr
+ * @last_comp:           completes when all connection completions consumed
  */
 struct ib_conn {
        struct rdma_cm_id           *cma_id;
@@ -488,10 +470,12 @@ struct ib_conn {
        struct ib_recv_wr            rx_wr[ISER_MIN_POSTED_RX];
        struct iser_device          *device;
        struct iser_comp            *comp;
-       bool                         pi_support;
-       struct ib_send_wr            beacon;
-       struct completion            flush_comp;
        struct iser_fr_pool          fr_pool;
+       bool                         pi_support;
+       struct ib_send_wr            last;
+       struct ib_cqe                last_cqe;
+       struct ib_cqe                reg_cqe;
+       struct completion            last_comp;
 };
 
 /**
@@ -514,11 +498,7 @@ struct ib_conn {
  * @up_completion:    connection establishment completed
  *                    (state is ISER_CONN_UP)
  * @conn_list:        entry in ig conn list
- * @login_buf:        login data buffer (stores login parameters)
- * @login_req_buf:    login request buffer
- * @login_req_dma:    login request buffer dma address
- * @login_resp_buf:   login response buffer
- * @login_resp_dma:   login response buffer dma address
+ * @login_desc:       login descriptor
  * @rx_desc_head:     head of rx_descs cyclic buffer
  * @rx_descs:         rx buffers array (cyclic buffer)
  * @num_rx_descs:     number of rx descriptors
@@ -541,15 +521,13 @@ struct iser_conn {
        struct completion            ib_completion;
        struct completion            up_completion;
        struct list_head             conn_list;
-
-       char                         *login_buf;
-       char                         *login_req_buf, *login_resp_buf;
-       u64                          login_req_dma, login_resp_dma;
+       struct iser_login_desc       login_desc;
        unsigned int                 rx_desc_head;
        struct iser_rx_desc          *rx_descs;
        u32                          num_rx_descs;
        unsigned short               scsi_sg_tablesize;
        unsigned int                 scsi_max_sectors;
+       bool                         snd_w_inv;
 };
 
 /**
@@ -579,9 +557,8 @@ struct iscsi_iser_task {
 
 struct iser_page_vec {
        u64 *pages;
-       int length;
-       int offset;
-       int data_size;
+       int npages;
+       struct ib_mr fake_mr;
 };
 
 /**
@@ -633,12 +610,14 @@ int iser_conn_terminate(struct iser_conn *iser_conn);
 
 void iser_release_work(struct work_struct *work);
 
-void iser_rcv_completion(struct iser_rx_desc *desc,
-                        unsigned long dto_xfer_len,
-                        struct ib_conn *ib_conn);
-
-void iser_snd_completion(struct iser_tx_desc *desc,
-                        struct ib_conn *ib_conn);
+void iser_err_comp(struct ib_wc *wc, const char *type);
+void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc);
 
 void iser_task_rdma_init(struct iscsi_iser_task *task);
 
@@ -651,7 +630,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
                                     enum iser_data_dir cmd_dir);
 
 int iser_reg_rdma_mem(struct iscsi_iser_task *task,
-                     enum iser_data_dir dir);
+                     enum iser_data_dir dir,
+                     bool all_imm);
 void iser_unreg_rdma_mem(struct iscsi_iser_task *task,
                         enum iser_data_dir dir);
 
@@ -719,4 +699,28 @@ iser_tx_next_wr(struct iser_tx_desc *tx_desc)
        return cur_wr;
 }
 
+static inline struct iser_conn *
+to_iser_conn(struct ib_conn *ib_conn)
+{
+       return container_of(ib_conn, struct iser_conn, ib_conn);
+}
+
+static inline struct iser_rx_desc *
+iser_rx(struct ib_cqe *cqe)
+{
+       return container_of(cqe, struct iser_rx_desc, cqe);
+}
+
+static inline struct iser_tx_desc *
+iser_tx(struct ib_cqe *cqe)
+{
+       return container_of(cqe, struct iser_tx_desc, cqe);
+}
+
+static inline struct iser_login_desc *
+iser_login(struct ib_cqe *cqe)
+{
+       return container_of(cqe, struct iser_login_desc, cqe);
+}
+
 #endif
index ffd00c4..ed54b38 100644 (file)
@@ -51,7 +51,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
        struct iscsi_iser_task *iser_task = task->dd_data;
        struct iser_mem_reg *mem_reg;
        int err;
-       struct iser_hdr *hdr = &iser_task->desc.iser_header;
+       struct iser_ctrl *hdr = &iser_task->desc.iser_header;
        struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];
 
        err = iser_dma_map_task_data(iser_task,
@@ -72,7 +72,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
                        return err;
        }
 
-       err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
+       err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN, false);
        if (err) {
                iser_err("Failed to set up Data-IN RDMA\n");
                return err;
@@ -104,7 +104,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
        struct iscsi_iser_task *iser_task = task->dd_data;
        struct iser_mem_reg *mem_reg;
        int err;
-       struct iser_hdr *hdr = &iser_task->desc.iser_header;
+       struct iser_ctrl *hdr = &iser_task->desc.iser_header;
        struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
        struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];
 
@@ -126,7 +126,8 @@ iser_prepare_write_cmd(struct iscsi_task *task,
                        return err;
        }
 
-       err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
+       err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT,
+                               buf_out->data_len == imm_sz);
        if (err != 0) {
                iser_err("Failed to register write cmd RDMA mem\n");
                return err;
@@ -166,7 +167,7 @@ static void iser_create_send_desc(struct iser_conn  *iser_conn,
        ib_dma_sync_single_for_cpu(device->ib_device,
                tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
 
-       memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
+       memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl));
        tx_desc->iser_header.flags = ISER_VER;
        tx_desc->num_sge = 1;
 }
@@ -174,73 +175,63 @@ static void iser_create_send_desc(struct iser_conn        *iser_conn,
 static void iser_free_login_buf(struct iser_conn *iser_conn)
 {
        struct iser_device *device = iser_conn->ib_conn.device;
+       struct iser_login_desc *desc = &iser_conn->login_desc;
 
-       if (!iser_conn->login_buf)
+       if (!desc->req)
                return;
 
-       if (iser_conn->login_req_dma)
-               ib_dma_unmap_single(device->ib_device,
-                                   iser_conn->login_req_dma,
-                                   ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+       ib_dma_unmap_single(device->ib_device, desc->req_dma,
+                           ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
 
-       if (iser_conn->login_resp_dma)
-               ib_dma_unmap_single(device->ib_device,
-                                   iser_conn->login_resp_dma,
-                                   ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+       ib_dma_unmap_single(device->ib_device, desc->rsp_dma,
+                           ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
 
-       kfree(iser_conn->login_buf);
+       kfree(desc->req);
+       kfree(desc->rsp);
 
        /* make sure we never redo any unmapping */
-       iser_conn->login_req_dma = 0;
-       iser_conn->login_resp_dma = 0;
-       iser_conn->login_buf = NULL;
+       desc->req = NULL;
+       desc->rsp = NULL;
 }
 
 static int iser_alloc_login_buf(struct iser_conn *iser_conn)
 {
        struct iser_device *device = iser_conn->ib_conn.device;
-       int                     req_err, resp_err;
-
-       BUG_ON(device == NULL);
-
-       iser_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
-                                    ISER_RX_LOGIN_SIZE, GFP_KERNEL);
-       if (!iser_conn->login_buf)
-               goto out_err;
-
-       iser_conn->login_req_buf  = iser_conn->login_buf;
-       iser_conn->login_resp_buf = iser_conn->login_buf +
-                                               ISCSI_DEF_MAX_RECV_SEG_LEN;
-
-       iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
-                                                    iser_conn->login_req_buf,
-                                                    ISCSI_DEF_MAX_RECV_SEG_LEN,
-                                                    DMA_TO_DEVICE);
-
-       iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
-                                                     iser_conn->login_resp_buf,
-                                                     ISER_RX_LOGIN_SIZE,
-                                                     DMA_FROM_DEVICE);
-
-       req_err  = ib_dma_mapping_error(device->ib_device,
-                                       iser_conn->login_req_dma);
-       resp_err = ib_dma_mapping_error(device->ib_device,
-                                       iser_conn->login_resp_dma);
-
-       if (req_err || resp_err) {
-               if (req_err)
-                       iser_conn->login_req_dma = 0;
-               if (resp_err)
-                       iser_conn->login_resp_dma = 0;
-               goto free_login_buf;
-       }
+       struct iser_login_desc *desc = &iser_conn->login_desc;
+
+       desc->req = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN, GFP_KERNEL);
+       if (!desc->req)
+               return -ENOMEM;
+
+       desc->req_dma = ib_dma_map_single(device->ib_device, desc->req,
+                                         ISCSI_DEF_MAX_RECV_SEG_LEN,
+                                         DMA_TO_DEVICE);
+       if (ib_dma_mapping_error(device->ib_device,
+                               desc->req_dma))
+               goto free_req;
+
+       desc->rsp = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
+       if (!desc->rsp)
+               goto unmap_req;
+
+       desc->rsp_dma = ib_dma_map_single(device->ib_device, desc->rsp,
+                                          ISER_RX_LOGIN_SIZE,
+                                          DMA_FROM_DEVICE);
+       if (ib_dma_mapping_error(device->ib_device,
+                               desc->rsp_dma))
+               goto free_rsp;
+
        return 0;
 
-free_login_buf:
-       iser_free_login_buf(iser_conn);
+free_rsp:
+       kfree(desc->rsp);
+unmap_req:
+       ib_dma_unmap_single(device->ib_device, desc->req_dma,
+                           ISCSI_DEF_MAX_RECV_SEG_LEN,
+                           DMA_TO_DEVICE);
+free_req:
+       kfree(desc->req);
 
-out_err:
-       iser_err("unable to alloc or map login buf\n");
        return -ENOMEM;
 }
 
@@ -280,11 +271,11 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
                        goto rx_desc_dma_map_failed;
 
                rx_desc->dma_addr = dma_addr;
-
+               rx_desc->cqe.done = iser_task_rsp;
                rx_sg = &rx_desc->rx_sg;
-               rx_sg->addr   = rx_desc->dma_addr;
+               rx_sg->addr = rx_desc->dma_addr;
                rx_sg->length = ISER_RX_PAYLOAD_SIZE;
-               rx_sg->lkey   = device->pd->local_dma_lkey;
+               rx_sg->lkey = device->pd->local_dma_lkey;
        }
 
        iser_conn->rx_desc_head = 0;
@@ -383,6 +374,7 @@ int iser_send_command(struct iscsi_conn *conn,
 
        /* build the tx desc regd header and add it to the tx desc dto */
        tx_desc->type = ISCSI_TX_SCSI_COMMAND;
+       tx_desc->cqe.done = iser_cmd_comp;
        iser_create_send_desc(iser_conn, tx_desc);
 
        if (hdr->flags & ISCSI_FLAG_CMD_READ) {
@@ -464,6 +456,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
        }
 
        tx_desc->type = ISCSI_TX_DATAOUT;
+       tx_desc->cqe.done = iser_dataout_comp;
        tx_desc->iser_header.flags = ISER_VER;
        memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));
 
@@ -513,6 +506,7 @@ int iser_send_control(struct iscsi_conn *conn,
 
        /* build the tx desc regd header and add it to the tx desc dto */
        mdesc->type = ISCSI_TX_CONTROL;
+       mdesc->cqe.done = iser_ctrl_comp;
        iser_create_send_desc(iser_conn, mdesc);
 
        device = iser_conn->ib_conn.device;
@@ -520,25 +514,25 @@ int iser_send_control(struct iscsi_conn *conn,
        data_seg_len = ntoh24(task->hdr->dlength);
 
        if (data_seg_len > 0) {
+               struct iser_login_desc *desc = &iser_conn->login_desc;
                struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
+
                if (task != conn->login_task) {
                        iser_err("data present on non login task!!!\n");
                        goto send_control_error;
                }
 
-               ib_dma_sync_single_for_cpu(device->ib_device,
-                       iser_conn->login_req_dma, task->data_count,
-                       DMA_TO_DEVICE);
+               ib_dma_sync_single_for_cpu(device->ib_device, desc->req_dma,
+                                          task->data_count, DMA_TO_DEVICE);
 
-               memcpy(iser_conn->login_req_buf, task->data, task->data_count);
+               memcpy(desc->req, task->data, task->data_count);
 
-               ib_dma_sync_single_for_device(device->ib_device,
-                       iser_conn->login_req_dma, task->data_count,
-                       DMA_TO_DEVICE);
+               ib_dma_sync_single_for_device(device->ib_device, desc->req_dma,
+                                             task->data_count, DMA_TO_DEVICE);
 
-               tx_dsg->addr    = iser_conn->login_req_dma;
-               tx_dsg->length  = task->data_count;
-               tx_dsg->lkey    = device->pd->local_dma_lkey;
+               tx_dsg->addr = desc->req_dma;
+               tx_dsg->length = task->data_count;
+               tx_dsg->lkey = device->pd->local_dma_lkey;
                mdesc->num_sge = 2;
        }
 
@@ -562,41 +556,126 @@ send_control_error:
        return err;
 }
 
-/**
- * iser_rcv_dto_completion - recv DTO completion
- */
-void iser_rcv_completion(struct iser_rx_desc *rx_desc,
-                        unsigned long rx_xfer_len,
-                        struct ib_conn *ib_conn)
+void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
 {
-       struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
-                                                  ib_conn);
+       struct ib_conn *ib_conn = wc->qp->qp_context;
+       struct iser_conn *iser_conn = to_iser_conn(ib_conn);
+       struct iser_login_desc *desc = iser_login(wc->wr_cqe);
        struct iscsi_hdr *hdr;
-       u64 rx_dma;
-       int rx_buflen, outstanding, count, err;
+       char *data;
+       int length;
 
-       /* differentiate between login to all other PDUs */
-       if ((char *)rx_desc == iser_conn->login_resp_buf) {
-               rx_dma = iser_conn->login_resp_dma;
-               rx_buflen = ISER_RX_LOGIN_SIZE;
-       } else {
-               rx_dma = rx_desc->dma_addr;
-               rx_buflen = ISER_RX_PAYLOAD_SIZE;
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               iser_err_comp(wc, "login_rsp");
+               return;
+       }
+
+       ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
+                                  desc->rsp_dma, ISER_RX_LOGIN_SIZE,
+                                  DMA_FROM_DEVICE);
+
+       hdr = desc->rsp + sizeof(struct iser_ctrl);
+       data = desc->rsp + ISER_HEADERS_LEN;
+       length = wc->byte_len - ISER_HEADERS_LEN;
+
+       iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
+                hdr->itt, length);
+
+       iscsi_iser_recv(iser_conn->iscsi_conn, hdr, data, length);
+
+       ib_dma_sync_single_for_device(ib_conn->device->ib_device,
+                                     desc->rsp_dma, ISER_RX_LOGIN_SIZE,
+                                     DMA_FROM_DEVICE);
+
+       ib_conn->post_recv_buf_count--;
+}
+
+static inline void
+iser_inv_desc(struct iser_fr_desc *desc, u32 rkey)
+{
+       if (likely(rkey == desc->rsc.mr->rkey))
+               desc->rsc.mr_valid = 0;
+       else if (likely(rkey == desc->pi_ctx->sig_mr->rkey))
+               desc->pi_ctx->sig_mr_valid = 0;
+}
+
+static int
+iser_check_remote_inv(struct iser_conn *iser_conn,
+                     struct ib_wc *wc,
+                     struct iscsi_hdr *hdr)
+{
+       if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
+               struct iscsi_task *task;
+               u32 rkey = wc->ex.invalidate_rkey;
+
+               iser_dbg("conn %p: remote invalidation for rkey %#x\n",
+                        iser_conn, rkey);
+
+               if (unlikely(!iser_conn->snd_w_inv)) {
+                       iser_err("conn %p: unexepected remote invalidation, "
+                                "terminating connection\n", iser_conn);
+                       return -EPROTO;
+               }
+
+               task = iscsi_itt_to_ctask(iser_conn->iscsi_conn, hdr->itt);
+               if (likely(task)) {
+                       struct iscsi_iser_task *iser_task = task->dd_data;
+                       struct iser_fr_desc *desc;
+
+                       if (iser_task->dir[ISER_DIR_IN]) {
+                               desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h;
+                               iser_inv_desc(desc, rkey);
+                       }
+
+                       if (iser_task->dir[ISER_DIR_OUT]) {
+                               desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h;
+                               iser_inv_desc(desc, rkey);
+                       }
+               } else {
+                       iser_err("failed to get task for itt=%d\n", hdr->itt);
+                       return -EINVAL;
+               }
        }
 
-       ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
-                                  rx_buflen, DMA_FROM_DEVICE);
+       return 0;
+}
 
-       hdr = &rx_desc->iscsi_header;
+
+void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc)
+{
+       struct ib_conn *ib_conn = wc->qp->qp_context;
+       struct iser_conn *iser_conn = to_iser_conn(ib_conn);
+       struct iser_rx_desc *desc = iser_rx(wc->wr_cqe);
+       struct iscsi_hdr *hdr;
+       int length;
+       int outstanding, count, err;
+
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               iser_err_comp(wc, "task_rsp");
+               return;
+       }
+
+       ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
+                                  desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
+                                  DMA_FROM_DEVICE);
+
+       hdr = &desc->iscsi_header;
+       length = wc->byte_len - ISER_HEADERS_LEN;
 
        iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
-                       hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
+                hdr->itt, length);
+
+       if (iser_check_remote_inv(iser_conn, wc, hdr)) {
+               iscsi_conn_failure(iser_conn->iscsi_conn,
+                                  ISCSI_ERR_CONN_FAILED);
+               return;
+       }
 
-       iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data,
-                       rx_xfer_len - ISER_HEADERS_LEN);
+       iscsi_iser_recv(iser_conn->iscsi_conn, hdr, desc->data, length);
 
-       ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
-                                     rx_buflen, DMA_FROM_DEVICE);
+       ib_dma_sync_single_for_device(ib_conn->device->ib_device,
+                                     desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
+                                     DMA_FROM_DEVICE);
 
        /* decrementing conn->post_recv_buf_count only --after-- freeing the   *
         * task eliminates the need to worry on tasks which are completed in   *
@@ -604,9 +683,6 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
         * for the posted rx bufs refcount to become zero handles everything   */
        ib_conn->post_recv_buf_count--;
 
-       if (rx_dma == iser_conn->login_resp_dma)
-               return;
-
        outstanding = ib_conn->post_recv_buf_count;
        if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
                count = min(iser_conn->qp_max_recv_dtos - outstanding,
@@ -617,26 +693,47 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
        }
 }
 
-void iser_snd_completion(struct iser_tx_desc *tx_desc,
-                       struct ib_conn *ib_conn)
+void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc)
 {
+       if (unlikely(wc->status != IB_WC_SUCCESS))
+               iser_err_comp(wc, "command");
+}
+
+void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+       struct iser_tx_desc *desc = iser_tx(wc->wr_cqe);
        struct iscsi_task *task;
-       struct iser_device *device = ib_conn->device;
 
-       if (tx_desc->type == ISCSI_TX_DATAOUT) {
-               ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
-                                       ISER_HEADERS_LEN, DMA_TO_DEVICE);
-               kmem_cache_free(ig.desc_cache, tx_desc);
-               tx_desc = NULL;
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               iser_err_comp(wc, "control");
+               return;
        }
 
-       if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) {
-               /* this arithmetic is legal by libiscsi dd_data allocation */
-               task = (void *) ((long)(void *)tx_desc -
-                                 sizeof(struct iscsi_task));
-               if (task->hdr->itt == RESERVED_ITT)
-                       iscsi_put_task(task);
-       }
+       /* this arithmetic is legal by libiscsi dd_data allocation */
+       task = (void *)desc - sizeof(struct iscsi_task);
+       if (task->hdr->itt == RESERVED_ITT)
+               iscsi_put_task(task);
+}
+
+void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+       struct iser_tx_desc *desc = iser_tx(wc->wr_cqe);
+       struct ib_conn *ib_conn = wc->qp->qp_context;
+       struct iser_device *device = ib_conn->device;
+
+       if (unlikely(wc->status != IB_WC_SUCCESS))
+               iser_err_comp(wc, "dataout");
+
+       ib_dma_unmap_single(device->ib_device, desc->dma_addr,
+                           ISER_HEADERS_LEN, DMA_TO_DEVICE);
+       kmem_cache_free(ig.desc_cache, desc);
+}
+
+void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+       struct ib_conn *ib_conn = wc->qp->qp_context;
+
+       complete(&ib_conn->last_comp);
 }
 
 void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
index ea765fb..9a391cc 100644 (file)
@@ -49,7 +49,7 @@ int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
                     struct iser_reg_resources *rsc,
                     struct iser_mem_reg *mem_reg);
 
-static struct iser_reg_ops fastreg_ops = {
+static const struct iser_reg_ops fastreg_ops = {
        .alloc_reg_res  = iser_alloc_fastreg_pool,
        .free_reg_res   = iser_free_fastreg_pool,
        .reg_mem        = iser_fast_reg_mr,
@@ -58,7 +58,7 @@ static struct iser_reg_ops fastreg_ops = {
        .reg_desc_put   = iser_reg_desc_put_fr,
 };
 
-static struct iser_reg_ops fmr_ops = {
+static const struct iser_reg_ops fmr_ops = {
        .alloc_reg_res  = iser_alloc_fmr_pool,
        .free_reg_res   = iser_free_fmr_pool,
        .reg_mem        = iser_fast_reg_fmr,
@@ -67,19 +67,24 @@ static struct iser_reg_ops fmr_ops = {
        .reg_desc_put   = iser_reg_desc_put_fmr,
 };
 
+void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+       iser_err_comp(wc, "memreg");
+}
+
 int iser_assign_reg_ops(struct iser_device *device)
 {
-       struct ib_device_attr *dev_attr = &device->dev_attr;
+       struct ib_device *ib_dev = device->ib_device;
 
        /* Assign function handles  - based on FMR support */
-       if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
-           device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
+       if (ib_dev->alloc_fmr && ib_dev->dealloc_fmr &&
+           ib_dev->map_phys_fmr && ib_dev->unmap_fmr) {
                iser_info("FMR supported, using FMR for registration\n");
                device->reg_ops = &fmr_ops;
-       } else
-       if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+       } else if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
                iser_info("FastReg supported, using FastReg for registration\n");
                device->reg_ops = &fastreg_ops;
+               device->remote_inv_sup = iser_always_reg;
        } else {
                iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
                return -1;
@@ -131,67 +136,6 @@ iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
 {
 }
 
-#define IS_4K_ALIGNED(addr)    ((((unsigned long)addr) & ~MASK_4K) == 0)
-
-/**
- * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
- * and returns the length of resulting physical address array (may be less than
- * the original due to possible compaction).
- *
- * we build a "page vec" under the assumption that the SG meets the RDMA
- * alignment requirements. Other then the first and last SG elements, all
- * the "internal" elements can be compacted into a list whose elements are
- * dma addresses of physical pages. The code supports also the weird case
- * where --few fragments of the same page-- are present in the SG as
- * consecutive elements. Also, it handles one entry SG.
- */
-
-static int iser_sg_to_page_vec(struct iser_data_buf *data,
-                              struct ib_device *ibdev, u64 *pages,
-                              int *offset, int *data_size)
-{
-       struct scatterlist *sg, *sgl = data->sg;
-       u64 start_addr, end_addr, page, chunk_start = 0;
-       unsigned long total_sz = 0;
-       unsigned int dma_len;
-       int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
-
-       /* compute the offset of first element */
-       *offset = (u64) sgl[0].offset & ~MASK_4K;
-
-       new_chunk = 1;
-       cur_page  = 0;
-       for_each_sg(sgl, sg, data->dma_nents, i) {
-               start_addr = ib_sg_dma_address(ibdev, sg);
-               if (new_chunk)
-                       chunk_start = start_addr;
-               dma_len = ib_sg_dma_len(ibdev, sg);
-               end_addr = start_addr + dma_len;
-               total_sz += dma_len;
-
-               /* collect page fragments until aligned or end of SG list */
-               if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
-                       new_chunk = 0;
-                       continue;
-               }
-               new_chunk = 1;
-
-               /* address of the first page in the contiguous chunk;
-                  masking relevant for the very first SG entry,
-                  which might be unaligned */
-               page = chunk_start & MASK_4K;
-               do {
-                       pages[cur_page++] = page;
-                       page += SIZE_4K;
-               } while (page < end_addr);
-       }
-
-       *data_size = total_sz;
-       iser_dbg("page_vec->data_size:%d cur_page %d\n",
-                *data_size, cur_page);
-       return cur_page;
-}
-
 static void iser_data_buf_dump(struct iser_data_buf *data,
                               struct ib_device *ibdev)
 {
@@ -210,10 +154,10 @@ static void iser_dump_page_vec(struct iser_page_vec *page_vec)
 {
        int i;
 
-       iser_err("page vec length %d data size %d\n",
-                page_vec->length, page_vec->data_size);
-       for (i = 0; i < page_vec->length; i++)
-               iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]);
+       iser_err("page vec npages %d data length %d\n",
+                page_vec->npages, page_vec->fake_mr.length);
+       for (i = 0; i < page_vec->npages; i++)
+               iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]);
 }
 
 int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
@@ -251,7 +195,11 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
        struct scatterlist *sg = mem->sg;
 
        reg->sge.lkey = device->pd->local_dma_lkey;
-       reg->rkey = device->mr->rkey;
+       /*
+        * FIXME: rework the registration code path to differentiate
+        * rkey/lkey use cases
+        */
+       reg->rkey = device->mr ? device->mr->rkey : 0;
        reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
        reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
 
@@ -262,11 +210,16 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
        return 0;
 }
 
-/**
- * iser_reg_page_vec - Register physical memory
- *
- * returns: 0 on success, errno code on failure
- */
+static int iser_set_page(struct ib_mr *mr, u64 addr)
+{
+       struct iser_page_vec *page_vec =
+               container_of(mr, struct iser_page_vec, fake_mr);
+
+       page_vec->pages[page_vec->npages++] = addr;
+
+       return 0;
+}
+
 static
 int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
                      struct iser_data_buf *mem,
@@ -280,22 +233,19 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
        struct ib_pool_fmr *fmr;
        int ret, plen;
 
-       plen = iser_sg_to_page_vec(mem, device->ib_device,
-                                  page_vec->pages,
-                                  &page_vec->offset,
-                                  &page_vec->data_size);
-       page_vec->length = plen;
-       if (plen * SIZE_4K < page_vec->data_size) {
+       page_vec->npages = 0;
+       page_vec->fake_mr.page_size = SIZE_4K;
+       plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg,
+                             mem->size, iser_set_page);
+       if (unlikely(plen < mem->size)) {
                iser_err("page vec too short to hold this SG\n");
                iser_data_buf_dump(mem, device->ib_device);
                iser_dump_page_vec(page_vec);
                return -EINVAL;
        }
 
-       fmr  = ib_fmr_pool_map_phys(fmr_pool,
-                                   page_vec->pages,
-                                   page_vec->length,
-                                   page_vec->pages[0]);
+       fmr  = ib_fmr_pool_map_phys(fmr_pool, page_vec->pages,
+                                   page_vec->npages, page_vec->pages[0]);
        if (IS_ERR(fmr)) {
                ret = PTR_ERR(fmr);
                iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
@@ -304,8 +254,8 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
 
        reg->sge.lkey = fmr->fmr->lkey;
        reg->rkey = fmr->fmr->rkey;
-       reg->sge.addr = page_vec->pages[0] + page_vec->offset;
-       reg->sge.length = page_vec->data_size;
+       reg->sge.addr = page_vec->fake_mr.iova;
+       reg->sge.length = page_vec->fake_mr.length;
        reg->mem_h = fmr;
 
        iser_dbg("fmr reg: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
@@ -413,19 +363,16 @@ iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
                *mask |= ISER_CHECK_GUARD;
 }
 
-static void
-iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
+static inline void
+iser_inv_rkey(struct ib_send_wr *inv_wr,
+             struct ib_mr *mr,
+             struct ib_cqe *cqe)
 {
-       u32 rkey;
-
        inv_wr->opcode = IB_WR_LOCAL_INV;
-       inv_wr->wr_id = ISER_FASTREG_LI_WRID;
+       inv_wr->wr_cqe = cqe;
        inv_wr->ex.invalidate_rkey = mr->rkey;
        inv_wr->send_flags = 0;
        inv_wr->num_sge = 0;
-
-       rkey = ib_inc_rkey(mr->rkey);
-       ib_update_fast_reg_key(mr, rkey);
 }
 
 static int
@@ -437,7 +384,9 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
 {
        struct iser_tx_desc *tx_desc = &iser_task->desc;
        struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs;
+       struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
        struct ib_sig_handover_wr *wr;
+       struct ib_mr *mr = pi_ctx->sig_mr;
        int ret;
 
        memset(sig_attrs, 0, sizeof(*sig_attrs));
@@ -447,17 +396,19 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
 
        iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask);
 
-       if (!pi_ctx->sig_mr_valid)
-               iser_inv_rkey(iser_tx_next_wr(tx_desc), pi_ctx->sig_mr);
+       if (pi_ctx->sig_mr_valid)
+               iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);
+
+       ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
 
        wr = sig_handover_wr(iser_tx_next_wr(tx_desc));
        wr->wr.opcode = IB_WR_REG_SIG_MR;
-       wr->wr.wr_id = ISER_FASTREG_LI_WRID;
+       wr->wr.wr_cqe = cqe;
        wr->wr.sg_list = &data_reg->sge;
        wr->wr.num_sge = 1;
        wr->wr.send_flags = 0;
        wr->sig_attrs = sig_attrs;
-       wr->sig_mr = pi_ctx->sig_mr;
+       wr->sig_mr = mr;
        if (scsi_prot_sg_count(iser_task->sc))
                wr->prot = &prot_reg->sge;
        else
@@ -465,10 +416,10 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
        wr->access_flags = IB_ACCESS_LOCAL_WRITE |
                           IB_ACCESS_REMOTE_READ |
                           IB_ACCESS_REMOTE_WRITE;
-       pi_ctx->sig_mr_valid = 0;
+       pi_ctx->sig_mr_valid = 1;
 
-       sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
-       sig_reg->rkey = pi_ctx->sig_mr->rkey;
+       sig_reg->sge.lkey = mr->lkey;
+       sig_reg->rkey = mr->rkey;
        sig_reg->sge.addr = 0;
        sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
 
@@ -485,12 +436,15 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
                            struct iser_mem_reg *reg)
 {
        struct iser_tx_desc *tx_desc = &iser_task->desc;
+       struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
        struct ib_mr *mr = rsc->mr;
        struct ib_reg_wr *wr;
        int n;
 
-       if (!rsc->mr_valid)
-               iser_inv_rkey(iser_tx_next_wr(tx_desc), mr);
+       if (rsc->mr_valid)
+               iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);
+
+       ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
 
        n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K);
        if (unlikely(n != mem->size)) {
@@ -501,7 +455,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
 
        wr = reg_wr(iser_tx_next_wr(tx_desc));
        wr->wr.opcode = IB_WR_REG_MR;
-       wr->wr.wr_id = ISER_FASTREG_LI_WRID;
+       wr->wr.wr_cqe = cqe;
        wr->wr.send_flags = 0;
        wr->wr.num_sge = 0;
        wr->mr = mr;
@@ -510,7 +464,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
                     IB_ACCESS_REMOTE_WRITE |
                     IB_ACCESS_REMOTE_READ;
 
-       rsc->mr_valid = 0;
+       rsc->mr_valid = 1;
 
        reg->sge.lkey = mr->lkey;
        reg->rkey = mr->rkey;
@@ -554,7 +508,8 @@ iser_reg_data_sg(struct iscsi_iser_task *task,
 }
 
 int iser_reg_rdma_mem(struct iscsi_iser_task *task,
-                     enum iser_data_dir dir)
+                     enum iser_data_dir dir,
+                     bool all_imm)
 {
        struct ib_conn *ib_conn = &task->iser_conn->ib_conn;
        struct iser_device *device = ib_conn->device;
@@ -565,8 +520,8 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
        bool use_dma_key;
        int err;
 
-       use_dma_key = (mem->dma_nents == 1 && !iser_always_reg &&
-                      scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL);
+       use_dma_key = mem->dma_nents == 1 && (all_imm || !iser_always_reg) &&
+                     scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL;
 
        if (!use_dma_key) {
                desc = device->reg_ops->reg_desc_get(ib_conn);
index 42f4da6..40c0f49 100644 (file)
 #define ISER_MAX_CQ_LEN                (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \
                                 ISCSI_ISER_MAX_CONN)
 
-static int iser_cq_poll_limit = 512;
-
-static void iser_cq_tasklet_fn(unsigned long data);
-static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
-
-static void iser_cq_event_callback(struct ib_event *cause, void *context)
-{
-       iser_err("cq event %s (%d)\n",
-                ib_event_msg(cause->event), cause->event);
-}
-
 static void iser_qp_event_callback(struct ib_event *cause, void *context)
 {
        iser_err("qp event %s (%d)\n",
@@ -78,59 +67,40 @@ static void iser_event_handler(struct ib_event_handler *handler,
  */
 static int iser_create_device_ib_res(struct iser_device *device)
 {
-       struct ib_device_attr *dev_attr = &device->dev_attr;
+       struct ib_device *ib_dev = device->ib_device;
        int ret, i, max_cqe;
 
-       ret = ib_query_device(device->ib_device, dev_attr);
-       if (ret) {
-               pr_warn("Query device failed for %s\n", device->ib_device->name);
-               return ret;
-       }
-
        ret = iser_assign_reg_ops(device);
        if (ret)
                return ret;
 
        device->comps_used = min_t(int, num_online_cpus(),
-                                device->ib_device->num_comp_vectors);
+                                ib_dev->num_comp_vectors);
 
        device->comps = kcalloc(device->comps_used, sizeof(*device->comps),
                                GFP_KERNEL);
        if (!device->comps)
                goto comps_err;
 
-       max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);
+       max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe);
 
        iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
-                 device->comps_used, device->ib_device->name,
-                 device->ib_device->num_comp_vectors, max_cqe);
+                 device->comps_used, ib_dev->name,
+                 ib_dev->num_comp_vectors, max_cqe);
 
-       device->pd = ib_alloc_pd(device->ib_device);
+       device->pd = ib_alloc_pd(ib_dev);
        if (IS_ERR(device->pd))
                goto pd_err;
 
        for (i = 0; i < device->comps_used; i++) {
-               struct ib_cq_init_attr cq_attr = {};
                struct iser_comp *comp = &device->comps[i];
 
-               comp->device = device;
-               cq_attr.cqe = max_cqe;
-               cq_attr.comp_vector = i;
-               comp->cq = ib_create_cq(device->ib_device,
-                                       iser_cq_callback,
-                                       iser_cq_event_callback,
-                                       (void *)comp,
-                                       &cq_attr);
+               comp->cq = ib_alloc_cq(ib_dev, comp, max_cqe, i,
+                                      IB_POLL_SOFTIRQ);
                if (IS_ERR(comp->cq)) {
                        comp->cq = NULL;
                        goto cq_err;
                }
-
-               if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
-                       goto cq_err;
-
-               tasklet_init(&comp->tasklet, iser_cq_tasklet_fn,
-                            (unsigned long)comp);
        }
 
        if (!iser_always_reg) {
@@ -140,11 +110,11 @@ static int iser_create_device_ib_res(struct iser_device *device)
 
                device->mr = ib_get_dma_mr(device->pd, access);
                if (IS_ERR(device->mr))
-                       goto dma_mr_err;
+                       goto cq_err;
        }
 
-       INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
-                               iser_event_handler);
+       INIT_IB_EVENT_HANDLER(&device->event_handler, ib_dev,
+                             iser_event_handler);
        if (ib_register_event_handler(&device->event_handler))
                goto handler_err;
 
@@ -153,15 +123,12 @@ static int iser_create_device_ib_res(struct iser_device *device)
 handler_err:
        if (device->mr)
                ib_dereg_mr(device->mr);
-dma_mr_err:
-       for (i = 0; i < device->comps_used; i++)
-               tasklet_kill(&device->comps[i].tasklet);
 cq_err:
        for (i = 0; i < device->comps_used; i++) {
                struct iser_comp *comp = &device->comps[i];
 
                if (comp->cq)
-                       ib_destroy_cq(comp->cq);
+                       ib_free_cq(comp->cq);
        }
        ib_dealloc_pd(device->pd);
 pd_err:
@@ -182,8 +149,7 @@ static void iser_free_device_ib_res(struct iser_device *device)
        for (i = 0; i < device->comps_used; i++) {
                struct iser_comp *comp = &device->comps[i];
 
-               tasklet_kill(&comp->tasklet);
-               ib_destroy_cq(comp->cq);
+               ib_free_cq(comp->cq);
                comp->cq = NULL;
        }
 
@@ -299,7 +265,7 @@ iser_alloc_reg_res(struct ib_device *ib_device,
                iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
                return ret;
        }
-       res->mr_valid = 1;
+       res->mr_valid = 0;
 
        return 0;
 }
@@ -336,7 +302,7 @@ iser_alloc_pi_ctx(struct ib_device *ib_device,
                ret = PTR_ERR(pi_ctx->sig_mr);
                goto sig_mr_failure;
        }
-       pi_ctx->sig_mr_valid = 1;
+       pi_ctx->sig_mr_valid = 0;
        desc->pi_ctx->sig_protected = 0;
 
        return 0;
@@ -461,10 +427,9 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
  */
 static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
 {
-       struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
-                                                  ib_conn);
+       struct iser_conn *iser_conn = to_iser_conn(ib_conn);
        struct iser_device      *device;
-       struct ib_device_attr *dev_attr;
+       struct ib_device        *ib_dev;
        struct ib_qp_init_attr  init_attr;
        int                     ret = -ENOMEM;
        int index, min_index = 0;
@@ -472,7 +437,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
        BUG_ON(ib_conn->device == NULL);
 
        device = ib_conn->device;
-       dev_attr = &device->dev_attr;
+       ib_dev = device->ib_device;
 
        memset(&init_attr, 0, sizeof init_attr);
 
@@ -503,16 +468,16 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
                iser_conn->max_cmds =
                        ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
        } else {
-               if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
+               if (ib_dev->attrs.max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
                        init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS + 1;
                        iser_conn->max_cmds =
                                ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
                } else {
-                       init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
+                       init_attr.cap.max_send_wr = ib_dev->attrs.max_qp_wr;
                        iser_conn->max_cmds =
-                               ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
+                               ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr);
                        iser_dbg("device %s supports max_send_wr %d\n",
-                                device->ib_device->name, dev_attr->max_qp_wr);
+                                device->ib_device->name, ib_dev->attrs.max_qp_wr);
                }
        }
 
@@ -724,13 +689,13 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
                                 iser_conn, err);
 
                /* post an indication that all flush errors were consumed */
-               err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr);
+               err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr);
                if (err) {
-                       iser_err("conn %p failed to post beacon", ib_conn);
+                       iser_err("conn %p failed to post last wr", ib_conn);
                        return 1;
                }
 
-               wait_for_completion(&ib_conn->flush_comp);
+               wait_for_completion(&ib_conn->last_comp);
        }
 
        return 1;
@@ -756,7 +721,7 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
 
        sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K);
        sup_sg_tablesize = min_t(unsigned, ISCSI_ISER_MAX_SG_TABLESIZE,
-                                device->dev_attr.max_fast_reg_page_list_len);
+                                device->ib_device->attrs.max_fast_reg_page_list_len);
 
        if (sg_tablesize > sup_sg_tablesize) {
                sg_tablesize = sup_sg_tablesize;
@@ -799,7 +764,7 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
 
        /* connection T10-PI support */
        if (iser_pi_enable) {
-               if (!(device->dev_attr.device_cap_flags &
+               if (!(device->ib_device->attrs.device_cap_flags &
                      IB_DEVICE_SIGNATURE_HANDOVER)) {
                        iser_warn("T10-PI requested but not supported on %s, "
                                  "continue without T10-PI\n",
@@ -841,16 +806,17 @@ static void iser_route_handler(struct rdma_cm_id *cma_id)
                goto failure;
 
        memset(&conn_param, 0, sizeof conn_param);
-       conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
+       conn_param.responder_resources = device->ib_device->attrs.max_qp_rd_atom;
        conn_param.initiator_depth     = 1;
        conn_param.retry_count         = 7;
        conn_param.rnr_retry_count     = 6;
 
        memset(&req_hdr, 0, sizeof(req_hdr));
-       req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
-                       ISER_SEND_W_INV_NOT_SUPPORTED);
-       conn_param.private_data         = (void *)&req_hdr;
-       conn_param.private_data_len     = sizeof(struct iser_cm_hdr);
+       req_hdr.flags = ISER_ZBVA_NOT_SUP;
+       if (!device->remote_inv_sup)
+               req_hdr.flags |= ISER_SEND_W_INV_NOT_SUP;
+       conn_param.private_data = (void *)&req_hdr;
+       conn_param.private_data_len = sizeof(struct iser_cm_hdr);
 
        ret = rdma_connect(cma_id, &conn_param);
        if (ret) {
@@ -863,7 +829,8 @@ failure:
        iser_connect_error(cma_id);
 }
 
-static void iser_connected_handler(struct rdma_cm_id *cma_id)
+static void iser_connected_handler(struct rdma_cm_id *cma_id,
+                                  const void *private_data)
 {
        struct iser_conn *iser_conn;
        struct ib_qp_attr attr;
@@ -877,6 +844,15 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id)
        (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
        iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
 
+       if (private_data) {
+               u8 flags = *(u8 *)private_data;
+
+               iser_conn->snd_w_inv = !(flags & ISER_SEND_W_INV_NOT_SUP);
+       }
+
+       iser_info("conn %p: negotiated %s invalidation\n",
+                 iser_conn, iser_conn->snd_w_inv ? "remote" : "local");
+
        iser_conn->state = ISER_CONN_UP;
        complete(&iser_conn->up_completion);
 }
@@ -928,7 +904,7 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
                iser_route_handler(cma_id);
                break;
        case RDMA_CM_EVENT_ESTABLISHED:
-               iser_connected_handler(cma_id);
+               iser_connected_handler(cma_id, event->param.conn.private_data);
                break;
        case RDMA_CM_EVENT_ADDR_ERROR:
        case RDMA_CM_EVENT_ROUTE_ERROR:
@@ -967,14 +943,21 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
 
 void iser_conn_init(struct iser_conn *iser_conn)
 {
+       struct ib_conn *ib_conn = &iser_conn->ib_conn;
+
        iser_conn->state = ISER_CONN_INIT;
-       iser_conn->ib_conn.post_recv_buf_count = 0;
-       init_completion(&iser_conn->ib_conn.flush_comp);
        init_completion(&iser_conn->stop_completion);
        init_completion(&iser_conn->ib_completion);
        init_completion(&iser_conn->up_completion);
        INIT_LIST_HEAD(&iser_conn->conn_list);
        mutex_init(&iser_conn->state_mutex);
+
+       ib_conn->post_recv_buf_count = 0;
+       ib_conn->reg_cqe.done = iser_reg_comp;
+       ib_conn->last_cqe.done = iser_last_comp;
+       ib_conn->last.wr_cqe = &ib_conn->last_cqe;
+       ib_conn->last.opcode = IB_WR_SEND;
+       init_completion(&ib_conn->last_comp);
 }
 
  /**
@@ -1000,9 +983,6 @@ int iser_connect(struct iser_conn   *iser_conn,
 
        iser_conn->state = ISER_CONN_PENDING;
 
-       ib_conn->beacon.wr_id = ISER_BEACON_WRID;
-       ib_conn->beacon.opcode = IB_WR_SEND;
-
        ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
                                         (void *)iser_conn,
                                         RDMA_PS_TCP, IB_QPT_RC);
@@ -1045,56 +1025,60 @@ connect_failure:
 
 int iser_post_recvl(struct iser_conn *iser_conn)
 {
-       struct ib_recv_wr rx_wr, *rx_wr_failed;
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
-       struct ib_sge     sge;
+       struct iser_login_desc *desc = &iser_conn->login_desc;
+       struct ib_recv_wr wr, *wr_failed;
        int ib_ret;
 
-       sge.addr   = iser_conn->login_resp_dma;
-       sge.length = ISER_RX_LOGIN_SIZE;
-       sge.lkey   = ib_conn->device->pd->local_dma_lkey;
+       desc->sge.addr = desc->rsp_dma;
+       desc->sge.length = ISER_RX_LOGIN_SIZE;
+       desc->sge.lkey = ib_conn->device->pd->local_dma_lkey;
 
-       rx_wr.wr_id   = (uintptr_t)iser_conn->login_resp_buf;
-       rx_wr.sg_list = &sge;
-       rx_wr.num_sge = 1;
-       rx_wr.next    = NULL;
+       desc->cqe.done = iser_login_rsp;
+       wr.wr_cqe = &desc->cqe;
+       wr.sg_list = &desc->sge;
+       wr.num_sge = 1;
+       wr.next = NULL;
 
        ib_conn->post_recv_buf_count++;
-       ib_ret  = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
+       ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed);
        if (ib_ret) {
                iser_err("ib_post_recv failed ret=%d\n", ib_ret);
                ib_conn->post_recv_buf_count--;
        }
+
        return ib_ret;
 }
 
 int iser_post_recvm(struct iser_conn *iser_conn, int count)
 {
-       struct ib_recv_wr *rx_wr, *rx_wr_failed;
-       int i, ib_ret;
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
        unsigned int my_rx_head = iser_conn->rx_desc_head;
        struct iser_rx_desc *rx_desc;
+       struct ib_recv_wr *wr, *wr_failed;
+       int i, ib_ret;
 
-       for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
-               rx_desc         = &iser_conn->rx_descs[my_rx_head];
-               rx_wr->wr_id    = (uintptr_t)rx_desc;
-               rx_wr->sg_list  = &rx_desc->rx_sg;
-               rx_wr->num_sge  = 1;
-               rx_wr->next     = rx_wr + 1;
+       for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) {
+               rx_desc = &iser_conn->rx_descs[my_rx_head];
+               rx_desc->cqe.done = iser_task_rsp;
+               wr->wr_cqe = &rx_desc->cqe;
+               wr->sg_list = &rx_desc->rx_sg;
+               wr->num_sge = 1;
+               wr->next = wr + 1;
                my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask;
        }
 
-       rx_wr--;
-       rx_wr->next = NULL; /* mark end of work requests list */
+       wr--;
+       wr->next = NULL; /* mark end of work requests list */
 
        ib_conn->post_recv_buf_count += count;
-       ib_ret  = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
+       ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &wr_failed);
        if (ib_ret) {
                iser_err("ib_post_recv failed ret=%d\n", ib_ret);
                ib_conn->post_recv_buf_count -= count;
        } else
                iser_conn->rx_desc_head = my_rx_head;
+
        return ib_ret;
 }
 
@@ -1115,7 +1099,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
                                      DMA_TO_DEVICE);
 
        wr->next = NULL;
-       wr->wr_id = (uintptr_t)tx_desc;
+       wr->wr_cqe = &tx_desc->cqe;
        wr->sg_list = tx_desc->tx_sg;
        wr->num_sge = tx_desc->num_sge;
        wr->opcode = IB_WR_SEND;
@@ -1129,149 +1113,6 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
        return ib_ret;
 }
 
-/**
- * is_iser_tx_desc - Indicate if the completion wr_id
- *     is a TX descriptor or not.
- * @iser_conn: iser connection
- * @wr_id: completion WR identifier
- *
- * Since we cannot rely on wc opcode in FLUSH errors
- * we must work around it by checking if the wr_id address
- * falls in the iser connection rx_descs buffer. If so
- * it is an RX descriptor, otherwize it is a TX.
- */
-static inline bool
-is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
-{
-       void *start = iser_conn->rx_descs;
-       int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
-
-       if (wr_id >= start && wr_id < start + len)
-               return false;
-
-       return true;
-}
-
-/**
- * iser_handle_comp_error() - Handle error completion
- * @ib_conn:   connection RDMA resources
- * @wc:        work completion
- *
- * Notes: We may handle a FLUSH error completion and in this case
- *        we only cleanup in case TX type was DATAOUT. For non-FLUSH
- *        error completion we should also notify iscsi layer that
- *        connection is failed (in case we passed bind stage).
- */
-static void
-iser_handle_comp_error(struct ib_conn *ib_conn,
-                      struct ib_wc *wc)
-{
-       void *wr_id = (void *)(uintptr_t)wc->wr_id;
-       struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
-                                                  ib_conn);
-
-       if (wc->status != IB_WC_WR_FLUSH_ERR)
-               if (iser_conn->iscsi_conn)
-                       iscsi_conn_failure(iser_conn->iscsi_conn,
-                                          ISCSI_ERR_CONN_FAILED);
-
-       if (wc->wr_id == ISER_FASTREG_LI_WRID)
-               return;
-
-       if (is_iser_tx_desc(iser_conn, wr_id)) {
-               struct iser_tx_desc *desc = wr_id;
-
-               if (desc->type == ISCSI_TX_DATAOUT)
-                       kmem_cache_free(ig.desc_cache, desc);
-       } else {
-               ib_conn->post_recv_buf_count--;
-       }
-}
-
-/**
- * iser_handle_wc - handle a single work completion
- * @wc: work completion
- *
- * Soft-IRQ context, work completion can be either
- * SEND or RECV, and can turn out successful or
- * with error (or flush error).
- */
-static void iser_handle_wc(struct ib_wc *wc)
-{
-       struct ib_conn *ib_conn;
-       struct iser_tx_desc *tx_desc;
-       struct iser_rx_desc *rx_desc;
-
-       ib_conn = wc->qp->qp_context;
-       if (likely(wc->status == IB_WC_SUCCESS)) {
-               if (wc->opcode == IB_WC_RECV) {
-                       rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
-                       iser_rcv_completion(rx_desc, wc->byte_len,
-                                           ib_conn);
-               } else
-               if (wc->opcode == IB_WC_SEND) {
-                       tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
-                       iser_snd_completion(tx_desc, ib_conn);
-               } else {
-                       iser_err("Unknown wc opcode %d\n", wc->opcode);
-               }
-       } else {
-               if (wc->status != IB_WC_WR_FLUSH_ERR)
-                       iser_err("%s (%d): wr id %llx vend_err %x\n",
-                                ib_wc_status_msg(wc->status), wc->status,
-                                wc->wr_id, wc->vendor_err);
-               else
-                       iser_dbg("%s (%d): wr id %llx\n",
-                                ib_wc_status_msg(wc->status), wc->status,
-                                wc->wr_id);
-
-               if (wc->wr_id == ISER_BEACON_WRID)
-                       /* all flush errors were consumed */
-                       complete(&ib_conn->flush_comp);
-               else
-                       iser_handle_comp_error(ib_conn, wc);
-       }
-}
-
-/**
- * iser_cq_tasklet_fn - iSER completion polling loop
- * @data: iSER completion context
- *
- * Soft-IRQ context, polling connection CQ until
- * either CQ was empty or we exausted polling budget
- */
-static void iser_cq_tasklet_fn(unsigned long data)
-{
-       struct iser_comp *comp = (struct iser_comp *)data;
-       struct ib_cq *cq = comp->cq;
-       struct ib_wc *const wcs = comp->wcs;
-       int i, n, completed = 0;
-
-       while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
-               for (i = 0; i < n; i++)
-                       iser_handle_wc(&wcs[i]);
-
-               completed += n;
-               if (completed >= iser_cq_poll_limit)
-                       break;
-       }
-
-       /*
-        * It is assumed here that arming CQ only once its empty
-        * would not cause interrupts to be missed.
-        */
-       ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-
-       iser_dbg("got %d completions\n", completed);
-}
-
-static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
-{
-       struct iser_comp *comp = cq_context;
-
-       tasklet_schedule(&comp->tasklet);
-}
-
 u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
                             enum iser_data_dir cmd_dir, sector_t *sector)
 {
@@ -1319,3 +1160,21 @@ err:
        /* Not alot we can do here, return ambiguous guard error */
        return 0x1;
 }
+
+void iser_err_comp(struct ib_wc *wc, const char *type)
+{
+       if (wc->status != IB_WC_WR_FLUSH_ERR) {
+               struct iser_conn *iser_conn = to_iser_conn(wc->qp->qp_context);
+
+               iser_err("%s failure: %s (%d) vend_err %x\n", type,
+                        ib_wc_status_msg(wc->status), wc->status,
+                        wc->vendor_err);
+
+               if (iser_conn->iscsi_conn)
+                       iscsi_conn_failure(iser_conn->iscsi_conn,
+                                          ISCSI_ERR_CONN_FAILED);
+       } else {
+               iser_dbg("%s failure: %s (%d)\n", type,
+                        ib_wc_status_msg(wc->status), wc->status);
+       }
+}
index 468c5e1..f121e61 100644 (file)
@@ -29,7 +29,6 @@
 #include <target/iscsi/iscsi_transport.h>
 #include <linux/semaphore.h>
 
-#include "isert_proto.h"
 #include "ib_isert.h"
 
 #define        ISERT_MAX_CONN          8
@@ -95,22 +94,6 @@ isert_qp_event_callback(struct ib_event *e, void *context)
        }
 }
 
-static int
-isert_query_device(struct ib_device *ib_dev, struct ib_device_attr *devattr)
-{
-       int ret;
-
-       ret = ib_query_device(ib_dev, devattr);
-       if (ret) {
-               isert_err("ib_query_device() failed: %d\n", ret);
-               return ret;
-       }
-       isert_dbg("devattr->max_sge: %d\n", devattr->max_sge);
-       isert_dbg("devattr->max_sge_rd: %d\n", devattr->max_sge_rd);
-
-       return 0;
-}
-
 static struct isert_comp *
 isert_comp_get(struct isert_conn *isert_conn)
 {
@@ -157,9 +140,9 @@ isert_create_qp(struct isert_conn *isert_conn,
        attr.recv_cq = comp->cq;
        attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS;
        attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
-       attr.cap.max_send_sge = device->dev_attr.max_sge;
-       isert_conn->max_sge = min(device->dev_attr.max_sge,
-                                 device->dev_attr.max_sge_rd);
+       attr.cap.max_send_sge = device->ib_device->attrs.max_sge;
+       isert_conn->max_sge = min(device->ib_device->attrs.max_sge,
+                                 device->ib_device->attrs.max_sge_rd);
        attr.cap.max_recv_sge = 1;
        attr.sq_sig_type = IB_SIGNAL_REQ_WR;
        attr.qp_type = IB_QPT_RC;
@@ -287,8 +270,7 @@ isert_free_comps(struct isert_device *device)
 }
 
 static int
-isert_alloc_comps(struct isert_device *device,
-                 struct ib_device_attr *attr)
+isert_alloc_comps(struct isert_device *device)
 {
        int i, max_cqe, ret = 0;
 
@@ -308,7 +290,7 @@ isert_alloc_comps(struct isert_device *device,
                return -ENOMEM;
        }
 
-       max_cqe = min(ISER_MAX_CQ_LEN, attr->max_cqe);
+       max_cqe = min(ISER_MAX_CQ_LEN, device->ib_device->attrs.max_cqe);
 
        for (i = 0; i < device->comps_used; i++) {
                struct ib_cq_init_attr cq_attr = {};
@@ -344,17 +326,15 @@ out_cq:
 static int
 isert_create_device_ib_res(struct isert_device *device)
 {
-       struct ib_device_attr *dev_attr;
+       struct ib_device *ib_dev = device->ib_device;
        int ret;
 
-       dev_attr = &device->dev_attr;
-       ret = isert_query_device(device->ib_device, dev_attr);
-       if (ret)
-               goto out;
+       isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge);
+       isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd);
 
        /* asign function handlers */
-       if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
-           dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
+       if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
+           ib_dev->attrs.device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
                device->use_fastreg = 1;
                device->reg_rdma_mem = isert_reg_rdma;
                device->unreg_rdma_mem = isert_unreg_rdma;
@@ -364,11 +344,11 @@ isert_create_device_ib_res(struct isert_device *device)
                device->unreg_rdma_mem = isert_unmap_cmd;
        }
 
-       ret = isert_alloc_comps(device, dev_attr);
+       ret = isert_alloc_comps(device);
        if (ret)
                goto out;
 
-       device->pd = ib_alloc_pd(device->ib_device);
+       device->pd = ib_alloc_pd(ib_dev);
        if (IS_ERR(device->pd)) {
                ret = PTR_ERR(device->pd);
                isert_err("failed to allocate pd, device %p, ret=%d\n",
@@ -377,7 +357,7 @@ isert_create_device_ib_res(struct isert_device *device)
        }
 
        /* Check signature cap */
-       device->pi_capable = dev_attr->device_cap_flags &
+       device->pi_capable = ib_dev->attrs.device_cap_flags &
                             IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
 
        return 0;
@@ -676,6 +656,32 @@ out_login_buf:
        return ret;
 }
 
+static void
+isert_set_nego_params(struct isert_conn *isert_conn,
+                     struct rdma_conn_param *param)
+{
+       struct ib_device_attr *attr = &isert_conn->device->ib_device->attrs;
+
+       /* Set max inflight RDMA READ requests */
+       isert_conn->initiator_depth = min_t(u8, param->initiator_depth,
+                               attr->max_qp_init_rd_atom);
+       isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
+
+       if (param->private_data) {
+               u8 flags = *(u8 *)param->private_data;
+
+               /*
+                * use remote invalidation if both the initiator
+                * and the HCA support it
+                */
+               isert_conn->snd_w_inv = !(flags & ISER_SEND_W_INV_NOT_SUP) &&
+                                         (attr->device_cap_flags &
+                                          IB_DEVICE_MEM_MGT_EXTENSIONS);
+               if (isert_conn->snd_w_inv)
+                       isert_info("Using remote invalidation\n");
+       }
+}
+
 static int
 isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
@@ -714,11 +720,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
        }
        isert_conn->device = device;
 
-       /* Set max inflight RDMA READ requests */
-       isert_conn->initiator_depth = min_t(u8,
-                               event->param.conn.initiator_depth,
-                               device->dev_attr.max_qp_init_rd_atom);
-       isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
+       isert_set_nego_params(isert_conn, &event->param.conn);
 
        ret = isert_conn_setup_qp(isert_conn, cma_id);
        if (ret)
@@ -1050,8 +1052,8 @@ isert_create_send_desc(struct isert_conn *isert_conn,
        ib_dma_sync_single_for_cpu(ib_dev, tx_desc->dma_addr,
                                   ISER_HEADERS_LEN, DMA_TO_DEVICE);
 
-       memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
-       tx_desc->iser_header.flags = ISER_VER;
+       memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl));
+       tx_desc->iser_header.flags = ISCSI_CTRL;
 
        tx_desc->num_sge = 1;
        tx_desc->isert_cmd = isert_cmd;
@@ -1097,7 +1099,14 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
 
        isert_cmd->rdma_wr.iser_ib_op = ISER_IB_SEND;
        send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
-       send_wr->opcode = IB_WR_SEND;
+
+       if (isert_conn->snd_w_inv && isert_cmd->inv_rkey) {
+               send_wr->opcode  = IB_WR_SEND_WITH_INV;
+               send_wr->ex.invalidate_rkey = isert_cmd->inv_rkey;
+       } else {
+               send_wr->opcode = IB_WR_SEND;
+       }
+
        send_wr->sg_list = &tx_desc->tx_sg[0];
        send_wr->num_sge = isert_cmd->tx_desc.num_sge;
        send_wr->send_flags = IB_SEND_SIGNALED;
@@ -1486,6 +1495,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
                isert_cmd->read_va = read_va;
                isert_cmd->write_stag = write_stag;
                isert_cmd->write_va = write_va;
+               isert_cmd->inv_rkey = read_stag ? read_stag : write_stag;
 
                ret = isert_handle_scsi_cmd(isert_conn, isert_cmd, cmd,
                                        rx_desc, (unsigned char *)hdr);
@@ -1543,21 +1553,21 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
 static void
 isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
 {
-       struct iser_hdr *iser_hdr = &rx_desc->iser_header;
+       struct iser_ctrl *iser_ctrl = &rx_desc->iser_header;
        uint64_t read_va = 0, write_va = 0;
        uint32_t read_stag = 0, write_stag = 0;
 
-       switch (iser_hdr->flags & 0xF0) {
+       switch (iser_ctrl->flags & 0xF0) {
        case ISCSI_CTRL:
-               if (iser_hdr->flags & ISER_RSV) {
-                       read_stag = be32_to_cpu(iser_hdr->read_stag);
-                       read_va = be64_to_cpu(iser_hdr->read_va);
+               if (iser_ctrl->flags & ISER_RSV) {
+                       read_stag = be32_to_cpu(iser_ctrl->read_stag);
+                       read_va = be64_to_cpu(iser_ctrl->read_va);
                        isert_dbg("ISER_RSV: read_stag: 0x%x read_va: 0x%llx\n",
                                  read_stag, (unsigned long long)read_va);
                }
-               if (iser_hdr->flags & ISER_WSV) {
-                       write_stag = be32_to_cpu(iser_hdr->write_stag);
-                       write_va = be64_to_cpu(iser_hdr->write_va);
+               if (iser_ctrl->flags & ISER_WSV) {
+                       write_stag = be32_to_cpu(iser_ctrl->write_stag);
+                       write_va = be64_to_cpu(iser_ctrl->write_va);
                        isert_dbg("ISER_WSV: write_stag: 0x%x write_va: 0x%llx\n",
                                  write_stag, (unsigned long long)write_va);
                }
@@ -1568,7 +1578,7 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
                isert_err("iSER Hello message\n");
                break;
        default:
-               isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_hdr->flags);
+               isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_ctrl->flags);
                break;
        }
 
@@ -3095,12 +3105,20 @@ isert_rdma_accept(struct isert_conn *isert_conn)
        struct rdma_cm_id *cm_id = isert_conn->cm_id;
        struct rdma_conn_param cp;
        int ret;
+       struct iser_cm_hdr rsp_hdr;
 
        memset(&cp, 0, sizeof(struct rdma_conn_param));
        cp.initiator_depth = isert_conn->initiator_depth;
        cp.retry_count = 7;
        cp.rnr_retry_count = 7;
 
+       memset(&rsp_hdr, 0, sizeof(rsp_hdr));
+       rsp_hdr.flags = ISERT_ZBVA_NOT_USED;
+       if (!isert_conn->snd_w_inv)
+               rsp_hdr.flags = rsp_hdr.flags | ISERT_SEND_W_INV_NOT_USED;
+       cp.private_data = (void *)&rsp_hdr;
+       cp.private_data_len = sizeof(rsp_hdr);
+
        ret = rdma_accept(cm_id, &cp);
        if (ret) {
                isert_err("rdma_accept() failed with: %d\n", ret);
index 3d7fbc4..8d50453 100644 (file)
@@ -3,6 +3,8 @@
 #include <linux/in6.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/rdma_cm.h>
+#include <scsi/iser.h>
+
 
 #define DRV_NAME       "isert"
 #define PFX            DRV_NAME ": "
 #define isert_err(fmt, arg...) \
        pr_err(PFX "%s: " fmt, __func__ , ## arg)
 
+/* Constant PDU lengths calculations */
+#define ISER_HEADERS_LEN       (sizeof(struct iser_ctrl) + \
+                                sizeof(struct iscsi_hdr))
+#define ISER_RECV_DATA_SEG_LEN 8192
+#define ISER_RX_PAYLOAD_SIZE   (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
+#define ISER_RX_LOGIN_SIZE     (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
+
+/* QP settings */
+/* Maximal bounds on received asynchronous PDUs */
+#define ISERT_MAX_TX_MISC_PDUS 4 /* NOOP_IN(2) , ASYNC_EVENT(2)   */
+
+#define ISERT_MAX_RX_MISC_PDUS 6 /*
+                                  * NOOP_OUT(2), TEXT(1),
+                                  * SCSI_TMFUNC(2), LOGOUT(1)
+                                  */
+
+#define ISCSI_DEF_XMIT_CMDS_MAX 128 /* from libiscsi.h, must be power of 2 */
+
+#define ISERT_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
+
+#define ISERT_MIN_POSTED_RX    (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
+
+#define ISERT_INFLIGHT_DATAOUTS        8
+
+#define ISERT_QP_MAX_REQ_DTOS  (ISCSI_DEF_XMIT_CMDS_MAX *    \
+                               (1 + ISERT_INFLIGHT_DATAOUTS) + \
+                               ISERT_MAX_TX_MISC_PDUS  + \
+                               ISERT_MAX_RX_MISC_PDUS)
+
+#define ISER_RX_PAD_SIZE       (ISER_RECV_DATA_SEG_LEN + 4096 - \
+               (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge)))
+
 #define ISCSI_ISER_SG_TABLESIZE                256
 #define ISER_FASTREG_LI_WRID           0xffffffffffffffffULL
 #define ISER_BEACON_WRID               0xfffffffffffffffeULL
@@ -56,7 +90,7 @@ enum iser_conn_state {
 };
 
 struct iser_rx_desc {
-       struct iser_hdr iser_header;
+       struct iser_ctrl iser_header;
        struct iscsi_hdr iscsi_header;
        char            data[ISER_RECV_DATA_SEG_LEN];
        u64             dma_addr;
@@ -65,7 +99,7 @@ struct iser_rx_desc {
 } __packed;
 
 struct iser_tx_desc {
-       struct iser_hdr iser_header;
+       struct iser_ctrl iser_header;
        struct iscsi_hdr iscsi_header;
        enum isert_desc_type type;
        u64             dma_addr;
@@ -129,6 +163,7 @@ struct isert_cmd {
        uint32_t                write_stag;
        uint64_t                read_va;
        uint64_t                write_va;
+       uint32_t                inv_rkey;
        u64                     pdu_buf_dma;
        u32                     pdu_buf_len;
        struct isert_conn       *conn;
@@ -176,6 +211,7 @@ struct isert_conn {
        struct work_struct      release_work;
        struct ib_recv_wr       beacon;
        bool                    logout_posted;
+       bool                    snd_w_inv;
 };
 
 #define ISERT_MAX_CQ 64
@@ -207,7 +243,6 @@ struct isert_device {
        struct isert_comp       *comps;
        int                     comps_used;
        struct list_head        dev_node;
-       struct ib_device_attr   dev_attr;
        int                     (*reg_rdma_mem)(struct iscsi_conn *conn,
                                                    struct iscsi_cmd *cmd,
                                                    struct isert_rdma_wr *wr);
diff --git a/drivers/infiniband/ulp/isert/isert_proto.h b/drivers/infiniband/ulp/isert/isert_proto.h
deleted file mode 100644 (file)
index 4dccd31..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-/* From iscsi_iser.h */
-
-struct iser_hdr {
-       u8      flags;
-       u8      rsvd[3];
-       __be32  write_stag; /* write rkey */
-       __be64  write_va;
-       __be32  read_stag;  /* read rkey */
-       __be64  read_va;
-} __packed;
-
-/*Constant PDU lengths calculations */
-#define ISER_HEADERS_LEN  (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
-
-#define ISER_RECV_DATA_SEG_LEN  8192
-#define ISER_RX_PAYLOAD_SIZE    (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
-#define ISER_RX_LOGIN_SIZE      (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
-
-/* QP settings */
-/* Maximal bounds on received asynchronous PDUs */
-#define ISERT_MAX_TX_MISC_PDUS 4 /* NOOP_IN(2) , ASYNC_EVENT(2)   */
-
-#define ISERT_MAX_RX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1),         *
-                                  * SCSI_TMFUNC(2), LOGOUT(1) */
-
-#define ISCSI_DEF_XMIT_CMDS_MAX 128 /* from libiscsi.h, must be power of 2 */
-
-#define ISERT_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
-
-#define ISERT_MIN_POSTED_RX    (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
-
-#define ISERT_INFLIGHT_DATAOUTS        8
-
-#define ISERT_QP_MAX_REQ_DTOS  (ISCSI_DEF_XMIT_CMDS_MAX *    \
-                               (1 + ISERT_INFLIGHT_DATAOUTS) + \
-                               ISERT_MAX_TX_MISC_PDUS  + \
-                               ISERT_MAX_RX_MISC_PDUS)
-
-#define ISER_RX_PAD_SIZE       (ISER_RECV_DATA_SEG_LEN + 4096 - \
-               (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge)))
-
-#define ISER_VER       0x10
-#define ISER_WSV       0x08
-#define ISER_RSV       0x04
-#define ISCSI_CTRL     0x10
-#define ISER_HELLO     0x20
-#define ISER_HELLORPLY 0x30
index 3db9a65..03022f6 100644 (file)
@@ -132,8 +132,9 @@ MODULE_PARM_DESC(ch_count,
 
 static void srp_add_one(struct ib_device *device);
 static void srp_remove_one(struct ib_device *device, void *client_data);
-static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
-static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
+static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
+               const char *opname);
 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
 
 static struct scsi_transport_template *ib_srp_transport_template;
@@ -445,6 +446,17 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
                                  dev->max_pages_per_mr);
 }
 
+static void srp_drain_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+       struct srp_rdma_ch *ch = cq->cq_context;
+
+       complete(&ch->done);
+}
+
+static struct ib_cqe srp_drain_cqe = {
+       .done           = srp_drain_done,
+};
+
 /**
  * srp_destroy_qp() - destroy an RDMA queue pair
  * @ch: SRP RDMA channel.
@@ -457,10 +469,11 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
 static void srp_destroy_qp(struct srp_rdma_ch *ch)
 {
        static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
-       static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
+       static struct ib_recv_wr wr = { 0 };
        struct ib_recv_wr *bad_wr;
        int ret;
 
+       wr.wr_cqe = &srp_drain_cqe;
        /* Destroying a QP and reusing ch->done is only safe if not connected */
        WARN_ON_ONCE(ch->connected);
 
@@ -489,34 +502,27 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
        struct ib_fmr_pool *fmr_pool = NULL;
        struct srp_fr_pool *fr_pool = NULL;
        const int m = dev->use_fast_reg ? 3 : 1;
-       struct ib_cq_init_attr cq_attr = {};
        int ret;
 
        init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
        if (!init_attr)
                return -ENOMEM;
 
-       /* + 1 for SRP_LAST_WR_ID */
-       cq_attr.cqe = target->queue_size + 1;
-       cq_attr.comp_vector = ch->comp_vector;
-       recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
-                              &cq_attr);
+       /* queue_size + 1 for ib_drain_qp */
+       recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
+                               ch->comp_vector, IB_POLL_SOFTIRQ);
        if (IS_ERR(recv_cq)) {
                ret = PTR_ERR(recv_cq);
                goto err;
        }
 
-       cq_attr.cqe = m * target->queue_size;
-       cq_attr.comp_vector = ch->comp_vector;
-       send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
-                              &cq_attr);
+       send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
+                               ch->comp_vector, IB_POLL_DIRECT);
        if (IS_ERR(send_cq)) {
                ret = PTR_ERR(send_cq);
                goto err_recv_cq;
        }
 
-       ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
-
        init_attr->event_handler       = srp_qp_event;
        init_attr->cap.max_send_wr     = m * target->queue_size;
        init_attr->cap.max_recv_wr     = target->queue_size + 1;
@@ -558,9 +564,9 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
        if (ch->qp)
                srp_destroy_qp(ch);
        if (ch->recv_cq)
-               ib_destroy_cq(ch->recv_cq);
+               ib_free_cq(ch->recv_cq);
        if (ch->send_cq)
-               ib_destroy_cq(ch->send_cq);
+               ib_free_cq(ch->send_cq);
 
        ch->qp = qp;
        ch->recv_cq = recv_cq;
@@ -580,13 +586,13 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
        return 0;
 
 err_qp:
-       ib_destroy_qp(qp);
+       srp_destroy_qp(ch);
 
 err_send_cq:
-       ib_destroy_cq(send_cq);
+       ib_free_cq(send_cq);
 
 err_recv_cq:
-       ib_destroy_cq(recv_cq);
+       ib_free_cq(recv_cq);
 
 err:
        kfree(init_attr);
@@ -622,9 +628,10 @@ static void srp_free_ch_ib(struct srp_target_port *target,
                if (ch->fmr_pool)
                        ib_destroy_fmr_pool(ch->fmr_pool);
        }
+
        srp_destroy_qp(ch);
-       ib_destroy_cq(ch->send_cq);
-       ib_destroy_cq(ch->recv_cq);
+       ib_free_cq(ch->send_cq);
+       ib_free_cq(ch->recv_cq);
 
        /*
         * Avoid that the SCSI error handler tries to use this channel after
@@ -1041,18 +1048,25 @@ out:
        return ret <= 0 ? ret : -ENODEV;
 }
 
-static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
+static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+       srp_handle_qp_err(cq, wc, "INV RKEY");
+}
+
+static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
+               u32 rkey)
 {
        struct ib_send_wr *bad_wr;
        struct ib_send_wr wr = {
                .opcode             = IB_WR_LOCAL_INV,
-               .wr_id              = LOCAL_INV_WR_ID_MASK,
                .next               = NULL,
                .num_sge            = 0,
                .send_flags         = 0,
                .ex.invalidate_rkey = rkey,
        };
 
+       wr.wr_cqe = &req->reg_cqe;
+       req->reg_cqe.done = srp_inv_rkey_err_done;
        return ib_post_send(ch->qp, &wr, &bad_wr);
 }
 
@@ -1074,7 +1088,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
                struct srp_fr_desc **pfr;
 
                for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
-                       res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
+                       res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
                        if (res < 0) {
                                shost_printk(KERN_ERR, target->scsi_host, PFX
                                  "Queueing INV WR for rkey %#x failed (%d)\n",
@@ -1312,7 +1326,13 @@ reset_state:
        return 0;
 }
 
+static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+       srp_handle_qp_err(cq, wc, "FAST REG");
+}
+
 static int srp_map_finish_fr(struct srp_map_state *state,
+                            struct srp_request *req,
                             struct srp_rdma_ch *ch, int sg_nents)
 {
        struct srp_target_port *target = ch->target;
@@ -1349,9 +1369,11 @@ static int srp_map_finish_fr(struct srp_map_state *state,
        if (unlikely(n < 0))
                return n;
 
+       req->reg_cqe.done = srp_reg_mr_err_done;
+
        wr.wr.next = NULL;
        wr.wr.opcode = IB_WR_REG_MR;
-       wr.wr.wr_id = FAST_REG_WR_ID_MASK;
+       wr.wr.wr_cqe = &req->reg_cqe;
        wr.wr.num_sge = 0;
        wr.wr.send_flags = 0;
        wr.mr = desc->mr;
@@ -1455,7 +1477,7 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
        while (count) {
                int i, n;
 
-               n = srp_map_finish_fr(state, ch, count);
+               n = srp_map_finish_fr(state, req, ch, count);
                if (unlikely(n < 0))
                        return n;
 
@@ -1524,7 +1546,7 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
 #ifdef CONFIG_NEED_SG_DMA_LENGTH
                idb_sg->dma_length = idb_sg->length;          /* hack^2 */
 #endif
-               ret = srp_map_finish_fr(&state, ch, 1);
+               ret = srp_map_finish_fr(&state, req, ch, 1);
                if (ret < 0)
                        return ret;
        } else if (dev->use_fmr) {
@@ -1719,7 +1741,7 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
        s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
        struct srp_iu *iu;
 
-       srp_send_completion(ch->send_cq, ch);
+       ib_process_cq_direct(ch->send_cq, -1);
 
        if (list_empty(&ch->free_tx))
                return NULL;
@@ -1739,6 +1761,19 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
        return iu;
 }
 
+static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+       struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
+       struct srp_rdma_ch *ch = cq->cq_context;
+
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               srp_handle_qp_err(cq, wc, "SEND");
+               return;
+       }
+
+       list_add(&iu->list, &ch->free_tx);
+}
+
 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
 {
        struct srp_target_port *target = ch->target;
@@ -1749,8 +1784,10 @@ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
        list.length = len;
        list.lkey   = target->lkey;
 
+       iu->cqe.done = srp_send_done;
+
        wr.next       = NULL;
-       wr.wr_id      = (uintptr_t) iu;
+       wr.wr_cqe     = &iu->cqe;
        wr.sg_list    = &list;
        wr.num_sge    = 1;
        wr.opcode     = IB_WR_SEND;
@@ -1769,8 +1806,10 @@ static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
        list.length = iu->size;
        list.lkey   = target->lkey;
 
+       iu->cqe.done = srp_recv_done;
+
        wr.next     = NULL;
-       wr.wr_id    = (uintptr_t) iu;
+       wr.wr_cqe   = &iu->cqe;
        wr.sg_list  = &list;
        wr.num_sge  = 1;
 
@@ -1902,14 +1941,20 @@ static void srp_process_aer_req(struct srp_rdma_ch *ch,
                             "problems processing SRP_AER_REQ\n");
 }
 
-static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc)
+static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
+       struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
+       struct srp_rdma_ch *ch = cq->cq_context;
        struct srp_target_port *target = ch->target;
        struct ib_device *dev = target->srp_host->srp_dev->dev;
-       struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
        int res;
        u8 opcode;
 
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               srp_handle_qp_err(cq, wc, "RECV");
+               return;
+       }
+
        ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
                                   DMA_FROM_DEVICE);
 
@@ -1972,68 +2017,22 @@ static void srp_tl_err_work(struct work_struct *work)
                srp_start_tl_fail_timers(target->rport);
 }
 
-static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
-                             bool send_err, struct srp_rdma_ch *ch)
+static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
+               const char *opname)
 {
+       struct srp_rdma_ch *ch = cq->cq_context;
        struct srp_target_port *target = ch->target;
 
-       if (wr_id == SRP_LAST_WR_ID) {
-               complete(&ch->done);
-               return;
-       }
-
        if (ch->connected && !target->qp_in_error) {
-               if (wr_id & LOCAL_INV_WR_ID_MASK) {
-                       shost_printk(KERN_ERR, target->scsi_host, PFX
-                                    "LOCAL_INV failed with status %s (%d)\n",
-                                    ib_wc_status_msg(wc_status), wc_status);
-               } else if (wr_id & FAST_REG_WR_ID_MASK) {
-                       shost_printk(KERN_ERR, target->scsi_host, PFX
-                                    "FAST_REG_MR failed status %s (%d)\n",
-                                    ib_wc_status_msg(wc_status), wc_status);
-               } else {
-                       shost_printk(KERN_ERR, target->scsi_host,
-                                    PFX "failed %s status %s (%d) for iu %p\n",
-                                    send_err ? "send" : "receive",
-                                    ib_wc_status_msg(wc_status), wc_status,
-                                    (void *)(uintptr_t)wr_id);
-               }
+               shost_printk(KERN_ERR, target->scsi_host,
+                            PFX "failed %s status %s (%d) for CQE %p\n",
+                            opname, ib_wc_status_msg(wc->status), wc->status,
+                            wc->wr_cqe);
                queue_work(system_long_wq, &target->tl_err_work);
        }
        target->qp_in_error = true;
 }
 
-static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
-{
-       struct srp_rdma_ch *ch = ch_ptr;
-       struct ib_wc wc;
-
-       ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-       while (ib_poll_cq(cq, 1, &wc) > 0) {
-               if (likely(wc.status == IB_WC_SUCCESS)) {
-                       srp_handle_recv(ch, &wc);
-               } else {
-                       srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
-               }
-       }
-}
-
-static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
-{
-       struct srp_rdma_ch *ch = ch_ptr;
-       struct ib_wc wc;
-       struct srp_iu *iu;
-
-       while (ib_poll_cq(cq, 1, &wc) > 0) {
-               if (likely(wc.status == IB_WC_SUCCESS)) {
-                       iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
-                       list_add(&iu->list, &ch->free_tx);
-               } else {
-                       srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
-               }
-       }
-}
-
 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 {
        struct srp_target_port *target = host_to_target(shost);
@@ -3439,27 +3438,17 @@ free_host:
 static void srp_add_one(struct ib_device *device)
 {
        struct srp_device *srp_dev;
-       struct ib_device_attr *dev_attr;
        struct srp_host *host;
        int mr_page_shift, p;
        u64 max_pages_per_mr;
 
-       dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
-       if (!dev_attr)
-               return;
-
-       if (ib_query_device(device, dev_attr)) {
-               pr_warn("Query device failed for %s\n", device->name);
-               goto free_attr;
-       }
-
        srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
        if (!srp_dev)
-               goto free_attr;
+               return;
 
        srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
                            device->map_phys_fmr && device->unmap_fmr);
-       srp_dev->has_fr = (dev_attr->device_cap_flags &
+       srp_dev->has_fr = (device->attrs.device_cap_flags &
                           IB_DEVICE_MEM_MGT_EXTENSIONS);
        if (!srp_dev->has_fmr && !srp_dev->has_fr)
                dev_warn(&device->dev, "neither FMR nor FR is supported\n");
@@ -3473,23 +3462,23 @@ static void srp_add_one(struct ib_device *device)
         * minimum of 4096 bytes. We're unlikely to build large sglists
         * out of smaller entries.
         */
-       mr_page_shift           = max(12, ffs(dev_attr->page_size_cap) - 1);
+       mr_page_shift           = max(12, ffs(device->attrs.page_size_cap) - 1);
        srp_dev->mr_page_size   = 1 << mr_page_shift;
        srp_dev->mr_page_mask   = ~((u64) srp_dev->mr_page_size - 1);
-       max_pages_per_mr        = dev_attr->max_mr_size;
+       max_pages_per_mr        = device->attrs.max_mr_size;
        do_div(max_pages_per_mr, srp_dev->mr_page_size);
        srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
                                          max_pages_per_mr);
        if (srp_dev->use_fast_reg) {
                srp_dev->max_pages_per_mr =
                        min_t(u32, srp_dev->max_pages_per_mr,
-                             dev_attr->max_fast_reg_page_list_len);
+                             device->attrs.max_fast_reg_page_list_len);
        }
        srp_dev->mr_max_size    = srp_dev->mr_page_size *
                                   srp_dev->max_pages_per_mr;
-       pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
-                device->name, mr_page_shift, dev_attr->max_mr_size,
-                dev_attr->max_fast_reg_page_list_len,
+       pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
+                device->name, mr_page_shift, device->attrs.max_mr_size,
+                device->attrs.max_fast_reg_page_list_len,
                 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
 
        INIT_LIST_HEAD(&srp_dev->dev_list);
@@ -3517,17 +3506,13 @@ static void srp_add_one(struct ib_device *device)
        }
 
        ib_set_client_data(device, &srp_client, srp_dev);
-
-       goto free_attr;
+       return;
 
 err_pd:
        ib_dealloc_pd(srp_dev->pd);
 
 free_dev:
        kfree(srp_dev);
-
-free_attr:
-       kfree(dev_attr);
 }
 
 static void srp_remove_one(struct ib_device *device, void *client_data)
@@ -3587,8 +3572,6 @@ static int __init srp_init_module(void)
 {
        int ret;
 
-       BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
-
        if (srp_sg_tablesize) {
                pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
                if (!cmd_sg_entries)
index f6af531..9e05ce4 100644 (file)
@@ -66,11 +66,6 @@ enum {
        SRP_TAG_TSK_MGMT        = 1U << 31,
 
        SRP_MAX_PAGES_PER_MR    = 512,
-
-       LOCAL_INV_WR_ID_MASK    = 1,
-       FAST_REG_WR_ID_MASK     = 2,
-
-       SRP_LAST_WR_ID          = 0xfffffffcU,
 };
 
 enum srp_target_state {
@@ -128,6 +123,7 @@ struct srp_request {
        struct srp_direct_buf  *indirect_desc;
        dma_addr_t              indirect_dma_addr;
        short                   nmdesc;
+       struct ib_cqe           reg_cqe;
 };
 
 /**
@@ -231,6 +227,7 @@ struct srp_iu {
        void                   *buf;
        size_t                  size;
        enum dma_data_direction direction;
+       struct ib_cqe           cqe;
 };
 
 /**
index bc5470c..0c37fee 100644 (file)
@@ -93,6 +93,8 @@ MODULE_PARM_DESC(srpt_service_guid,
 static struct ib_client srpt_client;
 static void srpt_release_channel(struct srpt_rdma_ch *ch);
 static int srpt_queue_status(struct se_cmd *cmd);
+static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc);
 
 /**
  * opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE.
@@ -341,10 +343,10 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
        memset(iocp, 0, sizeof *iocp);
        strcpy(iocp->id_string, SRPT_ID_STRING);
        iocp->guid = cpu_to_be64(srpt_service_guid);
-       iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
-       iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
-       iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
-       iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
+       iocp->vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id);
+       iocp->device_id = cpu_to_be32(sdev->device->attrs.vendor_part_id);
+       iocp->device_version = cpu_to_be16(sdev->device->attrs.hw_ver);
+       iocp->subsys_vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id);
        iocp->subsys_device_id = 0x0;
        iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
        iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
@@ -453,6 +455,7 @@ static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
  * srpt_mad_recv_handler() - MAD reception callback function.
  */
 static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
+                                 struct ib_mad_send_buf *send_buf,
                                  struct ib_mad_recv_wc *mad_wc)
 {
        struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
@@ -778,12 +781,12 @@ static int srpt_post_recv(struct srpt_device *sdev,
        struct ib_recv_wr wr, *bad_wr;
 
        BUG_ON(!sdev);
-       wr.wr_id = encode_wr_id(SRPT_RECV, ioctx->ioctx.index);
-
        list.addr = ioctx->ioctx.dma;
        list.length = srp_max_req_size;
        list.lkey = sdev->pd->local_dma_lkey;
 
+       ioctx->ioctx.cqe.done = srpt_recv_done;
+       wr.wr_cqe = &ioctx->ioctx.cqe;
        wr.next = NULL;
        wr.sg_list = &list;
        wr.num_sge = 1;
@@ -819,8 +822,9 @@ static int srpt_post_send(struct srpt_rdma_ch *ch,
        list.length = len;
        list.lkey = sdev->pd->local_dma_lkey;
 
+       ioctx->ioctx.cqe.done = srpt_send_done;
        wr.next = NULL;
-       wr.wr_id = encode_wr_id(SRPT_SEND, ioctx->ioctx.index);
+       wr.wr_cqe = &ioctx->ioctx.cqe;
        wr.sg_list = &list;
        wr.num_sge = 1;
        wr.opcode = IB_WR_SEND;
@@ -1052,13 +1056,13 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
 
        BUG_ON(!ch);
        BUG_ON(!ioctx);
-       BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius);
+       BUG_ON(ioctx->n_rdma && !ioctx->rdma_wrs);
 
        while (ioctx->n_rdma)
-               kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
+               kfree(ioctx->rdma_wrs[--ioctx->n_rdma].wr.sg_list);
 
-       kfree(ioctx->rdma_ius);
-       ioctx->rdma_ius = NULL;
+       kfree(ioctx->rdma_wrs);
+       ioctx->rdma_wrs = NULL;
 
        if (ioctx->mapped_sg_count) {
                sg = ioctx->sg;
@@ -1082,7 +1086,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
        struct scatterlist *sg, *sg_orig;
        int sg_cnt;
        enum dma_data_direction dir;
-       struct rdma_iu *riu;
+       struct ib_rdma_wr *riu;
        struct srp_direct_buf *db;
        dma_addr_t dma_addr;
        struct ib_sge *sge;
@@ -1109,23 +1113,24 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
 
        ioctx->mapped_sg_count = count;
 
-       if (ioctx->rdma_ius && ioctx->n_rdma_ius)
-               nrdma = ioctx->n_rdma_ius;
+       if (ioctx->rdma_wrs && ioctx->n_rdma_wrs)
+               nrdma = ioctx->n_rdma_wrs;
        else {
                nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE
                        + ioctx->n_rbuf;
 
-               ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu, GFP_KERNEL);
-               if (!ioctx->rdma_ius)
+               ioctx->rdma_wrs = kcalloc(nrdma, sizeof(*ioctx->rdma_wrs),
+                               GFP_KERNEL);
+               if (!ioctx->rdma_wrs)
                        goto free_mem;
 
-               ioctx->n_rdma_ius = nrdma;
+               ioctx->n_rdma_wrs = nrdma;
        }
 
        db = ioctx->rbufs;
        tsize = cmd->data_length;
        dma_len = ib_sg_dma_len(dev, &sg[0]);
-       riu = ioctx->rdma_ius;
+       riu = ioctx->rdma_wrs;
 
        /*
         * For each remote desc - calculate the #ib_sge.
@@ -1139,9 +1144,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
             j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
                rsize = be32_to_cpu(db->len);
                raddr = be64_to_cpu(db->va);
-               riu->raddr = raddr;
+               riu->remote_addr = raddr;
                riu->rkey = be32_to_cpu(db->key);
-               riu->sge_cnt = 0;
+               riu->wr.num_sge = 0;
 
                /* calculate how many sge required for this remote_buf */
                while (rsize > 0 && tsize > 0) {
@@ -1165,33 +1170,35 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
                                rsize = 0;
                        }
 
-                       ++riu->sge_cnt;
+                       ++riu->wr.num_sge;
 
-                       if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
+                       if (rsize > 0 &&
+                           riu->wr.num_sge == SRPT_DEF_SG_PER_WQE) {
                                ++ioctx->n_rdma;
-                               riu->sge =
-                                   kmalloc(riu->sge_cnt * sizeof *riu->sge,
-                                           GFP_KERNEL);
-                               if (!riu->sge)
+                               riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
+                                               sizeof(*riu->wr.sg_list),
+                                               GFP_KERNEL);
+                               if (!riu->wr.sg_list)
                                        goto free_mem;
 
                                ++riu;
-                               riu->sge_cnt = 0;
-                               riu->raddr = raddr;
+                               riu->wr.num_sge = 0;
+                               riu->remote_addr = raddr;
                                riu->rkey = be32_to_cpu(db->key);
                        }
                }
 
                ++ioctx->n_rdma;
-               riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
-                                  GFP_KERNEL);
-               if (!riu->sge)
+               riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
+                                       sizeof(*riu->wr.sg_list),
+                                       GFP_KERNEL);
+               if (!riu->wr.sg_list)
                        goto free_mem;
        }
 
        db = ioctx->rbufs;
        tsize = cmd->data_length;
-       riu = ioctx->rdma_ius;
+       riu = ioctx->rdma_wrs;
        sg = sg_orig;
        dma_len = ib_sg_dma_len(dev, &sg[0]);
        dma_addr = ib_sg_dma_address(dev, &sg[0]);
@@ -1200,7 +1207,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
        for (i = 0, j = 0;
             j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
                rsize = be32_to_cpu(db->len);
-               sge = riu->sge;
+               sge = riu->wr.sg_list;
                k = 0;
 
                while (rsize > 0 && tsize > 0) {
@@ -1232,9 +1239,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
                        }
 
                        ++k;
-                       if (k == riu->sge_cnt && rsize > 0 && tsize > 0) {
+                       if (k == riu->wr.num_sge && rsize > 0 && tsize > 0) {
                                ++riu;
-                               sge = riu->sge;
+                               sge = riu->wr.sg_list;
                                k = 0;
                        } else if (rsize > 0 && tsize > 0)
                                ++sge;
@@ -1277,8 +1284,8 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
        ioctx->n_rbuf = 0;
        ioctx->rbufs = NULL;
        ioctx->n_rdma = 0;
-       ioctx->n_rdma_ius = 0;
-       ioctx->rdma_ius = NULL;
+       ioctx->n_rdma_wrs = 0;
+       ioctx->rdma_wrs = NULL;
        ioctx->mapped_sg_count = 0;
        init_completion(&ioctx->tx_done);
        ioctx->queue_status_only = false;
@@ -1380,118 +1387,44 @@ out:
 }
 
 /**
- * srpt_handle_send_err_comp() - Process an IB_WC_SEND error completion.
- */
-static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id)
-{
-       struct srpt_send_ioctx *ioctx;
-       enum srpt_command_state state;
-       u32 index;
-
-       atomic_inc(&ch->sq_wr_avail);
-
-       index = idx_from_wr_id(wr_id);
-       ioctx = ch->ioctx_ring[index];
-       state = srpt_get_cmd_state(ioctx);
-
-       WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
-               && state != SRPT_STATE_MGMT_RSP_SENT
-               && state != SRPT_STATE_NEED_DATA
-               && state != SRPT_STATE_DONE);
-
-       /* If SRP_RSP sending failed, undo the ch->req_lim change. */
-       if (state == SRPT_STATE_CMD_RSP_SENT
-           || state == SRPT_STATE_MGMT_RSP_SENT)
-               atomic_dec(&ch->req_lim);
-
-       srpt_abort_cmd(ioctx);
-}
-
-/**
- * srpt_handle_send_comp() - Process an IB send completion notification.
- */
-static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
-                                 struct srpt_send_ioctx *ioctx)
-{
-       enum srpt_command_state state;
-
-       atomic_inc(&ch->sq_wr_avail);
-
-       state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
-
-       if (WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
-                   && state != SRPT_STATE_MGMT_RSP_SENT
-                   && state != SRPT_STATE_DONE))
-               pr_debug("state = %d\n", state);
-
-       if (state != SRPT_STATE_DONE) {
-               srpt_unmap_sg_to_ib_sge(ch, ioctx);
-               transport_generic_free_cmd(&ioctx->cmd, 0);
-       } else {
-               pr_err("IB completion has been received too late for"
-                      " wr_id = %u.\n", ioctx->ioctx.index);
-       }
-}
-
-/**
- * srpt_handle_rdma_comp() - Process an IB RDMA completion notification.
- *
  * XXX: what is now target_execute_cmd used to be asynchronous, and unmapping
  * the data that has been transferred via IB RDMA had to be postponed until the
  * check_stop_free() callback.  None of this is necessary anymore and needs to
  * be cleaned up.
  */
-static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
-                                 struct srpt_send_ioctx *ioctx,
-                                 enum srpt_opcode opcode)
+static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
 {
+       struct srpt_rdma_ch *ch = cq->cq_context;
+       struct srpt_send_ioctx *ioctx =
+               container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
+
        WARN_ON(ioctx->n_rdma <= 0);
        atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
 
-       if (opcode == SRPT_RDMA_READ_LAST) {
-               if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
-                                               SRPT_STATE_DATA_IN))
-                       target_execute_cmd(&ioctx->cmd);
-               else
-                       pr_err("%s[%d]: wrong state = %d\n", __func__,
-                              __LINE__, srpt_get_cmd_state(ioctx));
-       } else if (opcode == SRPT_RDMA_ABORT) {
-               ioctx->rdma_aborted = true;
-       } else {
-               WARN(true, "unexpected opcode %d\n", opcode);
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               pr_info("RDMA_READ for ioctx 0x%p failed with status %d\n",
+                       ioctx, wc->status);
+               srpt_abort_cmd(ioctx);
+               return;
        }
+
+       if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
+                                       SRPT_STATE_DATA_IN))
+               target_execute_cmd(&ioctx->cmd);
+       else
+               pr_err("%s[%d]: wrong state = %d\n", __func__,
+                      __LINE__, srpt_get_cmd_state(ioctx));
 }
 
-/**
- * srpt_handle_rdma_err_comp() - Process an IB RDMA error completion.
- */
-static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
-                                     struct srpt_send_ioctx *ioctx,
-                                     enum srpt_opcode opcode)
+static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-       enum srpt_command_state state;
+       struct srpt_send_ioctx *ioctx =
+               container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
 
-       state = srpt_get_cmd_state(ioctx);
-       switch (opcode) {
-       case SRPT_RDMA_READ_LAST:
-               if (ioctx->n_rdma <= 0) {
-                       pr_err("Received invalid RDMA read"
-                              " error completion with idx %d\n",
-                              ioctx->ioctx.index);
-                       break;
-               }
-               atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
-               if (state == SRPT_STATE_NEED_DATA)
-                       srpt_abort_cmd(ioctx);
-               else
-                       pr_err("%s[%d]: wrong state = %d\n",
-                              __func__, __LINE__, state);
-               break;
-       case SRPT_RDMA_WRITE_LAST:
-               break;
-       default:
-               pr_err("%s[%d]: opcode = %u\n", __func__, __LINE__, opcode);
-               break;
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n",
+                       ioctx, wc->status);
+               srpt_abort_cmd(ioctx);
        }
 }
 
@@ -1926,32 +1859,26 @@ out:
        return;
 }
 
-static void srpt_process_rcv_completion(struct ib_cq *cq,
-                                       struct srpt_rdma_ch *ch,
-                                       struct ib_wc *wc)
+static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-       struct srpt_device *sdev = ch->sport->sdev;
-       struct srpt_recv_ioctx *ioctx;
-       u32 index;
+       struct srpt_rdma_ch *ch = cq->cq_context;
+       struct srpt_recv_ioctx *ioctx =
+               container_of(wc->wr_cqe, struct srpt_recv_ioctx, ioctx.cqe);
 
-       index = idx_from_wr_id(wc->wr_id);
        if (wc->status == IB_WC_SUCCESS) {
                int req_lim;
 
                req_lim = atomic_dec_return(&ch->req_lim);
                if (unlikely(req_lim < 0))
                        pr_err("req_lim = %d < 0\n", req_lim);
-               ioctx = sdev->ioctx_ring[index];
                srpt_handle_new_iu(ch, ioctx, NULL);
        } else {
-               pr_info("receiving failed for idx %u with status %d\n",
-                       index, wc->status);
+               pr_info("receiving failed for ioctx %p with status %d\n",
+                       ioctx, wc->status);
        }
 }
 
 /**
- * srpt_process_send_completion() - Process an IB send completion.
- *
  * Note: Although this has not yet been observed during tests, at least in
  * theory it is possible that the srpt_get_send_ioctx() call invoked by
  * srpt_handle_new_iu() fails. This is possible because the req_lim_delta
@@ -1964,108 +1891,51 @@ static void srpt_process_rcv_completion(struct ib_cq *cq,
  * are queued on cmd_wait_list. The code below processes these delayed
  * requests one at a time.
  */
-static void srpt_process_send_completion(struct ib_cq *cq,
-                                        struct srpt_rdma_ch *ch,
-                                        struct ib_wc *wc)
+static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-       struct srpt_send_ioctx *send_ioctx;
-       uint32_t index;
-       enum srpt_opcode opcode;
+       struct srpt_rdma_ch *ch = cq->cq_context;
+       struct srpt_send_ioctx *ioctx =
+               container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe);
+       enum srpt_command_state state;
 
-       index = idx_from_wr_id(wc->wr_id);
-       opcode = opcode_from_wr_id(wc->wr_id);
-       send_ioctx = ch->ioctx_ring[index];
-       if (wc->status == IB_WC_SUCCESS) {
-               if (opcode == SRPT_SEND)
-                       srpt_handle_send_comp(ch, send_ioctx);
-               else {
-                       WARN_ON(opcode != SRPT_RDMA_ABORT &&
-                               wc->opcode != IB_WC_RDMA_READ);
-                       srpt_handle_rdma_comp(ch, send_ioctx, opcode);
-               }
+       state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
+
+       WARN_ON(state != SRPT_STATE_CMD_RSP_SENT &&
+               state != SRPT_STATE_MGMT_RSP_SENT);
+
+       atomic_inc(&ch->sq_wr_avail);
+
+       if (wc->status != IB_WC_SUCCESS) {
+               pr_info("sending response for ioctx 0x%p failed"
+                       " with status %d\n", ioctx, wc->status);
+
+               atomic_dec(&ch->req_lim);
+               srpt_abort_cmd(ioctx);
+               goto out;
+       }
+
+       if (state != SRPT_STATE_DONE) {
+               srpt_unmap_sg_to_ib_sge(ch, ioctx);
+               transport_generic_free_cmd(&ioctx->cmd, 0);
        } else {
-               if (opcode == SRPT_SEND) {
-                       pr_info("sending response for idx %u failed"
-                               " with status %d\n", index, wc->status);
-                       srpt_handle_send_err_comp(ch, wc->wr_id);
-               } else if (opcode != SRPT_RDMA_MID) {
-                       pr_info("RDMA t %d for idx %u failed with"
-                               " status %d\n", opcode, index, wc->status);
-                       srpt_handle_rdma_err_comp(ch, send_ioctx, opcode);
-               }
+               pr_err("IB completion has been received too late for"
+                      " wr_id = %u.\n", ioctx->ioctx.index);
        }
 
-       while (unlikely(opcode == SRPT_SEND
-                       && !list_empty(&ch->cmd_wait_list)
-                       && srpt_get_ch_state(ch) == CH_LIVE
-                       && (send_ioctx = srpt_get_send_ioctx(ch)) != NULL)) {
+out:
+       while (!list_empty(&ch->cmd_wait_list) &&
+              srpt_get_ch_state(ch) == CH_LIVE &&
+              (ioctx = srpt_get_send_ioctx(ch)) != NULL) {
                struct srpt_recv_ioctx *recv_ioctx;
 
                recv_ioctx = list_first_entry(&ch->cmd_wait_list,
                                              struct srpt_recv_ioctx,
                                              wait_list);
                list_del(&recv_ioctx->wait_list);
-               srpt_handle_new_iu(ch, recv_ioctx, send_ioctx);
-       }
-}
-
-static void srpt_process_completion(struct ib_cq *cq, struct srpt_rdma_ch *ch)
-{
-       struct ib_wc *const wc = ch->wc;
-       int i, n;
-
-       WARN_ON(cq != ch->cq);
-
-       ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-       while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) {
-               for (i = 0; i < n; i++) {
-                       if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV)
-                               srpt_process_rcv_completion(cq, ch, &wc[i]);
-                       else
-                               srpt_process_send_completion(cq, ch, &wc[i]);
-               }
+               srpt_handle_new_iu(ch, recv_ioctx, ioctx);
        }
 }
 
-/**
- * srpt_completion() - IB completion queue callback function.
- *
- * Notes:
- * - It is guaranteed that a completion handler will never be invoked
- *   concurrently on two different CPUs for the same completion queue. See also
- *   Documentation/infiniband/core_locking.txt and the implementation of
- *   handle_edge_irq() in kernel/irq/chip.c.
- * - When threaded IRQs are enabled, completion handlers are invoked in thread
- *   context instead of interrupt context.
- */
-static void srpt_completion(struct ib_cq *cq, void *ctx)
-{
-       struct srpt_rdma_ch *ch = ctx;
-
-       wake_up_interruptible(&ch->wait_queue);
-}
-
-static int srpt_compl_thread(void *arg)
-{
-       struct srpt_rdma_ch *ch;
-
-       /* Hibernation / freezing of the SRPT kernel thread is not supported. */
-       current->flags |= PF_NOFREEZE;
-
-       ch = arg;
-       BUG_ON(!ch);
-       pr_info("Session %s: kernel thread %s (PID %d) started\n",
-               ch->sess_name, ch->thread->comm, current->pid);
-       while (!kthread_should_stop()) {
-               wait_event_interruptible(ch->wait_queue,
-                       (srpt_process_completion(ch->cq, ch),
-                        kthread_should_stop()));
-       }
-       pr_info("Session %s: kernel thread %s (PID %d) stopped\n",
-               ch->sess_name, ch->thread->comm, current->pid);
-       return 0;
-}
-
 /**
  * srpt_create_ch_ib() - Create receive and send completion queues.
  */
@@ -2075,7 +1945,6 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
        struct srpt_port *sport = ch->sport;
        struct srpt_device *sdev = sport->sdev;
        u32 srp_sq_size = sport->port_attrib.srp_sq_size;
-       struct ib_cq_init_attr cq_attr = {};
        int ret;
 
        WARN_ON(ch->rq_size < 1);
@@ -2086,9 +1955,8 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
                goto out;
 
 retry:
-       cq_attr.cqe = ch->rq_size + srp_sq_size;
-       ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch,
-                             &cq_attr);
+       ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + srp_sq_size,
+                       0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE);
        if (IS_ERR(ch->cq)) {
                ret = PTR_ERR(ch->cq);
                pr_err("failed to create CQ cqe= %d ret= %d\n",
@@ -2131,18 +1999,6 @@ retry:
        if (ret)
                goto err_destroy_qp;
 
-       init_waitqueue_head(&ch->wait_queue);
-
-       pr_debug("creating thread for session %s\n", ch->sess_name);
-
-       ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl");
-       if (IS_ERR(ch->thread)) {
-               pr_err("failed to create kernel thread %ld\n",
-                      PTR_ERR(ch->thread));
-               ch->thread = NULL;
-               goto err_destroy_qp;
-       }
-
 out:
        kfree(qp_init);
        return ret;
@@ -2150,17 +2006,14 @@ out:
 err_destroy_qp:
        ib_destroy_qp(ch->qp);
 err_destroy_cq:
-       ib_destroy_cq(ch->cq);
+       ib_free_cq(ch->cq);
        goto out;
 }
 
 static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
 {
-       if (ch->thread)
-               kthread_stop(ch->thread);
-
        ib_destroy_qp(ch->qp);
-       ib_destroy_cq(ch->cq);
+       ib_free_cq(ch->cq);
 }
 
 /**
@@ -2808,12 +2661,8 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
                              struct srpt_send_ioctx *ioctx)
 {
-       struct ib_rdma_wr wr;
        struct ib_send_wr *bad_wr;
-       struct rdma_iu *riu;
-       int i;
-       int ret;
-       int sq_wr_avail;
+       int sq_wr_avail, ret, i;
        enum dma_data_direction dir;
        const int n_rdma = ioctx->n_rdma;
 
@@ -2829,59 +2678,32 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
                }
        }
 
-       ioctx->rdma_aborted = false;
-       ret = 0;
-       riu = ioctx->rdma_ius;
-       memset(&wr, 0, sizeof wr);
-
-       for (i = 0; i < n_rdma; ++i, ++riu) {
-               if (dir == DMA_FROM_DEVICE) {
-                       wr.wr.opcode = IB_WR_RDMA_WRITE;
-                       wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
-                                               SRPT_RDMA_WRITE_LAST :
-                                               SRPT_RDMA_MID,
-                                               ioctx->ioctx.index);
-               } else {
-                       wr.wr.opcode = IB_WR_RDMA_READ;
-                       wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
-                                               SRPT_RDMA_READ_LAST :
-                                               SRPT_RDMA_MID,
-                                               ioctx->ioctx.index);
-               }
-               wr.wr.next = NULL;
-               wr.remote_addr = riu->raddr;
-               wr.rkey = riu->rkey;
-               wr.wr.num_sge = riu->sge_cnt;
-               wr.wr.sg_list = riu->sge;
+       for (i = 0; i < n_rdma; i++) {
+               struct ib_send_wr *wr = &ioctx->rdma_wrs[i].wr;
 
-               /* only get completion event for the last rdma write */
-               if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE)
-                       wr.wr.send_flags = IB_SEND_SIGNALED;
+               wr->opcode = (dir == DMA_FROM_DEVICE) ?
+                               IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
 
-               ret = ib_post_send(ch->qp, &wr.wr, &bad_wr);
-               if (ret)
-                       break;
+               if (i == n_rdma - 1) {
+                       /* only get completion event for the last rdma read */
+                       if (dir == DMA_TO_DEVICE) {
+                               wr->send_flags = IB_SEND_SIGNALED;
+                               ioctx->rdma_cqe.done = srpt_rdma_read_done;
+                       } else {
+                               ioctx->rdma_cqe.done = srpt_rdma_write_done;
+                       }
+                       wr->wr_cqe = &ioctx->rdma_cqe;
+                       wr->next = NULL;
+               } else {
+                       wr->wr_cqe = NULL;
+                       wr->next = &ioctx->rdma_wrs[i + 1].wr;
+               }
        }
 
+       ret = ib_post_send(ch->qp, &ioctx->rdma_wrs->wr, &bad_wr);
        if (ret)
                pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
                                 __func__, __LINE__, ret, i, n_rdma);
-       if (ret && i > 0) {
-               wr.wr.num_sge = 0;
-               wr.wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index);
-               wr.wr.send_flags = IB_SEND_SIGNALED;
-               while (ch->state == CH_LIVE &&
-                       ib_post_send(ch->qp, &wr.wr, &bad_wr) != 0) {
-                       pr_info("Trying to abort failed RDMA transfer [%d]\n",
-                               ioctx->ioctx.index);
-                       msleep(1000);
-               }
-               while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) {
-                       pr_info("Waiting until RDMA abort finished [%d]\n",
-                               ioctx->ioctx.index);
-                       msleep(1000);
-               }
-       }
 out:
        if (unlikely(dir == DMA_TO_DEVICE && ret < 0))
                atomic_add(n_rdma, &ch->sq_wr_avail);
@@ -3190,14 +3012,11 @@ static void srpt_add_one(struct ib_device *device)
        init_waitqueue_head(&sdev->ch_releaseQ);
        spin_lock_init(&sdev->spinlock);
 
-       if (ib_query_device(device, &sdev->dev_attr))
-               goto free_dev;
-
        sdev->pd = ib_alloc_pd(device);
        if (IS_ERR(sdev->pd))
                goto free_dev;
 
-       sdev->srq_size = min(srpt_srq_size, sdev->dev_attr.max_srq_wr);
+       sdev->srq_size = min(srpt_srq_size, sdev->device->attrs.max_srq_wr);
 
        srq_attr.event_handler = srpt_srq_event;
        srq_attr.srq_context = (void *)sdev;
@@ -3211,7 +3030,7 @@ static void srpt_add_one(struct ib_device *device)
                goto err_pd;
 
        pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n",
-                __func__, sdev->srq_size, sdev->dev_attr.max_srq_wr,
+                __func__, sdev->srq_size, sdev->device->attrs.max_srq_wr,
                 device->name);
 
        if (!srpt_service_guid)
index 5366e0a..09037f2 100644 (file)
@@ -128,36 +128,6 @@ enum {
        DEFAULT_MAX_RDMA_SIZE = 65536,
 };
 
-enum srpt_opcode {
-       SRPT_RECV,
-       SRPT_SEND,
-       SRPT_RDMA_MID,
-       SRPT_RDMA_ABORT,
-       SRPT_RDMA_READ_LAST,
-       SRPT_RDMA_WRITE_LAST,
-};
-
-static inline u64 encode_wr_id(u8 opcode, u32 idx)
-{
-       return ((u64)opcode << 32) | idx;
-}
-static inline enum srpt_opcode opcode_from_wr_id(u64 wr_id)
-{
-       return wr_id >> 32;
-}
-static inline u32 idx_from_wr_id(u64 wr_id)
-{
-       return (u32)wr_id;
-}
-
-struct rdma_iu {
-       u64             raddr;
-       u32             rkey;
-       struct ib_sge   *sge;
-       u32             sge_cnt;
-       int             mem_id;
-};
-
 /**
  * enum srpt_command_state - SCSI command state managed by SRPT.
  * @SRPT_STATE_NEW:           New command arrived and is being processed.
@@ -189,6 +159,7 @@ enum srpt_command_state {
  * @index: Index of the I/O context in its ioctx_ring array.
  */
 struct srpt_ioctx {
+       struct ib_cqe           cqe;
        void                    *buf;
        dma_addr_t              dma;
        uint32_t                index;
@@ -215,32 +186,30 @@ struct srpt_recv_ioctx {
  * @sg:          Pointer to sg-list associated with this I/O context.
  * @sg_cnt:      SG-list size.
  * @mapped_sg_count: ib_dma_map_sg() return value.
- * @n_rdma_ius:  Number of elements in the rdma_ius array.
- * @rdma_ius:    Array with information about the RDMA mapping.
+ * @n_rdma_wrs:  Number of elements in the rdma_wrs array.
+ * @rdma_wrs:    Array with information about the RDMA mapping.
  * @tag:         Tag of the received SRP information unit.
  * @spinlock:    Protects 'state'.
  * @state:       I/O context state.
- * @rdma_aborted: If initiating a multipart RDMA transfer failed, whether
- *              the already initiated transfers have finished.
  * @cmd:         Target core command data structure.
  * @sense_data:  SCSI sense data.
  */
 struct srpt_send_ioctx {
        struct srpt_ioctx       ioctx;
        struct srpt_rdma_ch     *ch;
-       struct rdma_iu          *rdma_ius;
+       struct ib_rdma_wr       *rdma_wrs;
+       struct ib_cqe           rdma_cqe;
        struct srp_direct_buf   *rbufs;
        struct srp_direct_buf   single_rbuf;
        struct scatterlist      *sg;
        struct list_head        free_list;
        spinlock_t              spinlock;
        enum srpt_command_state state;
-       bool                    rdma_aborted;
        struct se_cmd           cmd;
        struct completion       tx_done;
        int                     sg_cnt;
        int                     mapped_sg_count;
-       u16                     n_rdma_ius;
+       u16                     n_rdma_wrs;
        u8                      n_rdma;
        u8                      n_rbuf;
        bool                    queue_status_only;
@@ -267,9 +236,6 @@ enum rdma_ch_state {
 
 /**
  * struct srpt_rdma_ch - RDMA channel.
- * @wait_queue:    Allows the kernel thread to wait for more work.
- * @thread:        Kernel thread that processes the IB queues associated with
- *                 the channel.
  * @cm_id:         IB CM ID associated with the channel.
  * @qp:            IB queue pair used for communicating over this channel.
  * @cq:            IB completion queue for this channel.
@@ -288,7 +254,6 @@ enum rdma_ch_state {
  * @free_list:     Head of list with free send I/O contexts.
  * @state:         channel state. See also enum rdma_ch_state.
  * @ioctx_ring:    Send ring.
- * @wc:            IB work completion array for srpt_process_completion().
  * @list:          Node for insertion in the srpt_device.rch_list list.
  * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This
  *                 list contains struct srpt_ioctx elements and is protected
@@ -299,8 +264,6 @@ enum rdma_ch_state {
  * @release_done:  Enables waiting for srpt_release_channel() completion.
  */
 struct srpt_rdma_ch {
-       wait_queue_head_t       wait_queue;
-       struct task_struct      *thread;
        struct ib_cm_id         *cm_id;
        struct ib_qp            *qp;
        struct ib_cq            *cq;
@@ -317,7 +280,6 @@ struct srpt_rdma_ch {
        struct list_head        free_list;
        enum rdma_ch_state      state;
        struct srpt_send_ioctx  **ioctx_ring;
-       struct ib_wc            wc[16];
        struct list_head        list;
        struct list_head        cmd_wait_list;
        struct se_session       *sess;
@@ -377,8 +339,6 @@ struct srpt_port {
  * @mr:            L_Key (local key) with write access to all local memory.
  * @srq:           Per-HCA SRQ (shared receive queue).
  * @cm_id:         Connection identifier.
- * @dev_attr:      Attributes of the InfiniBand device as obtained during the
- *                 ib_client.add() callback.
  * @srq_size:      SRQ size.
  * @ioctx_ring:    Per-HCA SRQ.
  * @rch_list:      Per-device channel list -- see also srpt_rdma_ch.list.
@@ -393,7 +353,6 @@ struct srpt_device {
        struct ib_pd            *pd;
        struct ib_srq           *srq;
        struct ib_cm_id         *cm_id;
-       struct ib_device_attr   dev_attr;
        int                     srq_size;
        struct srpt_recv_ioctx  **ioctx_ring;
        struct list_head        rch_list;
index fd4100d..6727954 100644 (file)
  */
 
 #include <linux/kernel.h>
+#include <linux/input.h>
+#include <linux/rcupdate.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/module.h>
 #include <linux/usb/input.h>
+#include <linux/usb/quirks.h>
 
 #define DRIVER_AUTHOR "Marko Friedemann <mfr@bmx-chemnitz.de>"
 #define DRIVER_DESC "X-Box pad driver"
@@ -125,7 +128,7 @@ static const struct xpad_device {
        { 0x045e, 0x0289, "Microsoft X-Box pad v2 (US)", 0, XTYPE_XBOX },
        { 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 },
        { 0x045e, 0x02d1, "Microsoft X-Box One pad", 0, XTYPE_XBOXONE },
-       { 0x045e, 0x02dd, "Microsoft X-Box One pad (Covert Forces)", 0, XTYPE_XBOXONE },
+       { 0x045e, 0x02dd, "Microsoft X-Box One pad (Firmware 2015)", 0, XTYPE_XBOXONE },
        { 0x045e, 0x0291, "Xbox 360 Wireless Receiver (XBOX)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W },
        { 0x045e, 0x0719, "Xbox 360 Wireless Receiver", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W },
        { 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX },
@@ -317,21 +320,42 @@ static struct usb_device_id xpad_table[] = {
 
 MODULE_DEVICE_TABLE(usb, xpad_table);
 
+struct xpad_output_packet {
+       u8 data[XPAD_PKT_LEN];
+       u8 len;
+       bool pending;
+};
+
+#define XPAD_OUT_CMD_IDX       0
+#define XPAD_OUT_FF_IDX                1
+#define XPAD_OUT_LED_IDX       (1 + IS_ENABLED(CONFIG_JOYSTICK_XPAD_FF))
+#define XPAD_NUM_OUT_PACKETS   (1 + \
+                                IS_ENABLED(CONFIG_JOYSTICK_XPAD_FF) + \
+                                IS_ENABLED(CONFIG_JOYSTICK_XPAD_LEDS))
+
 struct usb_xpad {
        struct input_dev *dev;          /* input device interface */
+       struct input_dev __rcu *x360w_dev;
        struct usb_device *udev;        /* usb device */
        struct usb_interface *intf;     /* usb interface */
 
-       int pad_present;
+       bool pad_present;
+       bool input_created;
 
        struct urb *irq_in;             /* urb for interrupt in report */
        unsigned char *idata;           /* input data */
        dma_addr_t idata_dma;
 
        struct urb *irq_out;            /* urb for interrupt out report */
+       struct usb_anchor irq_out_anchor;
+       bool irq_out_active;            /* we must not use an active URB */
+       u8 odata_serial;                /* serial number for xbox one protocol */
        unsigned char *odata;           /* output data */
        dma_addr_t odata_dma;
-       struct mutex odata_mutex;
+       spinlock_t odata_lock;
+
+       struct xpad_output_packet out_packets[XPAD_NUM_OUT_PACKETS];
+       int last_out_packet;
 
 #if defined(CONFIG_JOYSTICK_XPAD_LEDS)
        struct xpad_led *led;
@@ -343,8 +367,12 @@ struct usb_xpad {
        int xtype;                      /* type of xbox device */
        int pad_nr;                     /* the order x360 pads were attached */
        const char *name;               /* name of the device */
+       struct work_struct work;        /* init/remove device from callback */
 };
 
+static int xpad_init_input(struct usb_xpad *xpad);
+static void xpad_deinit_input(struct usb_xpad *xpad);
+
 /*
  *     xpad_process_packet
  *
@@ -424,11 +452,9 @@ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *d
  *             http://www.free60.org/wiki/Gamepad
  */
 
-static void xpad360_process_packet(struct usb_xpad *xpad,
+static void xpad360_process_packet(struct usb_xpad *xpad, struct input_dev *dev,
                                   u16 cmd, unsigned char *data)
 {
-       struct input_dev *dev = xpad->dev;
-
        /* digital pad */
        if (xpad->mapping & MAP_DPAD_TO_BUTTONS) {
                /* dpad as buttons (left, right, up, down) */
@@ -495,7 +521,30 @@ static void xpad360_process_packet(struct usb_xpad *xpad,
        input_sync(dev);
 }
 
-static void xpad_identify_controller(struct usb_xpad *xpad);
+static void xpad_presence_work(struct work_struct *work)
+{
+       struct usb_xpad *xpad = container_of(work, struct usb_xpad, work);
+       int error;
+
+       if (xpad->pad_present) {
+               error = xpad_init_input(xpad);
+               if (error) {
+                       /* complain only, not much else we can do here */
+                       dev_err(&xpad->dev->dev,
+                               "unable to init device: %d\n", error);
+               } else {
+                       rcu_assign_pointer(xpad->x360w_dev, xpad->dev);
+               }
+       } else {
+               RCU_INIT_POINTER(xpad->x360w_dev, NULL);
+               synchronize_rcu();
+               /*
+                * Now that we are sure xpad360w_process_packet is not
+                * using input device we can get rid of it.
+                */
+               xpad_deinit_input(xpad);
+       }
+}
 
 /*
  * xpad360w_process_packet
@@ -513,24 +562,28 @@ static void xpad_identify_controller(struct usb_xpad *xpad);
  */
 static void xpad360w_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data)
 {
+       struct input_dev *dev;
+       bool present;
+
        /* Presence change */
        if (data[0] & 0x08) {
-               if (data[1] & 0x80) {
-                       xpad->pad_present = 1;
-                       /*
-                        * Light up the segment corresponding to
-                        * controller number.
-                        */
-                       xpad_identify_controller(xpad);
-               } else
-                       xpad->pad_present = 0;
+               present = (data[1] & 0x80) != 0;
+
+               if (xpad->pad_present != present) {
+                       xpad->pad_present = present;
+                       schedule_work(&xpad->work);
+               }
        }
 
        /* Valid pad data */
-       if (!(data[1] & 0x1))
+       if (data[1] != 0x1)
                return;
 
-       xpad360_process_packet(xpad, cmd, &data[4]);
+       rcu_read_lock();
+       dev = rcu_dereference(xpad->x360w_dev);
+       if (dev)
+               xpad360_process_packet(xpad, dev, cmd, &data[4]);
+       rcu_read_unlock();
 }
 
 /*
@@ -659,7 +712,7 @@ static void xpad_irq_in(struct urb *urb)
 
        switch (xpad->xtype) {
        case XTYPE_XBOX360:
-               xpad360_process_packet(xpad, 0, xpad->idata);
+               xpad360_process_packet(xpad, xpad->dev, 0, xpad->idata);
                break;
        case XTYPE_XBOX360W:
                xpad360w_process_packet(xpad, 0, xpad->idata);
@@ -678,18 +731,73 @@ exit:
                        __func__, retval);
 }
 
+/* Callers must hold xpad->odata_lock spinlock */
+static bool xpad_prepare_next_out_packet(struct usb_xpad *xpad)
+{
+       struct xpad_output_packet *pkt, *packet = NULL;
+       int i;
+
+       for (i = 0; i < XPAD_NUM_OUT_PACKETS; i++) {
+               if (++xpad->last_out_packet >= XPAD_NUM_OUT_PACKETS)
+                       xpad->last_out_packet = 0;
+
+               pkt = &xpad->out_packets[xpad->last_out_packet];
+               if (pkt->pending) {
+                       dev_dbg(&xpad->intf->dev,
+                               "%s - found pending output packet %d\n",
+                               __func__, xpad->last_out_packet);
+                       packet = pkt;
+                       break;
+               }
+       }
+
+       if (packet) {
+               memcpy(xpad->odata, packet->data, packet->len);
+               xpad->irq_out->transfer_buffer_length = packet->len;
+               return true;
+       }
+
+       return false;
+}
+
+/* Callers must hold xpad->odata_lock spinlock */
+static int xpad_try_sending_next_out_packet(struct usb_xpad *xpad)
+{
+       int error;
+
+       if (!xpad->irq_out_active && xpad_prepare_next_out_packet(xpad)) {
+               usb_anchor_urb(xpad->irq_out, &xpad->irq_out_anchor);
+               error = usb_submit_urb(xpad->irq_out, GFP_ATOMIC);
+               if (error) {
+                       dev_err(&xpad->intf->dev,
+                               "%s - usb_submit_urb failed with result %d\n",
+                               __func__, error);
+                       usb_unanchor_urb(xpad->irq_out);
+                       return -EIO;
+               }
+
+               xpad->irq_out_active = true;
+       }
+
+       return 0;
+}
+
 static void xpad_irq_out(struct urb *urb)
 {
        struct usb_xpad *xpad = urb->context;
        struct device *dev = &xpad->intf->dev;
-       int retval, status;
+       int status = urb->status;
+       int error;
+       unsigned long flags;
 
-       status = urb->status;
+       spin_lock_irqsave(&xpad->odata_lock, flags);
 
        switch (status) {
        case 0:
                /* success */
-               return;
+               xpad->out_packets[xpad->last_out_packet].pending = false;
+               xpad->irq_out_active = xpad_prepare_next_out_packet(xpad);
+               break;
 
        case -ECONNRESET:
        case -ENOENT:
@@ -697,19 +805,28 @@ static void xpad_irq_out(struct urb *urb)
                /* this urb is terminated, clean up */
                dev_dbg(dev, "%s - urb shutting down with status: %d\n",
                        __func__, status);
-               return;
+               xpad->irq_out_active = false;
+               break;
 
        default:
                dev_dbg(dev, "%s - nonzero urb status received: %d\n",
                        __func__, status);
-               goto exit;
+               break;
        }
 
-exit:
-       retval = usb_submit_urb(urb, GFP_ATOMIC);
-       if (retval)
-               dev_err(dev, "%s - usb_submit_urb failed with result %d\n",
-                       __func__, retval);
+       if (xpad->irq_out_active) {
+               usb_anchor_urb(urb, &xpad->irq_out_anchor);
+               error = usb_submit_urb(urb, GFP_ATOMIC);
+               if (error) {
+                       dev_err(dev,
+                               "%s - usb_submit_urb failed with result %d\n",
+                               __func__, error);
+                       usb_unanchor_urb(urb);
+                       xpad->irq_out_active = false;
+               }
+       }
+
+       spin_unlock_irqrestore(&xpad->odata_lock, flags);
 }
 
 static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
@@ -721,6 +838,8 @@ static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
        if (xpad->xtype == XTYPE_UNKNOWN)
                return 0;
 
+       init_usb_anchor(&xpad->irq_out_anchor);
+
        xpad->odata = usb_alloc_coherent(xpad->udev, XPAD_PKT_LEN,
                                         GFP_KERNEL, &xpad->odata_dma);
        if (!xpad->odata) {
@@ -728,7 +847,7 @@ static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
                goto fail1;
        }
 
-       mutex_init(&xpad->odata_mutex);
+       spin_lock_init(&xpad->odata_lock);
 
        xpad->irq_out = usb_alloc_urb(0, GFP_KERNEL);
        if (!xpad->irq_out) {
@@ -755,8 +874,14 @@ static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
 
 static void xpad_stop_output(struct usb_xpad *xpad)
 {
-       if (xpad->xtype != XTYPE_UNKNOWN)
-               usb_kill_urb(xpad->irq_out);
+       if (xpad->xtype != XTYPE_UNKNOWN) {
+               if (!usb_wait_anchor_empty_timeout(&xpad->irq_out_anchor,
+                                                  5000)) {
+                       dev_warn(&xpad->intf->dev,
+                                "timed out waiting for output URB to complete, killing\n");
+                       usb_kill_anchored_urbs(&xpad->irq_out_anchor);
+               }
+       }
 }
 
 static void xpad_deinit_output(struct usb_xpad *xpad)
@@ -770,27 +895,60 @@ static void xpad_deinit_output(struct usb_xpad *xpad)
 
 static int xpad_inquiry_pad_presence(struct usb_xpad *xpad)
 {
+       struct xpad_output_packet *packet =
+                       &xpad->out_packets[XPAD_OUT_CMD_IDX];
+       unsigned long flags;
        int retval;
 
-       mutex_lock(&xpad->odata_mutex);
+       spin_lock_irqsave(&xpad->odata_lock, flags);
+
+       packet->data[0] = 0x08;
+       packet->data[1] = 0x00;
+       packet->data[2] = 0x0F;
+       packet->data[3] = 0xC0;
+       packet->data[4] = 0x00;
+       packet->data[5] = 0x00;
+       packet->data[6] = 0x00;
+       packet->data[7] = 0x00;
+       packet->data[8] = 0x00;
+       packet->data[9] = 0x00;
+       packet->data[10] = 0x00;
+       packet->data[11] = 0x00;
+       packet->len = 12;
+       packet->pending = true;
+
+       /* Reset the sequence so we send out presence first */
+       xpad->last_out_packet = -1;
+       retval = xpad_try_sending_next_out_packet(xpad);
+
+       spin_unlock_irqrestore(&xpad->odata_lock, flags);
 
-       xpad->odata[0] = 0x08;
-       xpad->odata[1] = 0x00;
-       xpad->odata[2] = 0x0F;
-       xpad->odata[3] = 0xC0;
-       xpad->odata[4] = 0x00;
-       xpad->odata[5] = 0x00;
-       xpad->odata[6] = 0x00;
-       xpad->odata[7] = 0x00;
-       xpad->odata[8] = 0x00;
-       xpad->odata[9] = 0x00;
-       xpad->odata[10] = 0x00;
-       xpad->odata[11] = 0x00;
-       xpad->irq_out->transfer_buffer_length = 12;
+       return retval;
+}
+
+static int xpad_start_xbox_one(struct usb_xpad *xpad)
+{
+       struct xpad_output_packet *packet =
+                       &xpad->out_packets[XPAD_OUT_CMD_IDX];
+       unsigned long flags;
+       int retval;
 
-       retval = usb_submit_urb(xpad->irq_out, GFP_KERNEL);
+       spin_lock_irqsave(&xpad->odata_lock, flags);
 
-       mutex_unlock(&xpad->odata_mutex);
+       /* Xbox one controller needs to be initialized. */
+       packet->data[0] = 0x05;
+       packet->data[1] = 0x20;
+       packet->data[2] = xpad->odata_serial++; /* packet serial */
+       packet->data[3] = 0x01; /* rumble bit enable?  */
+       packet->data[4] = 0x00;
+       packet->len = 5;
+       packet->pending = true;
+
+       /* Reset the sequence so we send out start packet first */
+       xpad->last_out_packet = -1;
+       retval = xpad_try_sending_next_out_packet(xpad);
+
+       spin_unlock_irqrestore(&xpad->odata_lock, flags);
 
        return retval;
 }
@@ -799,8 +957,11 @@ static int xpad_inquiry_pad_presence(struct usb_xpad *xpad)
 static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect)
 {
        struct usb_xpad *xpad = input_get_drvdata(dev);
+       struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_FF_IDX];
        __u16 strong;
        __u16 weak;
+       int retval;
+       unsigned long flags;
 
        if (effect->type != FF_RUMBLE)
                return 0;
@@ -808,69 +969,81 @@ static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect
        strong = effect->u.rumble.strong_magnitude;
        weak = effect->u.rumble.weak_magnitude;
 
+       spin_lock_irqsave(&xpad->odata_lock, flags);
+
        switch (xpad->xtype) {
        case XTYPE_XBOX:
-               xpad->odata[0] = 0x00;
-               xpad->odata[1] = 0x06;
-               xpad->odata[2] = 0x00;
-               xpad->odata[3] = strong / 256;  /* left actuator */
-               xpad->odata[4] = 0x00;
-               xpad->odata[5] = weak / 256;    /* right actuator */
-               xpad->irq_out->transfer_buffer_length = 6;
+               packet->data[0] = 0x00;
+               packet->data[1] = 0x06;
+               packet->data[2] = 0x00;
+               packet->data[3] = strong / 256; /* left actuator */
+               packet->data[4] = 0x00;
+               packet->data[5] = weak / 256;   /* right actuator */
+               packet->len = 6;
+               packet->pending = true;
                break;
 
        case XTYPE_XBOX360:
-               xpad->odata[0] = 0x00;
-               xpad->odata[1] = 0x08;
-               xpad->odata[2] = 0x00;
-               xpad->odata[3] = strong / 256;  /* left actuator? */
-               xpad->odata[4] = weak / 256;    /* right actuator? */
-               xpad->odata[5] = 0x00;
-               xpad->odata[6] = 0x00;
-               xpad->odata[7] = 0x00;
-               xpad->irq_out->transfer_buffer_length = 8;
+               packet->data[0] = 0x00;
+               packet->data[1] = 0x08;
+               packet->data[2] = 0x00;
+               packet->data[3] = strong / 256;  /* left actuator? */
+               packet->data[4] = weak / 256;   /* right actuator? */
+               packet->data[5] = 0x00;
+               packet->data[6] = 0x00;
+               packet->data[7] = 0x00;
+               packet->len = 8;
+               packet->pending = true;
                break;
 
        case XTYPE_XBOX360W:
-               xpad->odata[0] = 0x00;
-               xpad->odata[1] = 0x01;
-               xpad->odata[2] = 0x0F;
-               xpad->odata[3] = 0xC0;
-               xpad->odata[4] = 0x00;
-               xpad->odata[5] = strong / 256;
-               xpad->odata[6] = weak / 256;
-               xpad->odata[7] = 0x00;
-               xpad->odata[8] = 0x00;
-               xpad->odata[9] = 0x00;
-               xpad->odata[10] = 0x00;
-               xpad->odata[11] = 0x00;
-               xpad->irq_out->transfer_buffer_length = 12;
+               packet->data[0] = 0x00;
+               packet->data[1] = 0x01;
+               packet->data[2] = 0x0F;
+               packet->data[3] = 0xC0;
+               packet->data[4] = 0x00;
+               packet->data[5] = strong / 256;
+               packet->data[6] = weak / 256;
+               packet->data[7] = 0x00;
+               packet->data[8] = 0x00;
+               packet->data[9] = 0x00;
+               packet->data[10] = 0x00;
+               packet->data[11] = 0x00;
+               packet->len = 12;
+               packet->pending = true;
                break;
 
        case XTYPE_XBOXONE:
-               xpad->odata[0] = 0x09; /* activate rumble */
-               xpad->odata[1] = 0x08;
-               xpad->odata[2] = 0x00;
-               xpad->odata[3] = 0x08; /* continuous effect */
-               xpad->odata[4] = 0x00; /* simple rumble mode */
-               xpad->odata[5] = 0x03; /* L and R actuator only */
-               xpad->odata[6] = 0x00; /* TODO: LT actuator */
-               xpad->odata[7] = 0x00; /* TODO: RT actuator */
-               xpad->odata[8] = strong / 256;  /* left actuator */
-               xpad->odata[9] = weak / 256;    /* right actuator */
-               xpad->odata[10] = 0x80; /* length of pulse */
-               xpad->odata[11] = 0x00; /* stop period of pulse */
-               xpad->irq_out->transfer_buffer_length = 12;
+               packet->data[0] = 0x09; /* activate rumble */
+               packet->data[1] = 0x08;
+               packet->data[2] = xpad->odata_serial++;
+               packet->data[3] = 0x08; /* continuous effect */
+               packet->data[4] = 0x00; /* simple rumble mode */
+               packet->data[5] = 0x03; /* L and R actuator only */
+               packet->data[6] = 0x00; /* TODO: LT actuator */
+               packet->data[7] = 0x00; /* TODO: RT actuator */
+               packet->data[8] = strong / 512; /* left actuator */
+               packet->data[9] = weak / 512;   /* right actuator */
+               packet->data[10] = 0x80;        /* length of pulse */
+               packet->data[11] = 0x00;        /* stop period of pulse */
+               packet->data[12] = 0x00;
+               packet->len = 13;
+               packet->pending = true;
                break;
 
        default:
                dev_dbg(&xpad->dev->dev,
                        "%s - rumble command sent to unsupported xpad type: %d\n",
                        __func__, xpad->xtype);
-               return -EINVAL;
+               retval = -EINVAL;
+               goto out;
        }
 
-       return usb_submit_urb(xpad->irq_out, GFP_ATOMIC);
+       retval = xpad_try_sending_next_out_packet(xpad);
+
+out:
+       spin_unlock_irqrestore(&xpad->odata_lock, flags);
+       return retval;
 }
 
 static int xpad_init_ff(struct usb_xpad *xpad)
@@ -921,36 +1094,44 @@ struct xpad_led {
  */
 static void xpad_send_led_command(struct usb_xpad *xpad, int command)
 {
+       struct xpad_output_packet *packet =
+                       &xpad->out_packets[XPAD_OUT_LED_IDX];
+       unsigned long flags;
+
        command %= 16;
 
-       mutex_lock(&xpad->odata_mutex);
+       spin_lock_irqsave(&xpad->odata_lock, flags);
 
        switch (xpad->xtype) {
        case XTYPE_XBOX360:
-               xpad->odata[0] = 0x01;
-               xpad->odata[1] = 0x03;
-               xpad->odata[2] = command;
-               xpad->irq_out->transfer_buffer_length = 3;
+               packet->data[0] = 0x01;
+               packet->data[1] = 0x03;
+               packet->data[2] = command;
+               packet->len = 3;
+               packet->pending = true;
                break;
+
        case XTYPE_XBOX360W:
-               xpad->odata[0] = 0x00;
-               xpad->odata[1] = 0x00;
-               xpad->odata[2] = 0x08;
-               xpad->odata[3] = 0x40 + command;
-               xpad->odata[4] = 0x00;
-               xpad->odata[5] = 0x00;
-               xpad->odata[6] = 0x00;
-               xpad->odata[7] = 0x00;
-               xpad->odata[8] = 0x00;
-               xpad->odata[9] = 0x00;
-               xpad->odata[10] = 0x00;
-               xpad->odata[11] = 0x00;
-               xpad->irq_out->transfer_buffer_length = 12;
+               packet->data[0] = 0x00;
+               packet->data[1] = 0x00;
+               packet->data[2] = 0x08;
+               packet->data[3] = 0x40 + command;
+               packet->data[4] = 0x00;
+               packet->data[5] = 0x00;
+               packet->data[6] = 0x00;
+               packet->data[7] = 0x00;
+               packet->data[8] = 0x00;
+               packet->data[9] = 0x00;
+               packet->data[10] = 0x00;
+               packet->data[11] = 0x00;
+               packet->len = 12;
+               packet->pending = true;
                break;
        }
 
-       usb_submit_urb(xpad->irq_out, GFP_KERNEL);
-       mutex_unlock(&xpad->odata_mutex);
+       xpad_try_sending_next_out_packet(xpad);
+
+       spin_unlock_irqrestore(&xpad->odata_lock, flags);
 }
 
 /*
@@ -959,7 +1140,7 @@ static void xpad_send_led_command(struct usb_xpad *xpad, int command)
  */
 static void xpad_identify_controller(struct usb_xpad *xpad)
 {
-       xpad_send_led_command(xpad, (xpad->pad_nr % 4) + 2);
+       led_set_brightness(&xpad->led->led_cdev, (xpad->pad_nr % 4) + 2);
 }
 
 static void xpad_led_set(struct led_classdev *led_cdev,
@@ -1001,14 +1182,7 @@ static int xpad_led_probe(struct usb_xpad *xpad)
        if (error)
                goto err_free_id;
 
-       if (xpad->xtype == XTYPE_XBOX360) {
-               /*
-                * Light up the segment corresponding to controller
-                * number on wired devices. On wireless we'll do that
-                * when they respond to "presence" packet.
-                */
-               xpad_identify_controller(xpad);
-       }
+       xpad_identify_controller(xpad);
 
        return 0;
 
@@ -1036,37 +1210,73 @@ static void xpad_led_disconnect(struct usb_xpad *xpad) { }
 static void xpad_identify_controller(struct usb_xpad *xpad) { }
 #endif
 
-static int xpad_open(struct input_dev *dev)
+static int xpad_start_input(struct usb_xpad *xpad)
 {
-       struct usb_xpad *xpad = input_get_drvdata(dev);
-
-       /* URB was submitted in probe */
-       if (xpad->xtype == XTYPE_XBOX360W)
-               return 0;
+       int error;
 
-       xpad->irq_in->dev = xpad->udev;
        if (usb_submit_urb(xpad->irq_in, GFP_KERNEL))
                return -EIO;
 
        if (xpad->xtype == XTYPE_XBOXONE) {
-               /* Xbox one controller needs to be initialized. */
-               xpad->odata[0] = 0x05;
-               xpad->odata[1] = 0x20;
-               xpad->irq_out->transfer_buffer_length = 2;
-               return usb_submit_urb(xpad->irq_out, GFP_KERNEL);
+               error = xpad_start_xbox_one(xpad);
+               if (error) {
+                       usb_kill_urb(xpad->irq_in);
+                       return error;
+               }
        }
 
        return 0;
 }
 
-static void xpad_close(struct input_dev *dev)
+static void xpad_stop_input(struct usb_xpad *xpad)
 {
-       struct usb_xpad *xpad = input_get_drvdata(dev);
+       usb_kill_urb(xpad->irq_in);
+}
+
+static int xpad360w_start_input(struct usb_xpad *xpad)
+{
+       int error;
 
-       if (xpad->xtype != XTYPE_XBOX360W)
+       error = usb_submit_urb(xpad->irq_in, GFP_KERNEL);
+       if (error)
+               return -EIO;
+
+       /*
+        * Send presence packet.
+        * This will force the controller to resend connection packets.
+        * This is useful in the case we activate the module after the
+        * adapter has been plugged in, as it won't automatically
+        * send us info about the controllers.
+        */
+       error = xpad_inquiry_pad_presence(xpad);
+       if (error) {
                usb_kill_urb(xpad->irq_in);
+               return error;
+       }
 
-       xpad_stop_output(xpad);
+       return 0;
+}
+
+static void xpad360w_stop_input(struct usb_xpad *xpad)
+{
+       usb_kill_urb(xpad->irq_in);
+
+       /* Make sure we are done with presence work if it was scheduled */
+       flush_work(&xpad->work);
+}
+
+static int xpad_open(struct input_dev *dev)
+{
+       struct usb_xpad *xpad = input_get_drvdata(dev);
+
+       return xpad_start_input(xpad);
+}
+
+static void xpad_close(struct input_dev *dev)
+{
+       struct usb_xpad *xpad = input_get_drvdata(dev);
+
+       xpad_stop_input(xpad);
 }
 
 static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs)
@@ -1097,8 +1307,11 @@ static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs)
 
 static void xpad_deinit_input(struct usb_xpad *xpad)
 {
-       xpad_led_disconnect(xpad);
-       input_unregister_device(xpad->dev);
+       if (xpad->input_created) {
+               xpad->input_created = false;
+               xpad_led_disconnect(xpad);
+               input_unregister_device(xpad->dev);
+       }
 }
 
 static int xpad_init_input(struct usb_xpad *xpad)
@@ -1118,8 +1331,10 @@ static int xpad_init_input(struct usb_xpad *xpad)
 
        input_set_drvdata(input_dev, xpad);
 
-       input_dev->open = xpad_open;
-       input_dev->close = xpad_close;
+       if (xpad->xtype != XTYPE_XBOX360W) {
+               input_dev->open = xpad_open;
+               input_dev->close = xpad_close;
+       }
 
        __set_bit(EV_KEY, input_dev->evbit);
 
@@ -1181,6 +1396,7 @@ static int xpad_init_input(struct usb_xpad *xpad)
        if (error)
                goto err_disconnect_led;
 
+       xpad->input_created = true;
        return 0;
 
 err_disconnect_led:
@@ -1241,6 +1457,7 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
        xpad->mapping = xpad_device[i].mapping;
        xpad->xtype = xpad_device[i].xtype;
        xpad->name = xpad_device[i].name;
+       INIT_WORK(&xpad->work, xpad_presence_work);
 
        if (xpad->xtype == XTYPE_UNKNOWN) {
                if (intf->cur_altsetting->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC) {
@@ -1277,10 +1494,6 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
 
        usb_set_intfdata(intf, xpad);
 
-       error = xpad_init_input(xpad);
-       if (error)
-               goto err_deinit_output;
-
        if (xpad->xtype == XTYPE_XBOX360W) {
                /*
                 * Submit the int URB immediately rather than waiting for open
@@ -1289,28 +1502,24 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
                 * exactly the message that a controller has arrived that
                 * we're waiting for.
                 */
-               xpad->irq_in->dev = xpad->udev;
-               error = usb_submit_urb(xpad->irq_in, GFP_KERNEL);
+               error = xpad360w_start_input(xpad);
                if (error)
-                       goto err_deinit_input;
-
+                       goto err_deinit_output;
                /*
-                * Send presence packet.
-                * This will force the controller to resend connection packets.
-                * This is useful in the case we activate the module after the
-                * adapter has been plugged in, as it won't automatically
-                * send us info about the controllers.
+                * Wireless controllers require RESET_RESUME to work properly
+                * after suspend. Ideally this quirk should be in usb core
+                * quirk list, but we have too many vendors producing these
+                * controllers and we'd need to maintain 2 identical lists
+                * here in this driver and in usb core.
                 */
-               error = xpad_inquiry_pad_presence(xpad);
+               udev->quirks |= USB_QUIRK_RESET_RESUME;
+       } else {
+               error = xpad_init_input(xpad);
                if (error)
-                       goto err_kill_in_urb;
+                       goto err_deinit_output;
        }
        return 0;
 
-err_kill_in_urb:
-       usb_kill_urb(xpad->irq_in);
-err_deinit_input:
-       xpad_deinit_input(xpad);
 err_deinit_output:
        xpad_deinit_output(xpad);
 err_free_in_urb:
@@ -1320,19 +1529,24 @@ err_free_idata:
 err_free_mem:
        kfree(xpad);
        return error;
-
 }
 
 static void xpad_disconnect(struct usb_interface *intf)
 {
-       struct usb_xpad *xpad = usb_get_intfdata (intf);
+       struct usb_xpad *xpad = usb_get_intfdata(intf);
+
+       if (xpad->xtype == XTYPE_XBOX360W)
+               xpad360w_stop_input(xpad);
 
        xpad_deinit_input(xpad);
-       xpad_deinit_output(xpad);
 
-       if (xpad->xtype == XTYPE_XBOX360W) {
-               usb_kill_urb(xpad->irq_in);
-       }
+       /*
+        * Now that both input device and LED device are gone we can
+        * stop output URB.
+        */
+       xpad_stop_output(xpad);
+
+       xpad_deinit_output(xpad);
 
        usb_free_urb(xpad->irq_in);
        usb_free_coherent(xpad->udev, XPAD_PKT_LEN,
@@ -1343,10 +1557,55 @@ static void xpad_disconnect(struct usb_interface *intf)
        usb_set_intfdata(intf, NULL);
 }
 
+/*
+ * Suspend callback: stop the pad's input and then its output.
+ * Always returns 0.
+ */
+static int xpad_suspend(struct usb_interface *intf, pm_message_t message)
+{
+       struct usb_xpad *pad = usb_get_intfdata(intf);
+       struct input_dev *idev = pad->dev;
+
+       if (pad->xtype != XTYPE_XBOX360W) {
+               /* Only stop input if the input device currently has users. */
+               mutex_lock(&idev->mutex);
+               if (idev->users)
+                       xpad_stop_input(pad);
+               mutex_unlock(&idev->mutex);
+       } else {
+               /*
+                * Wireless receivers listen for input all the time so
+                * that they notice a controller showing up or going
+                * away; stop that unconditionally.
+                */
+               xpad360w_stop_input(pad);
+       }
+
+       xpad_stop_output(pad);
+
+       return 0;
+}
+
+/*
+ * Resume callback (also used for reset_resume): restart input.
+ * Returns 0 or the error reported by the start helper.
+ */
+static int xpad_resume(struct usb_interface *intf)
+{
+       struct usb_xpad *pad = usb_get_intfdata(intf);
+       struct input_dev *idev = pad->dev;
+       int err = 0;
+
+       /* Wireless receivers restart their input unconditionally. */
+       if (pad->xtype == XTYPE_XBOX360W)
+               return xpad360w_start_input(pad);
+
+       /* Other pads only restart input while the device has users. */
+       mutex_lock(&idev->mutex);
+       if (idev->users)
+               err = xpad_start_input(pad);
+       mutex_unlock(&idev->mutex);
+
+       return err;
+}
+
 static struct usb_driver xpad_driver = {
        .name           = "xpad",
        .probe          = xpad_probe,
        .disconnect     = xpad_disconnect,
+       .suspend        = xpad_suspend,
+       .resume         = xpad_resume,
+       .reset_resume   = xpad_resume,
        .id_table       = xpad_table,
 };
 
index b9f01bd..2909365 100644 (file)
@@ -630,7 +630,7 @@ gpio_keys_get_devtree_pdata(struct device *dev)
        if (!node)
                return ERR_PTR(-ENODEV);
 
-       nbuttons = of_get_child_count(node);
+       nbuttons = of_get_available_child_count(node);
        if (nbuttons == 0)
                return ERR_PTR(-ENODEV);
 
@@ -645,8 +645,10 @@ gpio_keys_get_devtree_pdata(struct device *dev)
 
        pdata->rep = !!of_get_property(node, "autorepeat", NULL);
 
+       of_property_read_string(node, "label", &pdata->name);
+
        i = 0;
-       for_each_child_of_node(node, pp) {
+       for_each_available_child_of_node(node, pp) {
                enum of_gpio_flags flags;
 
                button = &pdata->buttons[i++];
index 2d5794e..2160512 100644 (file)
@@ -113,8 +113,8 @@ struct t7_config {
 #define MXT_T9_DETECT          (1 << 7)
 
 struct t9_range {
-       u16 x;
-       u16 y;
+       __le16 x;
+       __le16 y;
 } __packed;
 
 /* MXT_TOUCH_MULTI_T9 orient */
@@ -216,6 +216,7 @@ struct mxt_data {
        unsigned int irq;
        unsigned int max_x;
        unsigned int max_y;
+       bool xy_switch;
        bool in_bootloader;
        u16 mem_size;
        u8 t100_aux_ampl;
@@ -1665,8 +1666,8 @@ static int mxt_read_t9_resolution(struct mxt_data *data)
        if (error)
                return error;
 
-       le16_to_cpus(&range.x);
-       le16_to_cpus(&range.y);
+       data->max_x = get_unaligned_le16(&range.x);
+       data->max_y = get_unaligned_le16(&range.y);
 
        error =  __mxt_read_reg(client,
                                object->start_address + MXT_T9_ORIENT,
@@ -1674,23 +1675,7 @@ static int mxt_read_t9_resolution(struct mxt_data *data)
        if (error)
                return error;
 
-       /* Handle default values */
-       if (range.x == 0)
-               range.x = 1023;
-
-       if (range.y == 0)
-               range.y = 1023;
-
-       if (orient & MXT_T9_ORIENT_SWITCH) {
-               data->max_x = range.y;
-               data->max_y = range.x;
-       } else {
-               data->max_x = range.x;
-               data->max_y = range.y;
-       }
-
-       dev_dbg(&client->dev,
-               "Touchscreen size X%uY%u\n", data->max_x, data->max_y);
+       data->xy_switch = orient & MXT_T9_ORIENT_SWITCH;
 
        return 0;
 }
@@ -1708,13 +1693,14 @@ static int mxt_read_t100_config(struct mxt_data *data)
        if (!object)
                return -EINVAL;
 
+       /* read touchscreen dimensions */
        error = __mxt_read_reg(client,
                               object->start_address + MXT_T100_XRANGE,
                               sizeof(range_x), &range_x);
        if (error)
                return error;
 
-       le16_to_cpus(&range_x);
+       data->max_x = get_unaligned_le16(&range_x);
 
        error = __mxt_read_reg(client,
                               object->start_address + MXT_T100_YRANGE,
@@ -1722,36 +1708,24 @@ static int mxt_read_t100_config(struct mxt_data *data)
        if (error)
                return error;
 
-       le16_to_cpus(&range_y);
+       data->max_y = get_unaligned_le16(&range_y);
 
+       /* read orientation config */
        error =  __mxt_read_reg(client,
                                object->start_address + MXT_T100_CFG1,
                                1, &cfg);
        if (error)
                return error;
 
+       data->xy_switch = cfg & MXT_T100_CFG_SWITCHXY;
+
+       /* allocate aux bytes */
        error =  __mxt_read_reg(client,
                                object->start_address + MXT_T100_TCHAUX,
                                1, &tchaux);
        if (error)
                return error;
 
-       /* Handle default values */
-       if (range_x == 0)
-               range_x = 1023;
-
-       if (range_y == 0)
-               range_y = 1023;
-
-       if (cfg & MXT_T100_CFG_SWITCHXY) {
-               data->max_x = range_y;
-               data->max_y = range_x;
-       } else {
-               data->max_x = range_x;
-               data->max_y = range_y;
-       }
-
-       /* allocate aux bytes */
        aux = 6;
 
        if (tchaux & MXT_T100_TCHAUX_VECT)
@@ -1767,9 +1741,6 @@ static int mxt_read_t100_config(struct mxt_data *data)
                "T100 aux mappings vect:%u ampl:%u area:%u\n",
                data->t100_aux_vect, data->t100_aux_ampl, data->t100_aux_area);
 
-       dev_info(&client->dev,
-                "T100 Touchscreen size X%uY%u\n", data->max_x, data->max_y);
-
        return 0;
 }
 
@@ -1828,6 +1799,19 @@ static int mxt_initialize_input_device(struct mxt_data *data)
                return -EINVAL;
        }
 
+       /* Handle default values and orientation switch */
+       if (data->max_x == 0)
+               data->max_x = 1023;
+
+       if (data->max_y == 0)
+               data->max_y = 1023;
+
+       if (data->xy_switch)
+               swap(data->max_x, data->max_y);
+
+       dev_info(dev, "Touchscreen size X%uY%u\n", data->max_x, data->max_y);
+
+       /* Register input device */
        input_dev = input_allocate_device();
        if (!input_dev) {
                dev_err(dev, "Failed to allocate memory\n");
index 539b0de..e5e2239 100644 (file)
@@ -2049,7 +2049,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
        /* Update device table */
        set_dte_entry(dev_data->devid, domain, ats);
        if (alias != dev_data->devid)
-               set_dte_entry(dev_data->devid, domain, ats);
+               set_dte_entry(alias, domain, ats);
 
        device_flush_dte(dev_data);
 }
index ac73876..986a53e 100644 (file)
@@ -1489,7 +1489,7 @@ static void iommu_disable_dev_iotlb(struct device_domain_info *info)
 {
        struct pci_dev *pdev;
 
-       if (dev_is_pci(info->dev))
+       if (!dev_is_pci(info->dev))
                return;
 
        pdev = to_pci_dev(info->dev);
index 8bbcbfe..381ca5a 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/sizes.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/barrier.h>
 
index 11fc2a2..fb50911 100644 (file)
@@ -130,6 +130,11 @@ config ORION_IRQCHIP
        select IRQ_DOMAIN
        select MULTI_IRQ_HANDLER
 
+config PIC32_EVIC
+       bool
+       select GENERIC_IRQ_CHIP
+       select IRQ_DOMAIN
+
 config RENESAS_INTC_IRQPIN
        bool
        select IRQ_DOMAIN
@@ -154,6 +159,7 @@ config TB10X_IRQC
 config TS4800_IRQ
        tristate "TS-4800 IRQ controller"
        select IRQ_DOMAIN
+       depends on HAS_IOMEM
        help
          Support for the TS-4800 FPGA IRQ controller
 
index d4c2e4e..18caacb 100644 (file)
@@ -58,3 +58,4 @@ obj-$(CONFIG_RENESAS_H8S_INTC)                += irq-renesas-h8s.o
 obj-$(CONFIG_ARCH_SA1100)              += irq-sa11x0.o
 obj-$(CONFIG_INGENIC_IRQ)              += irq-ingenic.o
 obj-$(CONFIG_IMX_GPCV2)                        += irq-imx-gpcv2.o
+obj-$(CONFIG_PIC32_EVIC)               += irq-pic32-evic.o
index b12a5d5..37199b9 100644 (file)
@@ -86,7 +86,7 @@ int aic_common_set_priority(int priority, unsigned *val)
            priority > AT91_AIC_IRQ_MAX_PRIORITY)
                return -EINVAL;
 
-       *val &= AT91_AIC_PRIOR;
+       *val &= ~AT91_AIC_PRIOR;
        *val |= priority;
 
        return 0;
index e23d1d1..3447549 100644 (file)
@@ -875,6 +875,7 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
                }
 
                alloc_size = (1 << order) * PAGE_SIZE;
+retry_alloc_baser:
                alloc_pages = (alloc_size / psz);
                if (alloc_pages > GITS_BASER_PAGES_MAX) {
                        alloc_pages = GITS_BASER_PAGES_MAX;
@@ -938,13 +939,16 @@ retry_baser:
                         * size and retry. If we reach 4K, then
                         * something is horribly wrong...
                         */
+                       free_pages((unsigned long)base, order);
+                       its->tables[i] = NULL;
+
                        switch (psz) {
                        case SZ_16K:
                                psz = SZ_4K;
-                               goto retry_baser;
+                               goto retry_alloc_baser;
                        case SZ_64K:
                                psz = SZ_16K;
-                               goto retry_baser;
+                               goto retry_alloc_baser;
                        }
                }
 
index c22e2d4..efe5084 100644 (file)
@@ -241,6 +241,7 @@ static int __init asm9260_of_init(struct device_node *np,
                writel(0, icoll_priv.intr + i);
 
        icoll_add_domain(np, ASM9260_NUM_IRQS);
+       set_handle_irq(icoll_handle_irq);
 
        return 0;
 }
diff --git a/drivers/irqchip/irq-pic32-evic.c b/drivers/irqchip/irq-pic32-evic.c
new file mode 100644 (file)
index 0000000..e7155db
--- /dev/null
@@ -0,0 +1,324 @@
+/*
+ * Cristian Birsan <cristian.birsan@microchip.com>
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2016 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/irqchip.h>
+#include <linux/irq.h>
+
+#include <asm/irq.h>
+#include <asm/traps.h>
+#include <asm/mach-pic32/pic32.h>
+
+#define REG_INTCON     0x0000
+#define REG_INTSTAT    0x0020
+#define REG_IFS_OFFSET 0x0040
+#define REG_IEC_OFFSET 0x00C0
+#define REG_IPC_OFFSET 0x0140
+#define REG_OFF_OFFSET 0x0540
+
+#define MAJPRI_MASK    0x07
+#define SUBPRI_MASK    0x03
+#define PRIORITY_MASK  0x1F
+
+#define PIC32_INT_PRI(pri, subpri)                             \
+       ((((pri) & MAJPRI_MASK) << 2) | ((subpri) & SUBPRI_MASK))
+
+/*
+ * Driver-private state, stored as the irq domain's host_data.
+ */
+struct evic_chip_data {
+       /* trigger type per hwirq, recorded by the domain's xlate callback */
+       u32 irq_types[NR_IRQS];
+       /* hwirq numbers read from the "microchip,external-irqs" DT property */
+       u32 ext_irqs[8];
+};
+
+static struct irq_domain *evic_irq_domain;
+static void __iomem *evic_base;
+
+/*
+ * Weak default dispatcher: take the hardware interrupt number from the low
+ * byte of INTSTAT, translate it through the EVIC domain and pass the
+ * resulting Linux irq to do_IRQ().
+ */
+asmlinkage void __weak plat_irq_dispatch(void)
+{
+       unsigned int pending = readl(evic_base + REG_INTSTAT) & 0xFF;
+       unsigned int virq = irq_linear_revmap(evic_irq_domain, pending);
+
+       do_IRQ(virq);
+}
+
+/* Return the driver data stored as host_data of the irq's domain. */
+static struct evic_chip_data *irqd_to_priv(struct irq_data *data)
+{
+       struct irq_domain *domain = data->domain;
+
+       return domain->host_data;
+}
+
+/*
+ * Program the edge polarity bit @bit in INTCON.
+ *
+ * External interrupts can be either edge rising or edge falling, but not
+ * both: rising sets the bit, falling clears it, anything else is rejected
+ * with -EINVAL.
+ */
+static int pic32_set_ext_polarity(int bit, u32 type)
+{
+       if (type == IRQ_TYPE_EDGE_RISING) {
+               writel(BIT(bit), evic_base + PIC32_SET(REG_INTCON));
+               return 0;
+       }
+
+       if (type == IRQ_TYPE_EDGE_FALLING) {
+               writel(BIT(bit), evic_base + PIC32_CLR(REG_INTCON));
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+/*
+ * irq_set_type callback of the edge chip type.
+ *
+ * Anything that is not an edge trigger is rejected with -EBADR.  If the
+ * interrupt is listed in priv->ext_irqs its polarity is programmed in
+ * INTCON; entry N of that table is controlled by polarity bit N (INT0EP is
+ * bit 0), so the table index is the bit number.
+ *
+ * Returns IRQ_SET_MASK_OK on success or a negative errno.
+ */
+static int pic32_set_type_edge(struct irq_data *data,
+                              unsigned int flow_type)
+{
+       struct evic_chip_data *priv = irqd_to_priv(data);
+       int ret;
+       int i;
+
+       if (!(flow_type & IRQ_TYPE_EDGE_BOTH))
+               return -EBADR;
+
+       /* set polarity for external interrupts only */
+       for (i = 0; i < ARRAY_SIZE(priv->ext_irqs); i++) {
+               if (priv->ext_irqs[i] == data->hwirq) {
+                       ret = pic32_set_ext_polarity(i, flow_type);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       irqd_set_trigger_type(data, flow_type);
+
+       return IRQ_SET_MASK_OK;
+}
+
+/* Write @set to the 32-bit OFF register slot belonging to @irq. */
+static void pic32_bind_evic_interrupt(int irq, int set)
+{
+       void __iomem *off_reg = evic_base + REG_OFF_OFFSET + irq * 4;
+
+       writel(set, off_reg);
+}
+
+/*
+ * Set the priority field of @irq.  Four interrupts share one IPC register
+ * (registers are 0x10 apart), each with its own 8-bit lane.
+ */
+static void pic32_set_irq_priority(int irq, int priority)
+{
+       u32 bank = irq / 4;
+       u32 lsb = (irq % 4) * 8;
+
+       /* wipe the old priority bits first ... */
+       writel(PRIORITY_MASK << lsb,
+               evic_base + PIC32_CLR(REG_IPC_OFFSET + bank * 0x10));
+       /* ... then set the requested ones */
+       writel(priority << lsb,
+               evic_base + PIC32_SET(REG_IPC_OFFSET + bank * 0x10));
+}
+
+/*
+ * Split @_hwirq into the index of its 32-bit register (@_reg) and its bit
+ * mask inside that register (@_mask).  BIT() keeps bit 31 out of signed
+ * arithmetic.
+ */
+#define IRQ_REG_MASK(_hwirq, _reg, _mask)                     \
+       do {                                                   \
+               (_reg) = (_hwirq) / 32;                        \
+               (_mask) = BIT((_hwirq) % 32);                  \
+       } while (0)
+
+/*
+ * Domain .map callback: attach @virq to its generic chip, then leave the
+ * hardware interrupt @hw masked, with its flag cleared and a default
+ * priority.  Returns 0 or the error from irq_map_generic_chip().
+ */
+static int pic32_irq_domain_map(struct irq_domain *d, unsigned int virq,
+                               irq_hw_number_t hw)
+{
+       struct evic_chip_data *priv = d->host_data;
+       u32 type, bank, bit;
+       int err;
+
+       err = irq_map_generic_chip(d, virq, hw);
+       if (err)
+               return err;
+
+       /*
+        * Piggyback on xlate function to move to an alternate chip as necessary
+        * at time of mapping instead of allowing the flow handler/chip to be
+        * changed later. This requires all interrupts to be configured through
+        * DT.
+        */
+       type = priv->irq_types[hw];
+       if (type & IRQ_TYPE_SENSE_MASK) {
+               struct irq_data *data = irq_domain_get_irq_data(d, virq);
+
+               irqd_set_trigger_type(data, type);
+               irq_setup_alt_chip(data, type);
+       }
+
+       IRQ_REG_MASK(hw, bank, bit);
+
+       /* mask and clear flag */
+       writel(bit, evic_base + PIC32_CLR(REG_IEC_OFFSET + bank * 0x10));
+       writel(bit, evic_base + PIC32_CLR(REG_IFS_OFFSET + bank * 0x10));
+
+       /* default priority is required */
+       pic32_set_irq_priority(hw, PIC32_INT_PRI(2, 0));
+
+       return 0;
+}
+
+/*
+ * Domain .xlate callback: decode a two-cell interrupt specifier
+ * (<hwirq type>).  The sense bits of the type are also stored in
+ * priv->irq_types[], where the .map callback reads them.
+ *
+ * Returns 0, or -EINVAL (with a warning) for a short specifier or a hwirq
+ * outside NR_IRQS.
+ */
+int pic32_irq_domain_xlate(struct irq_domain *d, struct device_node *ctrlr,
+                          const u32 *intspec, unsigned int intsize,
+                          irq_hw_number_t *out_hwirq, unsigned int *out_type)
+{
+       struct evic_chip_data *priv = d->host_data;
+       u32 hwirq, sense;
+
+       if (WARN_ON(intsize < 2))
+               return -EINVAL;
+
+       hwirq = intspec[0];
+       if (WARN_ON(hwirq >= NR_IRQS))
+               return -EINVAL;
+
+       sense = intspec[1] & IRQ_TYPE_SENSE_MASK;
+
+       *out_hwirq = hwirq;
+       *out_type = sense;
+
+       priv->irq_types[hwirq] = sense;
+
+       return 0;
+}
+
+/* Callbacks handed to irq_domain_add_linear() for the EVIC domain. */
+static const struct irq_domain_ops pic32_irq_domain_ops = {
+       .map    = pic32_irq_domain_map,
+       .xlate  = pic32_irq_domain_xlate,
+};
+
+/*
+ * Copy the hardware irq numbers listed in the "microchip,external-irqs"
+ * property of the domain's DT node into priv->ext_irqs.  Entries that do
+ * not fit into that table are dropped with a warning.
+ */
+static void __init pic32_ext_irq_of_init(struct irq_domain *domain)
+{
+       struct device_node *node = irq_domain_get_of_node(domain);
+       struct evic_chip_data *priv = domain->host_data;
+       struct property *prop;
+       const __be32 *p;        /* DT cells are big-endian */
+       u32 hwirq;
+       int i = 0;
+       const char *pname = "microchip,external-irqs";
+
+       of_property_for_each_u32(node, pname, prop, p, hwirq) {
+               if (i >= ARRAY_SIZE(priv->ext_irqs)) {
+                       /* ARRAY_SIZE() yields a size_t, hence %zu */
+                       pr_warn("More than %zu external irq, skip rest\n",
+                               ARRAY_SIZE(priv->ext_irqs));
+                       break;
+               }
+
+               priv->ext_irqs[i] = hwirq;
+               i++;
+       }
+}
+
+/*
+ * Init entry point registered through IRQCHIP_DECLARE().
+ *
+ * Maps the controller registers, creates a linear irq domain, allocates
+ * generic chips with two chip types (level and edge) and configures them,
+ * makes the domain the default host and finally reads the list of external
+ * interrupts from DT.
+ *
+ * Returns 0 on success or a negative errno; on failure everything set up
+ * so far (domain, private data, register mapping) is released again.
+ */
+static int __init pic32_of_init(struct device_node *node,
+                               struct device_node *parent)
+{
+       struct irq_chip_generic *gc;
+       struct evic_chip_data *priv;
+       unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
+       int nchips, ret;
+       int i;
+
+       /* one generic chip per group of 32 interrupts */
+       nchips = DIV_ROUND_UP(NR_IRQS, 32);
+
+       evic_base = of_iomap(node, 0);
+       if (!evic_base)
+               return -ENOMEM;
+
+       priv = kcalloc(nchips, sizeof(*priv), GFP_KERNEL);
+       if (!priv) {
+               ret = -ENOMEM;
+               goto err_iounmap;
+       }
+
+       evic_irq_domain = irq_domain_add_linear(node, nchips * 32,
+                                               &pic32_irq_domain_ops,
+                                               priv);
+       if (!evic_irq_domain) {
+               ret = -ENOMEM;
+               goto err_free_priv;
+       }
+
+       /*
+        * The PIC32 EVIC has a linear list of irqs and the type of each
+        * irq is determined by the hardware peripheral the EVIC is arbitrating.
+        * These irq types are defined in the datasheet as "persistent" and
+        * "non-persistent" which are mapped here to level and edge
+        * respectively. To manage the different flow handler requirements of
+        * each irq type, different chip_types are used.
+        */
+       /* the handler passed here is replaced per chip type in the loop below */
+       ret = irq_alloc_domain_generic_chips(evic_irq_domain, 32, 2,
+                                            "evic-level", handle_level_irq,
+                                            clr, 0, 0);
+       if (ret)
+               goto err_domain_remove;
+
+       board_bind_eic_interrupt = &pic32_bind_evic_interrupt;
+
+       for (i = 0; i < nchips; i++) {
+               /* register offsets of the IFS (clear alias) and IEC bank i */
+               u32 ifsclr = PIC32_CLR(REG_IFS_OFFSET + (i * 0x10));
+               u32 iec = REG_IEC_OFFSET + (i * 0x10);
+
+               gc = irq_get_domain_generic_chip(evic_irq_domain, i * 32);
+
+               gc->reg_base = evic_base;
+               gc->unused = 0;
+
+               /*
+                * Level/persistent interrupts have a special requirement that
+                * the condition generating the interrupt be cleared before the
+                * interrupt flag (ifs) can be cleared. chip.irq_eoi is used to
+                * complete the interrupt with an ack.
+                */
+               gc->chip_types[0].type                  = IRQ_TYPE_LEVEL_MASK;
+               gc->chip_types[0].handler               = handle_fasteoi_irq;
+               gc->chip_types[0].regs.ack              = ifsclr;
+               gc->chip_types[0].regs.mask             = iec;
+               gc->chip_types[0].chip.name             = "evic-level";
+               gc->chip_types[0].chip.irq_eoi          = irq_gc_ack_set_bit;
+               gc->chip_types[0].chip.irq_mask         = irq_gc_mask_clr_bit;
+               gc->chip_types[0].chip.irq_unmask       = irq_gc_mask_set_bit;
+               gc->chip_types[0].chip.flags            = IRQCHIP_SKIP_SET_WAKE;
+
+               /* Edge interrupts */
+               gc->chip_types[1].type                  = IRQ_TYPE_EDGE_BOTH;
+               gc->chip_types[1].handler               = handle_edge_irq;
+               gc->chip_types[1].regs.ack              = ifsclr;
+               gc->chip_types[1].regs.mask             = iec;
+               gc->chip_types[1].chip.name             = "evic-edge";
+               gc->chip_types[1].chip.irq_ack          = irq_gc_ack_set_bit;
+               gc->chip_types[1].chip.irq_mask         = irq_gc_mask_clr_bit;
+               gc->chip_types[1].chip.irq_unmask       = irq_gc_mask_set_bit;
+               gc->chip_types[1].chip.irq_set_type     = pic32_set_type_edge;
+               gc->chip_types[1].chip.flags            = IRQCHIP_SKIP_SET_WAKE;
+
+               /*
+                * NOTE(review): the xlate/map callbacks and irqd_to_priv()
+                * in this file only go through domain->host_data, i.e.
+                * priv[0]; confirm whether the per-chip entries are needed.
+                */
+               gc->private = &priv[i];
+       }
+
+       irq_set_default_host(evic_irq_domain);
+
+       /*
+        * External interrupts have software configurable edge polarity. These
+        * interrupts are defined in DT allowing polarity to be configured only
+        * for these interrupts when requested.
+        */
+       pic32_ext_irq_of_init(evic_irq_domain);
+
+       return 0;
+
+       /* error unwinding, in reverse order of setup */
+err_domain_remove:
+       irq_domain_remove(evic_irq_domain);
+
+err_free_priv:
+       kfree(priv);
+
+err_iounmap:
+       iounmap(evic_base);
+
+       return ret;
+}
+
+IRQCHIP_DECLARE(pic32_evic, "microchip,pic32mzda-evic", pic32_of_init);
index c71914e..5dc5a76 100644 (file)
@@ -605,7 +605,7 @@ err:
        return ERR_PTR(ret);
 }
 
-static struct s3c_irq_data init_eint[32] = {
+static struct s3c_irq_data __maybe_unused init_eint[32] = {
        { .type = S3C_IRQTYPE_NONE, }, /* reserved */
        { .type = S3C_IRQTYPE_NONE, }, /* reserved */
        { .type = S3C_IRQTYPE_NONE, }, /* reserved */
index 546d05f..b2bbe86 100644 (file)
@@ -81,6 +81,7 @@ config STI_MBOX
 config MAILBOX_TEST
        tristate "Mailbox Test Client"
        depends on OF
+       depends on HAS_IOMEM
        help
          Test client to help with testing new Controller driver
          implementations.
index 45d85ae..8f779a1 100644 (file)
@@ -81,16 +81,10 @@ static struct mbox_controller pcc_mbox_ctrl = {};
  */
 static struct mbox_chan *get_pcc_channel(int id)
 {
-       struct mbox_chan *pcc_chan;
-
        if (id < 0 || id > pcc_mbox_ctrl.num_chans)
                return ERR_PTR(-ENOENT);
 
-       pcc_chan = (struct mbox_chan *)
-               (unsigned long) pcc_mbox_channels +
-               (id * sizeof(*pcc_chan));
-
-       return pcc_chan;
+       return &pcc_mbox_channels[id];
 }
 
 /**
index 4f22e91..d80cce4 100644 (file)
@@ -210,10 +210,6 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
        struct block_device *bdev;
        struct mddev *mddev = bitmap->mddev;
        struct bitmap_storage *store = &bitmap->storage;
-       int node_offset = 0;
-
-       if (mddev_is_clustered(bitmap->mddev))
-               node_offset = bitmap->cluster_slot * store->file_pages;
 
        while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
                int size = PAGE_SIZE;
index 4a8e150..685aa2d 100644 (file)
@@ -170,7 +170,7 @@ static void add_sector(struct faulty_conf *conf, sector_t start, int mode)
                conf->nfaults = n+1;
 }
 
-static void make_request(struct mddev *mddev, struct bio *bio)
+static void faulty_make_request(struct mddev *mddev, struct bio *bio)
 {
        struct faulty_conf *conf = mddev->private;
        int failit = 0;
@@ -226,7 +226,7 @@ static void make_request(struct mddev *mddev, struct bio *bio)
        generic_make_request(bio);
 }
 
-static void status(struct seq_file *seq, struct mddev *mddev)
+static void faulty_status(struct seq_file *seq, struct mddev *mddev)
 {
        struct faulty_conf *conf = mddev->private;
        int n;
@@ -259,7 +259,7 @@ static void status(struct seq_file *seq, struct mddev *mddev)
 }
 
 
-static int reshape(struct mddev *mddev)
+static int faulty_reshape(struct mddev *mddev)
 {
        int mode = mddev->new_layout & ModeMask;
        int count = mddev->new_layout >> ModeShift;
@@ -299,7 +299,7 @@ static sector_t faulty_size(struct mddev *mddev, sector_t sectors, int raid_disk
        return sectors;
 }
 
-static int run(struct mddev *mddev)
+static int faulty_run(struct mddev *mddev)
 {
        struct md_rdev *rdev;
        int i;
@@ -327,7 +327,7 @@ static int run(struct mddev *mddev)
        md_set_array_sectors(mddev, faulty_size(mddev, 0, 0));
        mddev->private = conf;
 
-       reshape(mddev);
+       faulty_reshape(mddev);
 
        return 0;
 }
@@ -344,11 +344,11 @@ static struct md_personality faulty_personality =
        .name           = "faulty",
        .level          = LEVEL_FAULTY,
        .owner          = THIS_MODULE,
-       .make_request   = make_request,
-       .run            = run,
+       .make_request   = faulty_make_request,
+       .run            = faulty_run,
        .free           = faulty_free,
-       .status         = status,
-       .check_reshape  = reshape,
+       .status         = faulty_status,
+       .check_reshape  = faulty_reshape,
        .size           = faulty_size,
 };
 
index 0ded8e9..dd97d42 100644 (file)
@@ -293,6 +293,7 @@ static void recover_bitmaps(struct md_thread *thread)
 dlm_unlock:
                dlm_unlock_sync(bm_lockres);
 clear_bit:
+               lockres_free(bm_lockres);
                clear_bit(slot, &cinfo->recovery_map);
        }
 }
@@ -682,8 +683,10 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
                bm_lockres = lockres_init(mddev, str, NULL, 1);
                if (!bm_lockres)
                        return -ENOMEM;
-               if (i == (cinfo->slot_number - 1))
+               if (i == (cinfo->slot_number - 1)) {
+                       lockres_free(bm_lockres);
                        continue;
+               }
 
                bm_lockres->flags |= DLM_LKF_NOQUEUE;
                ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
@@ -858,6 +861,7 @@ static int leave(struct mddev *mddev)
        lockres_free(cinfo->token_lockres);
        lockres_free(cinfo->ack_lockres);
        lockres_free(cinfo->no_new_dev_lockres);
+       lockres_free(cinfo->resync_lockres);
        lockres_free(cinfo->bitmap_lockres);
        unlock_all_bitmaps(mddev);
        dlm_release_lockspace(cinfo->lockspace, 2);
index c4b9134..4e3843f 100644 (file)
@@ -1044,7 +1044,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
        kfree(plug);
 }
 
-static void make_request(struct mddev *mddev, struct bio * bio)
+static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 {
        struct r1conf *conf = mddev->private;
        struct raid1_info *mirror;
@@ -1422,7 +1422,7 @@ read_again:
        wake_up(&conf->wait_barrier);
 }
 
-static void status(struct seq_file *seq, struct mddev *mddev)
+static void raid1_status(struct seq_file *seq, struct mddev *mddev)
 {
        struct r1conf *conf = mddev->private;
        int i;
@@ -1439,7 +1439,7 @@ static void status(struct seq_file *seq, struct mddev *mddev)
        seq_printf(seq, "]");
 }
 
-static void error(struct mddev *mddev, struct md_rdev *rdev)
+static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
 {
        char b[BDEVNAME_SIZE];
        struct r1conf *conf = mddev->private;
@@ -2472,7 +2472,8 @@ static int init_resync(struct r1conf *conf)
  * that can be installed to exclude normal IO requests.
  */
 
-static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
+static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
+                                  int *skipped)
 {
        struct r1conf *conf = mddev->private;
        struct r1bio *r1_bio;
@@ -2890,7 +2891,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 }
 
 static void raid1_free(struct mddev *mddev, void *priv);
-static int run(struct mddev *mddev)
+static int raid1_run(struct mddev *mddev)
 {
        struct r1conf *conf;
        int i;
@@ -3170,15 +3171,15 @@ static struct md_personality raid1_personality =
        .name           = "raid1",
        .level          = 1,
        .owner          = THIS_MODULE,
-       .make_request   = make_request,
-       .run            = run,
+       .make_request   = raid1_make_request,
+       .run            = raid1_run,
        .free           = raid1_free,
-       .status         = status,
-       .error_handler  = error,
+       .status         = raid1_status,
+       .error_handler  = raid1_error,
        .hot_add_disk   = raid1_add_disk,
        .hot_remove_disk= raid1_remove_disk,
        .spare_active   = raid1_spare_active,
-       .sync_request   = sync_request,
+       .sync_request   = raid1_sync_request,
        .resize         = raid1_resize,
        .size           = raid1_size,
        .check_reshape  = raid1_reshape,
index ce959b4..1c1447d 100644 (file)
@@ -1442,7 +1442,7 @@ retry_write:
        one_write_done(r10_bio);
 }
 
-static void make_request(struct mddev *mddev, struct bio *bio)
+static void raid10_make_request(struct mddev *mddev, struct bio *bio)
 {
        struct r10conf *conf = mddev->private;
        sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
@@ -1484,7 +1484,7 @@ static void make_request(struct mddev *mddev, struct bio *bio)
        wake_up(&conf->wait_barrier);
 }
 
-static void status(struct seq_file *seq, struct mddev *mddev)
+static void raid10_status(struct seq_file *seq, struct mddev *mddev)
 {
        struct r10conf *conf = mddev->private;
        int i;
@@ -1562,7 +1562,7 @@ static int enough(struct r10conf *conf, int ignore)
                _enough(conf, 1, ignore);
 }
 
-static void error(struct mddev *mddev, struct md_rdev *rdev)
+static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
 {
        char b[BDEVNAME_SIZE];
        struct r10conf *conf = mddev->private;
@@ -2802,7 +2802,7 @@ static int init_resync(struct r10conf *conf)
  *
  */
 
-static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
+static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                             int *skipped)
 {
        struct r10conf *conf = mddev->private;
@@ -3523,7 +3523,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
        return ERR_PTR(err);
 }
 
-static int run(struct mddev *mddev)
+static int raid10_run(struct mddev *mddev)
 {
        struct r10conf *conf;
        int i, disk_idx, chunk_size;
@@ -4617,15 +4617,15 @@ static struct md_personality raid10_personality =
        .name           = "raid10",
        .level          = 10,
        .owner          = THIS_MODULE,
-       .make_request   = make_request,
-       .run            = run,
+       .make_request   = raid10_make_request,
+       .run            = raid10_run,
        .free           = raid10_free,
-       .status         = status,
-       .error_handler  = error,
+       .status         = raid10_status,
+       .error_handler  = raid10_error,
        .hot_add_disk   = raid10_add_disk,
        .hot_remove_disk= raid10_remove_disk,
        .spare_active   = raid10_spare_active,
-       .sync_request   = sync_request,
+       .sync_request   = raid10_sync_request,
        .quiesce        = raid10_quiesce,
        .size           = raid10_size,
        .resize         = raid10_resize,
index a086014..b4f02c9 100644 (file)
@@ -2496,7 +2496,7 @@ static void raid5_build_block(struct stripe_head *sh, int i, int previous)
        dev->sector = raid5_compute_blocknr(sh, i, previous);
 }
 
-static void error(struct mddev *mddev, struct md_rdev *rdev)
+static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
 {
        char b[BDEVNAME_SIZE];
        struct r5conf *conf = mddev->private;
@@ -2958,7 +2958,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
         * If several bio share a stripe. The bio bi_phys_segments acts as a
         * reference count to avoid race. The reference count should already be
         * increased before this function is called (for example, in
-        * make_request()), so other bio sharing this stripe will not free the
+        * raid5_make_request()), so other bio sharing this stripe will not free the
         * stripe. If a stripe is owned by one stripe, the stripe lock will
         * protect it.
         */
@@ -5135,7 +5135,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
        }
 }
 
-static void make_request(struct mddev *mddev, struct bio * bi)
+static void raid5_make_request(struct mddev *mddev, struct bio * bi)
 {
        struct r5conf *conf = mddev->private;
        int dd_idx;
@@ -5225,7 +5225,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                new_sector = raid5_compute_sector(conf, logical_sector,
                                                  previous,
                                                  &dd_idx, NULL);
-               pr_debug("raid456: make_request, sector %llu logical %llu\n",
+               pr_debug("raid456: raid5_make_request, sector %llu logical %llu\n",
                        (unsigned long long)new_sector,
                        (unsigned long long)logical_sector);
 
@@ -5575,7 +5575,8 @@ ret:
        return retn;
 }
 
-static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
+static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_nr,
+                                         int *skipped)
 {
        struct r5conf *conf = mddev->private;
        struct stripe_head *sh;
@@ -6674,7 +6675,7 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded
        return 0;
 }
 
-static int run(struct mddev *mddev)
+static int raid5_run(struct mddev *mddev)
 {
        struct r5conf *conf;
        int working_disks = 0;
@@ -7048,7 +7049,7 @@ static void raid5_free(struct mddev *mddev, void *priv)
        mddev->to_remove = &raid5_attrs_group;
 }
 
-static void status(struct seq_file *seq, struct mddev *mddev)
+static void raid5_status(struct seq_file *seq, struct mddev *mddev)
 {
        struct r5conf *conf = mddev->private;
        int i;
@@ -7864,15 +7865,15 @@ static struct md_personality raid6_personality =
        .name           = "raid6",
        .level          = 6,
        .owner          = THIS_MODULE,
-       .make_request   = make_request,
-       .run            = run,
+       .make_request   = raid5_make_request,
+       .run            = raid5_run,
        .free           = raid5_free,
-       .status         = status,
-       .error_handler  = error,
+       .status         = raid5_status,
+       .error_handler  = raid5_error,
        .hot_add_disk   = raid5_add_disk,
        .hot_remove_disk= raid5_remove_disk,
        .spare_active   = raid5_spare_active,
-       .sync_request   = sync_request,
+       .sync_request   = raid5_sync_request,
        .resize         = raid5_resize,
        .size           = raid5_size,
        .check_reshape  = raid6_check_reshape,
@@ -7887,15 +7888,15 @@ static struct md_personality raid5_personality =
        .name           = "raid5",
        .level          = 5,
        .owner          = THIS_MODULE,
-       .make_request   = make_request,
-       .run            = run,
+       .make_request   = raid5_make_request,
+       .run            = raid5_run,
        .free           = raid5_free,
-       .status         = status,
-       .error_handler  = error,
+       .status         = raid5_status,
+       .error_handler  = raid5_error,
        .hot_add_disk   = raid5_add_disk,
        .hot_remove_disk= raid5_remove_disk,
        .spare_active   = raid5_spare_active,
-       .sync_request   = sync_request,
+       .sync_request   = raid5_sync_request,
        .resize         = raid5_resize,
        .size           = raid5_size,
        .check_reshape  = raid5_check_reshape,
@@ -7911,15 +7912,15 @@ static struct md_personality raid4_personality =
        .name           = "raid4",
        .level          = 4,
        .owner          = THIS_MODULE,
-       .make_request   = make_request,
-       .run            = run,
+       .make_request   = raid5_make_request,
+       .run            = raid5_run,
        .free           = raid5_free,
-       .status         = status,
-       .error_handler  = error,
+       .status         = raid5_status,
+       .error_handler  = raid5_error,
        .hot_add_disk   = raid5_add_disk,
        .hot_remove_disk= raid5_remove_disk,
        .spare_active   = raid5_spare_active,
-       .sync_request   = sync_request,
+       .sync_request   = raid5_sync_request,
        .resize         = raid5_resize,
        .size           = raid5_size,
        .check_reshape  = raid5_check_reshape,
index 0e209b5..c6abeb4 100644 (file)
@@ -903,9 +903,18 @@ static int tda1004x_get_fe(struct dvb_frontend *fe)
 {
        struct dtv_frontend_properties *fe_params = &fe->dtv_property_cache;
        struct tda1004x_state* state = fe->demodulator_priv;
+       int status;
 
        dprintk("%s\n", __func__);
 
+       status = tda1004x_read_byte(state, TDA1004X_STATUS_CD);
+       if (status == -1)
+               return -EIO;
+
+       /* Only update the properties cache if device is locked */
+       if (!(status & 8))
+               return 0;
+
        // inversion status
        fe_params->inversion = INVERSION_OFF;
        if (tda1004x_read_byte(state, TDA1004X_CONFC1) & 0x20)
index 8304919..bf82726 100644 (file)
@@ -478,7 +478,6 @@ static const struct i2c_device_id ir_kbd_id[] = {
        { "ir_rx_z8f0811_hdpvr", 0 },
        { }
 };
-MODULE_DEVICE_TABLE(i2c, ir_kbd_id);
 
 static struct i2c_driver ir_kbd_driver = {
        .driver = {
index b9e43ff..cbe4711 100644 (file)
@@ -144,8 +144,7 @@ static int s5k6a3_set_fmt(struct v4l2_subdev *sd,
        mf = __s5k6a3_get_format(sensor, cfg, fmt->pad, fmt->which);
        if (mf) {
                mutex_lock(&sensor->lock);
-               if (fmt->which == V4L2_SUBDEV_FORMAT_ACTIVE)
-                       *mf = fmt->format;
+               *mf = fmt->format;
                mutex_unlock(&sensor->lock);
        }
        return 0;
index 1d2c310..94f8162 100644 (file)
@@ -1211,6 +1211,8 @@ static int alsa_device_init(struct saa7134_dev *dev)
 
 static int alsa_device_exit(struct saa7134_dev *dev)
 {
+       if (!snd_saa7134_cards[dev->nr])
+               return 1;
 
        snd_card_free(snd_saa7134_cards[dev->nr]);
        snd_saa7134_cards[dev->nr] = NULL;
@@ -1260,7 +1262,8 @@ static void saa7134_alsa_exit(void)
        int idx;
 
        for (idx = 0; idx < SNDRV_CARDS; idx++) {
-               snd_card_free(snd_saa7134_cards[idx]);
+               if (snd_saa7134_cards[idx])
+                       snd_card_free(snd_saa7134_cards[idx]);
        }
 
        saa7134_dmasound_init = NULL;
index 5263594..8b89ebe 100644 (file)
@@ -215,6 +215,7 @@ config VIDEO_SAMSUNG_EXYNOS_GSC
 config VIDEO_STI_BDISP
        tristate "STMicroelectronics BDISP 2D blitter driver"
        depends on VIDEO_DEV && VIDEO_V4L2
+       depends on HAS_DMA
        depends on ARCH_STI || COMPILE_TEST
        select VIDEOBUF2_DMA_CONTIG
        select V4L2_MEM2MEM_DEV
index 40423c6..57d42c6 100644 (file)
@@ -1,6 +1,6 @@
 
 config VIDEO_SAMSUNG_EXYNOS4_IS
-       bool "Samsung S5P/EXYNOS4 SoC series Camera Subsystem driver"
+       tristate "Samsung S5P/EXYNOS4 SoC series Camera Subsystem driver"
        depends on VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API
        depends on ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST
        depends on OF && COMMON_CLK
index 49658ca..979c388 100644 (file)
@@ -631,6 +631,12 @@ static int fimc_is_hw_open_sensor(struct fimc_is *is,
 
        fimc_is_mem_barrier();
 
+       /*
+        * Some user space use cases hang up here without this
+        * empirically chosen delay.
+        */
+       udelay(100);
+
        mcuctl_write(HIC_OPEN_SENSOR, is, MCUCTL_REG_ISSR(0));
        mcuctl_write(is->sensor_index, is, MCUCTL_REG_ISSR(1));
        mcuctl_write(sensor->drvdata->id, is, MCUCTL_REG_ISSR(2));
index bf9261e..c081672 100644 (file)
@@ -218,8 +218,8 @@ static void isp_video_capture_buffer_queue(struct vb2_buffer *vb)
                                                        ivb->dma_addr[i];
 
                        isp_dbg(2, &video->ve.vdev,
-                               "dma_buf %pad (%d/%d/%d) addr: %pad\n",
-                               &buf_index, ivb->index, i, vb->index,
+                               "dma_buf %d (%d/%d/%d) addr: %pad\n",
+                               buf_index, ivb->index, i, vb->index,
                                &ivb->dma_addr[i]);
                }
 
index f3b2dd3..e79ddbb 100644 (file)
@@ -185,6 +185,37 @@ error:
        return ret;
 }
 
+/**
+ * __fimc_pipeline_enable - enable power of all pipeline subdevs
+ *                         and the sensor clock
+ * @ep: video pipeline structure
+ * @fmd: fimc media device
+ *
+ * Called with the graph mutex held.
+ */
+static int __fimc_pipeline_enable(struct exynos_media_pipeline *ep,
+                                 struct fimc_md *fmd)
+{
+       struct fimc_pipeline *p = to_fimc_pipeline(ep);
+       int ret;
+
+       /* Enable PXLASYNC clock if this pipeline includes FIMC-IS */
+       if (!IS_ERR(fmd->wbclk[CLK_IDX_WB_B]) && p->subdevs[IDX_IS_ISP]) {
+               ret = clk_prepare_enable(fmd->wbclk[CLK_IDX_WB_B]);
+               if (ret < 0)
+                       return ret;
+       }
+
+       ret = fimc_pipeline_s_power(p, 1);
+       if (!ret)
+               return 0;
+
+       if (!IS_ERR(fmd->wbclk[CLK_IDX_WB_B]) && p->subdevs[IDX_IS_ISP])
+               clk_disable_unprepare(fmd->wbclk[CLK_IDX_WB_B]);
+
+       return ret;
+}
+
 /**
  * __fimc_pipeline_open - update the pipeline information, enable power
  *                        of all pipeline subdevs and the sensor clock
@@ -199,7 +230,6 @@ static int __fimc_pipeline_open(struct exynos_media_pipeline *ep,
        struct fimc_md *fmd = entity_to_fimc_mdev(me);
        struct fimc_pipeline *p = to_fimc_pipeline(ep);
        struct v4l2_subdev *sd;
-       int ret;
 
        if (WARN_ON(p == NULL || me == NULL))
                return -EINVAL;
@@ -208,24 +238,16 @@ static int __fimc_pipeline_open(struct exynos_media_pipeline *ep,
                fimc_pipeline_prepare(p, me);
 
        sd = p->subdevs[IDX_SENSOR];
-       if (sd == NULL)
-               return -EINVAL;
-
-       /* Disable PXLASYNC clock if this pipeline includes FIMC-IS */
-       if (!IS_ERR(fmd->wbclk[CLK_IDX_WB_B]) && p->subdevs[IDX_IS_ISP]) {
-               ret = clk_prepare_enable(fmd->wbclk[CLK_IDX_WB_B]);
-               if (ret < 0)
-                       return ret;
-       }
-
-       ret = fimc_pipeline_s_power(p, 1);
-       if (!ret)
+       if (sd == NULL) {
+               pr_warn("%s(): No sensor subdev\n", __func__);
+               /*
+                * Pipeline open cannot fail so as to make it possible
+                * for the user space to configure the pipeline.
+                */
                return 0;
+       }
 
-       if (!IS_ERR(fmd->wbclk[CLK_IDX_WB_B]) && p->subdevs[IDX_IS_ISP])
-               clk_disable_unprepare(fmd->wbclk[CLK_IDX_WB_B]);
-
-       return ret;
+       return __fimc_pipeline_enable(ep, fmd);
 }
 
 /**
@@ -269,10 +291,43 @@ static int __fimc_pipeline_s_stream(struct exynos_media_pipeline *ep, bool on)
                { IDX_CSIS, IDX_FLITE, IDX_FIMC, IDX_SENSOR, IDX_IS_ISP },
        };
        struct fimc_pipeline *p = to_fimc_pipeline(ep);
+       struct fimc_md *fmd = entity_to_fimc_mdev(&p->subdevs[IDX_CSIS]->entity);
+       enum fimc_subdev_index sd_id;
        int i, ret = 0;
 
-       if (p->subdevs[IDX_SENSOR] == NULL)
-               return -ENODEV;
+       if (p->subdevs[IDX_SENSOR] == NULL) {
+               if (!fmd->user_subdev_api) {
+                       /*
+                        * Sensor must be already discovered if we
+                        * aren't in the user_subdev_api mode
+                        */
+                       return -ENODEV;
+               }
+
+               /* Get pipeline sink entity */
+               if (p->subdevs[IDX_FIMC])
+                       sd_id = IDX_FIMC;
+               else if (p->subdevs[IDX_IS_ISP])
+                       sd_id = IDX_IS_ISP;
+               else if (p->subdevs[IDX_FLITE])
+                       sd_id = IDX_FLITE;
+               else
+                       return -ENODEV;
+
+               /*
+                * Sensor could have been linked between open and STREAMON -
+                * check if this is the case.
+                */
+               fimc_pipeline_prepare(p, &p->subdevs[sd_id]->entity);
+
+               if (p->subdevs[IDX_SENSOR] == NULL)
+                       return -ENODEV;
+
+               ret = __fimc_pipeline_enable(ep, fmd);
+               if (ret < 0)
+                       return ret;
+
+       }
 
        for (i = 0; i < IDX_MAX; i++) {
                unsigned int idx = seq[on][i];
@@ -282,8 +337,10 @@ static int __fimc_pipeline_s_stream(struct exynos_media_pipeline *ep, bool on)
                if (ret < 0 && ret != -ENOIOCTLCMD && ret != -ENODEV)
                        goto error;
        }
+
        return 0;
 error:
+       fimc_pipeline_s_power(p, !on);
        for (; i >= 0; i--) {
                unsigned int idx = seq[on][i];
                v4l2_subdev_call(p->subdevs[idx], video, s_stream, !on);
index c398b28..1af779e 100644 (file)
@@ -795,7 +795,7 @@ static int isi_camera_get_formats(struct soc_camera_device *icd,
                        xlate->host_fmt = &isi_camera_formats[i];
                        xlate->code     = code.code;
                        dev_dbg(icd->parent, "Providing format %s using code %d\n",
-                               isi_camera_formats[0].name, code.code);
+                               xlate->host_fmt->name, xlate->code);
                }
                break;
        default:
index cc84c6d..46c7186 100644 (file)
@@ -1493,6 +1493,8 @@ static void soc_camera_async_unbind(struct v4l2_async_notifier *notifier,
                                        struct soc_camera_async_client, notifier);
        struct soc_camera_device *icd = platform_get_drvdata(sasc->pdev);
 
+       icd->control = NULL;
+
        if (icd->clk) {
                v4l2_clk_unregister(icd->clk);
                icd->clk = NULL;
index 42dff9d..533bc79 100644 (file)
@@ -256,7 +256,7 @@ static int vsp1_create_entities(struct vsp1_device *vsp1)
 
        /* Create links. */
        list_for_each_entry(entity, &vsp1->entities, list_dev) {
-               if (entity->type == VSP1_ENTITY_LIF) {
+               if (entity->type == VSP1_ENTITY_WPF) {
                        ret = vsp1_wpf_create_links(vsp1, entity);
                        if (ret < 0)
                                goto done;
@@ -264,7 +264,10 @@ static int vsp1_create_entities(struct vsp1_device *vsp1)
                        ret = vsp1_rpf_create_links(vsp1, entity);
                        if (ret < 0)
                                goto done;
-               } else {
+               }
+
+               if (entity->type != VSP1_ENTITY_LIF &&
+                   entity->type != VSP1_ENTITY_RPF) {
                        ret = vsp1_create_links(vsp1, entity);
                        if (ret < 0)
                                goto done;
index 637d0d6..b4dca57 100644 (file)
@@ -515,7 +515,7 @@ static bool vsp1_pipeline_stopped(struct vsp1_pipeline *pipe)
        bool stopped;
 
        spin_lock_irqsave(&pipe->irqlock, flags);
-       stopped = pipe->state == VSP1_PIPELINE_STOPPED,
+       stopped = pipe->state == VSP1_PIPELINE_STOPPED;
        spin_unlock_irqrestore(&pipe->irqlock, flags);
 
        return stopped;
index c5d49d7..ff8953a 100644 (file)
@@ -1063,8 +1063,11 @@ EXPORT_SYMBOL_GPL(vb2_discard_done);
  */
 static int __qbuf_mmap(struct vb2_buffer *vb, const void *pb)
 {
-       int ret = call_bufop(vb->vb2_queue, fill_vb2_buffer,
-                       vb, pb, vb->planes);
+       int ret = 0;
+
+       if (pb)
+               ret = call_bufop(vb->vb2_queue, fill_vb2_buffer,
+                                vb, pb, vb->planes);
        return ret ? ret : call_vb_qop(vb, buf_prepare, vb);
 }
 
@@ -1077,14 +1080,16 @@ static int __qbuf_userptr(struct vb2_buffer *vb, const void *pb)
        struct vb2_queue *q = vb->vb2_queue;
        void *mem_priv;
        unsigned int plane;
-       int ret;
+       int ret = 0;
        enum dma_data_direction dma_dir =
                q->is_output ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
        bool reacquired = vb->planes[0].mem_priv == NULL;
 
        memset(planes, 0, sizeof(planes[0]) * vb->num_planes);
        /* Copy relevant information provided by the userspace */
-       ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, vb, pb, planes);
+       if (pb)
+               ret = call_bufop(vb->vb2_queue, fill_vb2_buffer,
+                                vb, pb, planes);
        if (ret)
                return ret;
 
@@ -1192,14 +1197,16 @@ static int __qbuf_dmabuf(struct vb2_buffer *vb, const void *pb)
        struct vb2_queue *q = vb->vb2_queue;
        void *mem_priv;
        unsigned int plane;
-       int ret;
+       int ret = 0;
        enum dma_data_direction dma_dir =
                q->is_output ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
        bool reacquired = vb->planes[0].mem_priv == NULL;
 
        memset(planes, 0, sizeof(planes[0]) * vb->num_planes);
        /* Copy relevant information provided by the userspace */
-       ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, vb, pb, planes);
+       if (pb)
+               ret = call_bufop(vb->vb2_queue, fill_vb2_buffer,
+                                vb, pb, planes);
        if (ret)
                return ret;
 
@@ -1520,7 +1527,8 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb)
        q->waiting_for_buffers = false;
        vb->state = VB2_BUF_STATE_QUEUED;
 
-       call_void_bufop(q, copy_timestamp, vb, pb);
+       if (pb)
+               call_void_bufop(q, copy_timestamp, vb, pb);
 
        trace_vb2_qbuf(q, vb);
 
@@ -1532,7 +1540,8 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb)
                __enqueue_in_driver(vb);
 
        /* Fill buffer information for the userspace */
-       call_void_bufop(q, fill_user_buffer, vb, pb);
+       if (pb)
+               call_void_bufop(q, fill_user_buffer, vb, pb);
 
        /*
         * If streamon has been called, and we haven't yet called
@@ -1731,7 +1740,8 @@ static void __vb2_dqbuf(struct vb2_buffer *vb)
  * The return values from this function are intended to be directly returned
  * from vidioc_dqbuf handler in driver.
  */
-int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking)
+int vb2_core_dqbuf(struct vb2_queue *q, unsigned int *pindex, void *pb,
+                  bool nonblocking)
 {
        struct vb2_buffer *vb = NULL;
        int ret;
@@ -1754,8 +1764,12 @@ int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking)
 
        call_void_vb_qop(vb, buf_finish, vb);
 
+       if (pindex)
+               *pindex = vb->index;
+
        /* Fill buffer information for the userspace */
-       call_void_bufop(q, fill_user_buffer, vb, pb);
+       if (pb)
+               call_void_bufop(q, fill_user_buffer, vb, pb);
 
        /* Remove from videobuf queue */
        list_del(&vb->queued_entry);
@@ -1828,7 +1842,7 @@ static void __vb2_queue_cancel(struct vb2_queue *q)
         * that's done in dqbuf, but that's not going to happen when we
         * cancel the whole queue. Note: this code belongs here, not in
         * __vb2_dqbuf() since in vb2_internal_dqbuf() there is a critical
-        * call to __fill_v4l2_buffer() after buf_finish(). That order can't
+        * call to __fill_user_buffer() after buf_finish(). That order can't
         * be changed, so we can't move the buf_finish() to __vb2_dqbuf().
         */
        for (i = 0; i < q->num_buffers; ++i) {
@@ -2357,7 +2371,6 @@ struct vb2_fileio_data {
        unsigned int count;
        unsigned int type;
        unsigned int memory;
-       struct vb2_buffer *b;
        struct vb2_fileio_buf bufs[VB2_MAX_FRAME];
        unsigned int cur_index;
        unsigned int initial_index;
@@ -2410,12 +2423,6 @@ static int __vb2_init_fileio(struct vb2_queue *q, int read)
        if (fileio == NULL)
                return -ENOMEM;
 
-       fileio->b = kzalloc(q->buf_struct_size, GFP_KERNEL);
-       if (fileio->b == NULL) {
-               kfree(fileio);
-               return -ENOMEM;
-       }
-
        fileio->read_once = q->fileio_read_once;
        fileio->write_immediately = q->fileio_write_immediately;
 
@@ -2460,13 +2467,7 @@ static int __vb2_init_fileio(struct vb2_queue *q, int read)
                 * Queue all buffers.
                 */
                for (i = 0; i < q->num_buffers; i++) {
-                       struct vb2_buffer *b = fileio->b;
-
-                       memset(b, 0, q->buf_struct_size);
-                       b->type = q->type;
-                       b->memory = q->memory;
-                       b->index = i;
-                       ret = vb2_core_qbuf(q, i, b);
+                       ret = vb2_core_qbuf(q, i, NULL);
                        if (ret)
                                goto err_reqbufs;
                        fileio->bufs[i].queued = 1;
@@ -2511,7 +2512,6 @@ static int __vb2_cleanup_fileio(struct vb2_queue *q)
                q->fileio = NULL;
                fileio->count = 0;
                vb2_core_reqbufs(q, fileio->memory, &fileio->count);
-               kfree(fileio->b);
                kfree(fileio);
                dprintk(3, "file io emulator closed\n");
        }
@@ -2539,7 +2539,8 @@ static size_t __vb2_perform_fileio(struct vb2_queue *q, char __user *data, size_
         * else is able to provide this information with the write() operation.
         */
        bool copy_timestamp = !read && q->copy_timestamp;
-       int ret, index;
+       unsigned index;
+       int ret;
 
        dprintk(3, "mode %s, offset %ld, count %zd, %sblocking\n",
                read ? "read" : "write", (long)*ppos, count,
@@ -2564,22 +2565,20 @@ static size_t __vb2_perform_fileio(struct vb2_queue *q, char __user *data, size_
         */
        index = fileio->cur_index;
        if (index >= q->num_buffers) {
-               struct vb2_buffer *b = fileio->b;
+               struct vb2_buffer *b;
 
                /*
                 * Call vb2_dqbuf to get buffer back.
                 */
-               memset(b, 0, q->buf_struct_size);
-               b->type = q->type;
-               b->memory = q->memory;
-               ret = vb2_core_dqbuf(q, b, nonblock);
+               ret = vb2_core_dqbuf(q, &index, NULL, nonblock);
                dprintk(5, "vb2_dqbuf result: %d\n", ret);
                if (ret)
                        return ret;
                fileio->dq_count += 1;
 
-               fileio->cur_index = index = b->index;
+               fileio->cur_index = index;
                buf = &fileio->bufs[index];
+               b = q->bufs[index];
 
                /*
                 * Get number of bytes filled by the driver
@@ -2630,7 +2629,7 @@ static size_t __vb2_perform_fileio(struct vb2_queue *q, char __user *data, size_
         * Queue next buffer if required.
         */
        if (buf->pos == buf->size || (!read && fileio->write_immediately)) {
-               struct vb2_buffer *b = fileio->b;
+               struct vb2_buffer *b = q->bufs[index];
 
                /*
                 * Check if this is the last buffer to read.
@@ -2643,15 +2642,11 @@ static size_t __vb2_perform_fileio(struct vb2_queue *q, char __user *data, size_
                /*
                 * Call vb2_qbuf and give buffer to the driver.
                 */
-               memset(b, 0, q->buf_struct_size);
-               b->type = q->type;
-               b->memory = q->memory;
-               b->index = index;
                b->planes[0].bytesused = buf->pos;
 
                if (copy_timestamp)
                        b->timestamp = ktime_get_ns();
-               ret = vb2_core_qbuf(q, index, b);
+               ret = vb2_core_qbuf(q, index, NULL);
                dprintk(5, "vb2_dbuf result: %d\n", ret);
                if (ret)
                        return ret;
@@ -2713,10 +2708,9 @@ static int vb2_thread(void *data)
 {
        struct vb2_queue *q = data;
        struct vb2_threadio_data *threadio = q->threadio;
-       struct vb2_fileio_data *fileio = q->fileio;
        bool copy_timestamp = false;
-       int prequeue = 0;
-       int index = 0;
+       unsigned prequeue = 0;
+       unsigned index = 0;
        int ret = 0;
 
        if (q->is_output) {
@@ -2728,37 +2722,34 @@ static int vb2_thread(void *data)
 
        for (;;) {
                struct vb2_buffer *vb;
-               struct vb2_buffer *b = fileio->b;
 
                /*
                 * Call vb2_dqbuf to get buffer back.
                 */
-               memset(b, 0, q->buf_struct_size);
-               b->type = q->type;
-               b->memory = q->memory;
                if (prequeue) {
-                       b->index = index++;
+                       vb = q->bufs[index++];
                        prequeue--;
                } else {
                        call_void_qop(q, wait_finish, q);
                        if (!threadio->stop)
-                               ret = vb2_core_dqbuf(q, b, 0);
+                               ret = vb2_core_dqbuf(q, &index, NULL, 0);
                        call_void_qop(q, wait_prepare, q);
                        dprintk(5, "file io: vb2_dqbuf result: %d\n", ret);
+                       if (!ret)
+                               vb = q->bufs[index];
                }
                if (ret || threadio->stop)
                        break;
                try_to_freeze();
 
-               vb = q->bufs[b->index];
-               if (b->state == VB2_BUF_STATE_DONE)
+               if (vb->state != VB2_BUF_STATE_ERROR)
                        if (threadio->fnc(vb, threadio->priv))
                                break;
                call_void_qop(q, wait_finish, q);
                if (copy_timestamp)
-                       b->timestamp = ktime_get_ns();;
+                       vb->timestamp = ktime_get_ns();;
                if (!threadio->stop)
-                       ret = vb2_core_qbuf(q, b->index, b);
+                       ret = vb2_core_qbuf(q, vb->index, NULL);
                call_void_qop(q, wait_prepare, q);
                if (ret || threadio->stop)
                        break;
index c9a2860..91f5521 100644 (file)
@@ -625,7 +625,7 @@ static int vb2_internal_dqbuf(struct vb2_queue *q, struct v4l2_buffer *b,
                return -EINVAL;
        }
 
-       ret = vb2_core_dqbuf(q, b, nonblocking);
+       ret = vb2_core_dqbuf(q, NULL, b, nonblocking);
 
        return ret;
 }
index 154aced..65cc0ac 100644 (file)
@@ -170,7 +170,7 @@ static int mmc_ios_show(struct seq_file *s, void *data)
                str = "invalid";
                break;
        }
-       seq_printf(s, "signal voltage:\t%u (%s)\n", ios->chip_select, str);
+       seq_printf(s, "signal voltage:\t%u (%s)\n", ios->signal_voltage, str);
 
        switch (ios->drv_type) {
        case MMC_SET_DRIVER_TYPE_A:
index 2b16263..aba786d 100644 (file)
@@ -29,15 +29,18 @@ struct mmc_pwrseq_simple {
 static void mmc_pwrseq_simple_set_gpios_value(struct mmc_pwrseq_simple *pwrseq,
                                              int value)
 {
-       int i;
        struct gpio_descs *reset_gpios = pwrseq->reset_gpios;
-       int values[reset_gpios->ndescs];
 
-       for (i = 0; i < reset_gpios->ndescs; i++)
-               values[i] = value;
+       if (!IS_ERR(reset_gpios)) {
+               int i;
+               int values[reset_gpios->ndescs];
 
-       gpiod_set_array_value_cansleep(reset_gpios->ndescs, reset_gpios->desc,
-                                      values);
+               for (i = 0; i < reset_gpios->ndescs; i++)
+                       values[i] = value;
+
+               gpiod_set_array_value_cansleep(
+                       reset_gpios->ndescs, reset_gpios->desc, values);
+       }
 }
 
 static void mmc_pwrseq_simple_pre_power_on(struct mmc_host *host)
@@ -79,7 +82,8 @@ static void mmc_pwrseq_simple_free(struct mmc_host *host)
        struct mmc_pwrseq_simple *pwrseq = container_of(host->pwrseq,
                                        struct mmc_pwrseq_simple, pwrseq);
 
-       gpiod_put_array(pwrseq->reset_gpios);
+       if (!IS_ERR(pwrseq->reset_gpios))
+               gpiod_put_array(pwrseq->reset_gpios);
 
        if (!IS_ERR(pwrseq->ext_clk))
                clk_put(pwrseq->ext_clk);
@@ -112,7 +116,9 @@ struct mmc_pwrseq *mmc_pwrseq_simple_alloc(struct mmc_host *host,
        }
 
        pwrseq->reset_gpios = gpiod_get_array(dev, "reset", GPIOD_OUT_HIGH);
-       if (IS_ERR(pwrseq->reset_gpios)) {
+       if (IS_ERR(pwrseq->reset_gpios) &&
+           PTR_ERR(pwrseq->reset_gpios) != -ENOENT &&
+           PTR_ERR(pwrseq->reset_gpios) != -ENOSYS) {
                ret = PTR_ERR(pwrseq->reset_gpios);
                goto clk_put;
        }
index f2b164b..bb39a29 100644 (file)
@@ -329,6 +329,7 @@ static int mmc_read_switch(struct mmc_card *card)
                card->sw_caps.sd3_bus_mode = status[13];
                /* Driver Strengths supported by the card */
                card->sw_caps.sd3_drv_type = status[9];
+               card->sw_caps.sd3_curr_limit = status[7] | status[6] << 8;
        }
 
 out:
@@ -545,14 +546,25 @@ static int sd_set_current_limit(struct mmc_card *card, u8 *status)
         * when we set current limit to 200ma, the card will draw 200ma, and
         * when we set current limit to 400/600/800ma, the card will draw its
         * maximum 300ma from the host.
+        *
+        * The above is incorrect: if we try to set a current limit that is
+        * not supported by the card, the card can rightfully error out the
+        * attempt, and remain at the default current limit.  This results
+        * in a 300mA card being limited to 200mA even though the host
+        * supports 800mA. Failures seen with SanDisk 8GB UHS cards with
+        * an iMX6 host. --rmk
         */
-       if (max_current >= 800)
+       if (max_current >= 800 &&
+           card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_800)
                current_limit = SD_SET_CURRENT_LIMIT_800;
-       else if (max_current >= 600)
+       else if (max_current >= 600 &&
+                card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_600)
                current_limit = SD_SET_CURRENT_LIMIT_600;
-       else if (max_current >= 400)
+       else if (max_current >= 400 &&
+                card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_400)
                current_limit = SD_SET_CURRENT_LIMIT_400;
-       else if (max_current >= 200)
+       else if (max_current >= 200 &&
+                card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_200)
                current_limit = SD_SET_CURRENT_LIMIT_200;
 
        if (current_limit != SD_SET_CURRENT_NO_CHANGE) {
@@ -626,9 +638,9 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card)
         * SDR104 mode SD-cards. Note that tuning is mandatory for SDR104.
         */
        if (!mmc_host_is_spi(card->host) &&
-               (card->sd_bus_speed == UHS_SDR50_BUS_SPEED ||
-                card->sd_bus_speed == UHS_DDR50_BUS_SPEED ||
-                card->sd_bus_speed == UHS_SDR104_BUS_SPEED)) {
+               (card->host->ios.timing == MMC_TIMING_UHS_SDR50 ||
+                card->host->ios.timing == MMC_TIMING_UHS_DDR50 ||
+                card->host->ios.timing == MMC_TIMING_UHS_SDR104)) {
                err = mmc_execute_tuning(card);
 
                /*
@@ -638,7 +650,7 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card)
                 * difference between v3.00 and 3.01 spec means that CMD19
                 * tuning is also available for DDR50 mode.
                 */
-               if (err && card->sd_bus_speed == UHS_DDR50_BUS_SPEED) {
+               if (err && card->host->ios.timing == MMC_TIMING_UHS_DDR50) {
                        pr_warn("%s: ddr50 tuning failed\n",
                                mmc_hostname(card->host));
                        err = 0;
index d61ba1a..467b3cf 100644 (file)
@@ -535,8 +535,8 @@ static int mmc_sdio_init_uhs_card(struct mmc_card *card)
         * SDR104 mode SD-cards. Note that tuning is mandatory for SDR104.
         */
        if (!mmc_host_is_spi(card->host) &&
-           ((card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR50) ||
-            (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104)))
+           ((card->host->ios.timing == MMC_TIMING_UHS_SDR50) ||
+             (card->host->ios.timing == MMC_TIMING_UHS_SDR104)))
                err = mmc_execute_tuning(card);
 out:
        return err;
index 8e94e55..6f6fc52 100644 (file)
@@ -223,6 +223,7 @@ static const struct cis_tpl cis_tpl_list[] = {
        {       0x20,   4,      cistpl_manfid           },
        {       0x21,   2,      /* cistpl_funcid */     },
        {       0x22,   0,      cistpl_funce            },
+       {       0x91,   2,      /* cistpl_sdio_std */   },
 };
 
 static int sdio_read_cis(struct mmc_card *card, struct sdio_func *func)
index fb26674..0d6ca41 100644 (file)
@@ -151,6 +151,7 @@ static struct variant_data variant_nomadik = {
        .fifosize               = 16 * 4,
        .fifohalfsize           = 8 * 4,
        .clkreg                 = MCI_CLK_ENABLE,
+       .clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS,
        .datalength_bits        = 24,
        .datactrl_mask_sdio     = MCI_ST_DPSM_SDIOEN,
        .st_sdio                = true,
@@ -1886,7 +1887,7 @@ static struct amba_id mmci_ids[] = {
        {
                .id     = 0x00280180,
                .mask   = 0x00ffffff,
-               .data   = &variant_u300,
+               .data   = &variant_nomadik,
        },
        {
                .id     = 0x00480180,
index e4b05db..4a0d6b8 100644 (file)
@@ -94,9 +94,9 @@ static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
                        desc = NULL;
                        ret = cookie;
                }
+               dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
+                       __func__, host->sg_len, ret, cookie, host->mrq);
        }
-       dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
-               __func__, host->sg_len, ret, cookie, host->mrq);
 
 pio:
        if (!desc) {
@@ -116,8 +116,8 @@ pio:
                         "DMA failed: %d, falling back to PIO\n", ret);
        }
 
-       dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d, sg[%d]\n", __func__,
-               desc, cookie, host->sg_len);
+       dev_dbg(&host->pdev->dev, "%s(): desc %p, sg[%d]\n", __func__,
+               desc, host->sg_len);
 }
 
 static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
@@ -174,9 +174,9 @@ static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
                        desc = NULL;
                        ret = cookie;
                }
+               dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
+                       __func__, host->sg_len, ret, cookie, host->mrq);
        }
-       dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
-               __func__, host->sg_len, ret, cookie, host->mrq);
 
 pio:
        if (!desc) {
@@ -196,8 +196,7 @@ pio:
                         "DMA failed: %d, falling back to PIO\n", ret);
        }
 
-       dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d\n", __func__,
-               desc, cookie);
+       dev_dbg(&host->pdev->dev, "%s(): desc %p\n", __func__, desc);
 }
 
 void tmio_mmc_start_dma(struct tmio_mmc_host *host,
index 4409369..cec3188 100644 (file)
@@ -24,6 +24,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/bcm963xx_tag.h>
 #include <linux/crc32.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/mtd/partitions.h>
 
 #include <asm/mach-bcm63xx/bcm63xx_nvram.h>
-#include <asm/mach-bcm63xx/bcm963xx_tag.h>
 #include <asm/mach-bcm63xx/board_bcm963xx.h>
 
-#define BCM63XX_EXTENDED_SIZE  0xBFC00000      /* Extended flash address */
-
 #define BCM63XX_CFE_BLOCK_SIZE SZ_64K          /* always at least 64KiB */
 
 #define BCM63XX_CFE_MAGIC_OFFSET 0x4e0
@@ -123,8 +121,8 @@ static int bcm63xx_parse_cfe_partitions(struct mtd_info *master,
                pr_info("CFE boot tag found with version %s and board type %s\n",
                        tagversion, boardid);
 
-               kerneladdr = kerneladdr - BCM63XX_EXTENDED_SIZE;
-               rootfsaddr = rootfsaddr - BCM63XX_EXTENDED_SIZE;
+               kerneladdr = kerneladdr - BCM963XX_EXTENDED_SIZE;
+               rootfsaddr = rootfsaddr - BCM963XX_EXTENDED_SIZE;
                spareaddr = roundup(totallen, master->erasesize) + cfelen;
 
                if (rootfsaddr < kerneladdr) {
index 54e056d..ee2b74d 100644 (file)
@@ -174,9 +174,9 @@ static int vol_cdev_fsync(struct file *file, loff_t start, loff_t end,
        struct ubi_device *ubi = desc->vol->ubi;
        struct inode *inode = file_inode(file);
        int err;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        err = ubi_sync(ubi->ubi_num);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
index 9fe33fc..cf34681 100644 (file)
@@ -1532,7 +1532,7 @@ int mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port,
 
        /* no PVID with ranges, otherwise it's a bug */
        if (pvid)
-               err = _mv88e6xxx_port_pvid_set(ds, port, vid);
+               err = _mv88e6xxx_port_pvid_set(ds, port, vlan->vid_end);
 unlock:
        mutex_unlock(&ps->smi_mutex);
 
@@ -2163,7 +2163,8 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
         * database, and allow every port to egress frames on all other ports.
         */
        reg = BIT(ps->num_ports) - 1; /* all ports */
-       ret = _mv88e6xxx_port_vlan_map_set(ds, port, reg & ~port);
+       reg &= ~BIT(port); /* except itself */
+       ret = _mv88e6xxx_port_vlan_map_set(ds, port, reg);
        if (ret)
                goto abort;
 
index a4799c1..5eb9b20 100644 (file)
@@ -628,6 +628,7 @@ static int xgene_enet_register_irq(struct net_device *ndev)
        int ret;
 
        ring = pdata->rx_ring;
+       irq_set_status_flags(ring->irq, IRQ_DISABLE_UNLAZY);
        ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq,
                               IRQF_SHARED, ring->irq_name, ring);
        if (ret)
@@ -635,6 +636,7 @@ static int xgene_enet_register_irq(struct net_device *ndev)
 
        if (pdata->cq_cnt) {
                ring = pdata->tx_ring->cp_ring;
+               irq_set_status_flags(ring->irq, IRQ_DISABLE_UNLAZY);
                ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq,
                                       IRQF_SHARED, ring->irq_name, ring);
                if (ret) {
@@ -649,15 +651,19 @@ static int xgene_enet_register_irq(struct net_device *ndev)
 static void xgene_enet_free_irq(struct net_device *ndev)
 {
        struct xgene_enet_pdata *pdata;
+       struct xgene_enet_desc_ring *ring;
        struct device *dev;
 
        pdata = netdev_priv(ndev);
        dev = ndev_to_dev(ndev);
-       devm_free_irq(dev, pdata->rx_ring->irq, pdata->rx_ring);
+       ring = pdata->rx_ring;
+       irq_clear_status_flags(ring->irq, IRQ_DISABLE_UNLAZY);
+       devm_free_irq(dev, ring->irq, ring);
 
        if (pdata->cq_cnt) {
-               devm_free_irq(dev, pdata->tx_ring->cp_ring->irq,
-                             pdata->tx_ring->cp_ring);
+               ring = pdata->tx_ring->cp_ring;
+               irq_clear_status_flags(ring->irq, IRQ_DISABLE_UNLAZY);
+               devm_free_irq(dev, ring->irq, ring);
        }
 }
 
index 70d5b62..248dfc4 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/acpi.h>
 #include <linux/clk.h>
 #include <linux/efi.h>
+#include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/of_platform.h>
 #include <linux/of_net.h>
index ecc4a33..f71ab26 100644 (file)
@@ -302,7 +302,7 @@ static int nb8800_poll(struct napi_struct *napi, int budget)
        nb8800_tx_done(dev);
 
 again:
-       while (work < budget) {
+       do {
                struct nb8800_rx_buf *rxb;
                unsigned int len;
 
@@ -330,7 +330,7 @@ again:
                rxd->report = 0;
                last = next;
                work++;
-       }
+       } while (work < budget);
 
        if (work) {
                priv->rx_descs[last].desc.config |= DESC_EOC;
index 8550df1..19f7cd0 100644 (file)
@@ -151,8 +151,11 @@ config BNX2X_VXLAN
 
 config BGMAC
        tristate "BCMA bus GBit core support"
-       depends on BCMA_HOST_SOC && HAS_DMA && (BCM47XX || ARCH_BCM_5301X)
+       depends on BCMA && BCMA_HOST_SOC
+       depends on HAS_DMA
+       depends on BCM47XX || ARCH_BCM_5301X || COMPILE_TEST
        select PHYLIB
+       select FIXED_PHY
        ---help---
          This driver supports GBit MAC and BCM4706 GBit MAC cores on BCMA bus.
          They can be found on BCM47xx SoCs and provide gigabit ethernet.
index df835f5..5dc89e5 100644 (file)
@@ -1490,10 +1490,11 @@ static void bnxt_free_tx_skbs(struct bnxt *bp)
 
                        last = tx_buf->nr_frags;
                        j += 2;
-                       for (k = 0; k < last; k++, j = NEXT_TX(j)) {
+                       for (k = 0; k < last; k++, j++) {
+                               int ring_idx = j & bp->tx_ring_mask;
                                skb_frag_t *frag = &skb_shinfo(skb)->frags[k];
 
-                               tx_buf = &txr->tx_buf_ring[j];
+                               tx_buf = &txr->tx_buf_ring[ring_idx];
                                dma_unmap_page(
                                        &pdev->dev,
                                        dma_unmap_addr(tx_buf, mapping),
@@ -3406,7 +3407,7 @@ static int hwrm_ring_free_send_msg(struct bnxt *bp,
        struct hwrm_ring_free_output *resp = bp->hwrm_cmd_resp_addr;
        u16 error_code;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_FREE, -1, -1);
+       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_FREE, cmpl_ring_id, -1);
        req.ring_type = ring_type;
        req.ring_id = cpu_to_le16(ring->fw_ring_id);
 
@@ -4819,8 +4820,6 @@ bnxt_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 
                stats->multicast += le64_to_cpu(hw_stats->rx_mcast_pkts);
 
-               stats->rx_dropped += le64_to_cpu(hw_stats->rx_drop_pkts);
-
                stats->tx_dropped += le64_to_cpu(hw_stats->tx_drop_pkts);
        }
 
index 0d77596..457c3bc 100644 (file)
@@ -401,7 +401,7 @@ int bcmgenet_mii_probe(struct net_device *dev)
         * Ethernet MAC ISRs
         */
        if (priv->internal_phy)
-               priv->mii_bus->irq[phydev->mdio.addr] = PHY_IGNORE_INTERRUPT;
+               priv->phydev->irq = PHY_IGNORE_INTERRUPT;
 
        return 0;
 }
index 9293675..49eea89 100644 (file)
@@ -12016,7 +12016,7 @@ static int tg3_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
        int ret;
        u32 offset, len, b_offset, odd_len;
        u8 *buf;
-       __be32 start, end;
+       __be32 start = 0, end;
 
        if (tg3_flag(tp, NO_NVRAM) ||
            eeprom->magic != TG3_EEPROM_MAGIC)
index 9d9984a..50c9410 100644 (file)
@@ -2823,7 +2823,7 @@ static int macb_probe(struct platform_device *pdev)
        struct device_node *np = pdev->dev.of_node;
        struct device_node *phy_node;
        const struct macb_config *macb_config = NULL;
-       struct clk *pclk, *hclk, *tx_clk;
+       struct clk *pclk, *hclk = NULL, *tx_clk = NULL;
        unsigned int queue_mask, num_queues;
        struct macb_platform_data *pdata;
        bool native_io;
index b895044..8727655 100644 (file)
@@ -1526,7 +1526,6 @@ static int liquidio_ptp_gettime(struct ptp_clock_info *ptp,
                                struct timespec64 *ts)
 {
        u64 ns;
-       u32 remainder;
        unsigned long flags;
        struct lio *lio = container_of(ptp, struct lio, ptp_info);
        struct octeon_device *oct = (struct octeon_device *)lio->oct_dev;
@@ -1536,8 +1535,7 @@ static int liquidio_ptp_gettime(struct ptp_clock_info *ptp,
        ns += lio->ptp_adjust;
        spin_unlock_irqrestore(&lio->ptp_lock, flags);
 
-       ts->tv_sec = div_u64_rem(ns, 1000000000ULL, &remainder);
-       ts->tv_nsec = remainder;
+       *ts = ns_to_timespec64(ns);
 
        return 0;
 }
index 48ecbc8..b423ad3 100644 (file)
@@ -18,6 +18,7 @@ if NET_VENDOR_EZCHIP
 config EZCHIP_NPS_MANAGEMENT_ENET
        tristate "EZchip NPS management enet support"
        depends on OF_IRQ && OF_NET
+       depends on HAS_IOMEM
        ---help---
          Simple LAN device for debug or management purposes.
          Device supports interrupts for RX and TX(completion).
index 4097c58..cbe21dc 100644 (file)
@@ -4,6 +4,9 @@
 
 obj-$(CONFIG_FEC) += fec.o
 fec-objs :=fec_main.o fec_ptp.o
+CFLAGS_fec_main.o := -D__CHECK_ENDIAN__
+CFLAGS_fec_ptp.o := -D__CHECK_ENDIAN__
+
 obj-$(CONFIG_FEC_MPC52xx) += fec_mpc52xx.o
 ifeq ($(CONFIG_FEC_MPC52xx_MDIO),y)
        obj-$(CONFIG_FEC_MPC52xx) += fec_mpc52xx_phy.o
index 99d33e2..2106d72 100644 (file)
@@ -19,8 +19,7 @@
 #include <linux/timecounter.h>
 
 #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
-    defined(CONFIG_M520x) || defined(CONFIG_M532x) || \
-    defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
+    defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM)
 /*
  *     Just figures, Motorola would have to change the offsets for
  *     registers in the same peripheral device on different models
 
 /*
  *     Define the buffer descriptor structure.
+ *
+ *     Evidently, ARM SoCs have the FEC block generated in a
+ *     little endian mode so adjust endianness accordingly.
  */
-#if defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
+#if defined(CONFIG_ARM)
+#define fec32_to_cpu le32_to_cpu
+#define fec16_to_cpu le16_to_cpu
+#define cpu_to_fec32 cpu_to_le32
+#define cpu_to_fec16 cpu_to_le16
+#define __fec32 __le32
+#define __fec16 __le16
+
 struct bufdesc {
-       unsigned short cbd_datlen;      /* Data length */
-       unsigned short cbd_sc;  /* Control and status info */
-       unsigned long cbd_bufaddr;      /* Buffer address */
+       __fec16 cbd_datlen;     /* Data length */
+       __fec16 cbd_sc;         /* Control and status info */
+       __fec32 cbd_bufaddr;    /* Buffer address */
 };
 #else
+#define fec32_to_cpu be32_to_cpu
+#define fec16_to_cpu be16_to_cpu
+#define cpu_to_fec32 cpu_to_be32
+#define cpu_to_fec16 cpu_to_be16
+#define __fec32 __be32
+#define __fec16 __be16
+
 struct bufdesc {
-       unsigned short  cbd_sc;                 /* Control and status info */
-       unsigned short  cbd_datlen;             /* Data length */
-       unsigned long   cbd_bufaddr;            /* Buffer address */
+       __fec16 cbd_sc;         /* Control and status info */
+       __fec16 cbd_datlen;     /* Data length */
+       __fec32 cbd_bufaddr;    /* Buffer address */
 };
 #endif
 
 struct bufdesc_ex {
        struct bufdesc desc;
-       unsigned long cbd_esc;
-       unsigned long cbd_prot;
-       unsigned long cbd_bdu;
-       unsigned long ts;
-       unsigned short res0[4];
+       __fec32 cbd_esc;
+       __fec32 cbd_prot;
+       __fec32 cbd_bdu;
+       __fec32 ts;
+       __fec16 res0[4];
 };
 
 /*
index 502da6f..41c81f6 100644 (file)
@@ -332,11 +332,13 @@ static void fec_dump(struct net_device *ndev)
        bdp = txq->tx_bd_base;
 
        do {
-               pr_info("%3u %c%c 0x%04x 0x%08lx %4u %p\n",
+               pr_info("%3u %c%c 0x%04x 0x%08x %4u %p\n",
                        index,
                        bdp == txq->cur_tx ? 'S' : ' ',
                        bdp == txq->dirty_tx ? 'H' : ' ',
-                       bdp->cbd_sc, bdp->cbd_bufaddr, bdp->cbd_datlen,
+                       fec16_to_cpu(bdp->cbd_sc),
+                       fec32_to_cpu(bdp->cbd_bufaddr),
+                       fec16_to_cpu(bdp->cbd_datlen),
                        txq->tx_skbuff[index]);
                bdp = fec_enet_get_nextdesc(bdp, fep, 0);
                index++;
@@ -389,7 +391,7 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq,
                bdp = fec_enet_get_nextdesc(bdp, fep, queue);
                ebdp = (struct bufdesc_ex *)bdp;
 
-               status = bdp->cbd_sc;
+               status = fec16_to_cpu(bdp->cbd_sc);
                status &= ~BD_ENET_TX_STATS;
                status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
                frag_len = skb_shinfo(skb)->frags[frag].size;
@@ -411,7 +413,7 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq,
                        if (skb->ip_summed == CHECKSUM_PARTIAL)
                                estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
                        ebdp->cbd_bdu = 0;
-                       ebdp->cbd_esc = estatus;
+                       ebdp->cbd_esc = cpu_to_fec32(estatus);
                }
 
                bufaddr = page_address(this_frag->page.p) + this_frag->page_offset;
@@ -435,9 +437,9 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq,
                        goto dma_mapping_error;
                }
 
-               bdp->cbd_bufaddr = addr;
-               bdp->cbd_datlen = frag_len;
-               bdp->cbd_sc = status;
+               bdp->cbd_bufaddr = cpu_to_fec32(addr);
+               bdp->cbd_datlen = cpu_to_fec16(frag_len);
+               bdp->cbd_sc = cpu_to_fec16(status);
        }
 
        return bdp;
@@ -445,8 +447,8 @@ dma_mapping_error:
        bdp = txq->cur_tx;
        for (i = 0; i < frag; i++) {
                bdp = fec_enet_get_nextdesc(bdp, fep, queue);
-               dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
-                               bdp->cbd_datlen, DMA_TO_DEVICE);
+               dma_unmap_single(&fep->pdev->dev, fec32_to_cpu(bdp->cbd_bufaddr),
+                                fec16_to_cpu(bdp->cbd_datlen), DMA_TO_DEVICE);
        }
        return ERR_PTR(-ENOMEM);
 }
@@ -483,7 +485,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq,
        /* Fill in a Tx ring entry */
        bdp = txq->cur_tx;
        last_bdp = bdp;
-       status = bdp->cbd_sc;
+       status = fec16_to_cpu(bdp->cbd_sc);
        status &= ~BD_ENET_TX_STATS;
 
        /* Set buffer length and buffer pointer */
@@ -539,21 +541,21 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq,
                        estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
 
                ebdp->cbd_bdu = 0;
-               ebdp->cbd_esc = estatus;
+               ebdp->cbd_esc = cpu_to_fec32(estatus);
        }
 
        index = fec_enet_get_bd_index(txq->tx_bd_base, last_bdp, fep);
        /* Save skb pointer */
        txq->tx_skbuff[index] = skb;
 
-       bdp->cbd_datlen = buflen;
-       bdp->cbd_bufaddr = addr;
+       bdp->cbd_datlen = cpu_to_fec16(buflen);
+       bdp->cbd_bufaddr = cpu_to_fec32(addr);
 
        /* Send it on its way.  Tell FEC it's ready, interrupt when done,
         * it's the last BD of the frame, and to put the CRC on the end.
         */
        status |= (BD_ENET_TX_READY | BD_ENET_TX_TC);
-       bdp->cbd_sc = status;
+       bdp->cbd_sc = cpu_to_fec16(status);
 
        /* If this was the last BD in the ring, start at the beginning again. */
        bdp = fec_enet_get_nextdesc(last_bdp, fep, queue);
@@ -585,7 +587,7 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb,
        unsigned int estatus = 0;
        dma_addr_t addr;
 
-       status = bdp->cbd_sc;
+       status = fec16_to_cpu(bdp->cbd_sc);
        status &= ~BD_ENET_TX_STATS;
 
        status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
@@ -607,8 +609,8 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb,
                return NETDEV_TX_BUSY;
        }
 
-       bdp->cbd_datlen = size;
-       bdp->cbd_bufaddr = addr;
+       bdp->cbd_datlen = cpu_to_fec16(size);
+       bdp->cbd_bufaddr = cpu_to_fec32(addr);
 
        if (fep->bufdesc_ex) {
                if (fep->quirks & FEC_QUIRK_HAS_AVB)
@@ -616,7 +618,7 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb,
                if (skb->ip_summed == CHECKSUM_PARTIAL)
                        estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
                ebdp->cbd_bdu = 0;
-               ebdp->cbd_esc = estatus;
+               ebdp->cbd_esc = cpu_to_fec32(estatus);
        }
 
        /* Handle the last BD specially */
@@ -625,10 +627,10 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb,
        if (is_last) {
                status |= BD_ENET_TX_INTR;
                if (fep->bufdesc_ex)
-                       ebdp->cbd_esc |= BD_ENET_TX_INT;
+                       ebdp->cbd_esc |= cpu_to_fec32(BD_ENET_TX_INT);
        }
 
-       bdp->cbd_sc = status;
+       bdp->cbd_sc = cpu_to_fec16(status);
 
        return 0;
 }
@@ -647,7 +649,7 @@ fec_enet_txq_put_hdr_tso(struct fec_enet_priv_tx_q *txq,
        unsigned short status;
        unsigned int estatus = 0;
 
-       status = bdp->cbd_sc;
+       status = fec16_to_cpu(bdp->cbd_sc);
        status &= ~BD_ENET_TX_STATS;
        status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
 
@@ -671,8 +673,8 @@ fec_enet_txq_put_hdr_tso(struct fec_enet_priv_tx_q *txq,
                }
        }
 
-       bdp->cbd_bufaddr = dmabuf;
-       bdp->cbd_datlen = hdr_len;
+       bdp->cbd_bufaddr = cpu_to_fec32(dmabuf);
+       bdp->cbd_datlen = cpu_to_fec16(hdr_len);
 
        if (fep->bufdesc_ex) {
                if (fep->quirks & FEC_QUIRK_HAS_AVB)
@@ -680,10 +682,10 @@ fec_enet_txq_put_hdr_tso(struct fec_enet_priv_tx_q *txq,
                if (skb->ip_summed == CHECKSUM_PARTIAL)
                        estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
                ebdp->cbd_bdu = 0;
-               ebdp->cbd_esc = estatus;
+               ebdp->cbd_esc = cpu_to_fec32(estatus);
        }
 
-       bdp->cbd_sc = status;
+       bdp->cbd_sc = cpu_to_fec16(status);
 
        return 0;
 }
@@ -823,15 +825,15 @@ static void fec_enet_bd_init(struct net_device *dev)
 
                        /* Initialize the BD for every fragment in the page. */
                        if (bdp->cbd_bufaddr)
-                               bdp->cbd_sc = BD_ENET_RX_EMPTY;
+                               bdp->cbd_sc = cpu_to_fec16(BD_ENET_RX_EMPTY);
                        else
-                               bdp->cbd_sc = 0;
+                               bdp->cbd_sc = cpu_to_fec16(0);
                        bdp = fec_enet_get_nextdesc(bdp, fep, q);
                }
 
                /* Set the last buffer to wrap */
                bdp = fec_enet_get_prevdesc(bdp, fep, q);
-               bdp->cbd_sc |= BD_SC_WRAP;
+               bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);
 
                rxq->cur_rx = rxq->rx_bd_base;
        }
@@ -844,18 +846,18 @@ static void fec_enet_bd_init(struct net_device *dev)
 
                for (i = 0; i < txq->tx_ring_size; i++) {
                        /* Initialize the BD for every fragment in the page. */
-                       bdp->cbd_sc = 0;
+                       bdp->cbd_sc = cpu_to_fec16(0);
                        if (txq->tx_skbuff[i]) {
                                dev_kfree_skb_any(txq->tx_skbuff[i]);
                                txq->tx_skbuff[i] = NULL;
                        }
-                       bdp->cbd_bufaddr = 0;
+                       bdp->cbd_bufaddr = cpu_to_fec32(0);
                        bdp = fec_enet_get_nextdesc(bdp, fep, q);
                }
 
                /* Set the last buffer to wrap */
                bdp = fec_enet_get_prevdesc(bdp, fep, q);
-               bdp->cbd_sc |= BD_SC_WRAP;
+               bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);
                txq->dirty_tx = bdp;
        }
 }
@@ -947,8 +949,10 @@ fec_restart(struct net_device *ndev)
         */
        if (fep->quirks & FEC_QUIRK_ENET_MAC) {
                memcpy(&temp_mac, ndev->dev_addr, ETH_ALEN);
-               writel(cpu_to_be32(temp_mac[0]), fep->hwp + FEC_ADDR_LOW);
-               writel(cpu_to_be32(temp_mac[1]), fep->hwp + FEC_ADDR_HIGH);
+               writel((__force u32)cpu_to_be32(temp_mac[0]),
+                      fep->hwp + FEC_ADDR_LOW);
+               writel((__force u32)cpu_to_be32(temp_mac[1]),
+                      fep->hwp + FEC_ADDR_HIGH);
        }
 
        /* Clear any outstanding interrupt. */
@@ -1222,7 +1226,7 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
        while (bdp != READ_ONCE(txq->cur_tx)) {
                /* Order the load of cur_tx and cbd_sc */
                rmb();
-               status = READ_ONCE(bdp->cbd_sc);
+               status = fec16_to_cpu(READ_ONCE(bdp->cbd_sc));
                if (status & BD_ENET_TX_READY)
                        break;
 
@@ -1230,10 +1234,12 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
 
                skb = txq->tx_skbuff[index];
                txq->tx_skbuff[index] = NULL;
-               if (!IS_TSO_HEADER(txq, bdp->cbd_bufaddr))
-                       dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
-                                       bdp->cbd_datlen, DMA_TO_DEVICE);
-               bdp->cbd_bufaddr = 0;
+               if (!IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
+                       dma_unmap_single(&fep->pdev->dev,
+                                        fec32_to_cpu(bdp->cbd_bufaddr),
+                                        fec16_to_cpu(bdp->cbd_datlen),
+                                        DMA_TO_DEVICE);
+               bdp->cbd_bufaddr = cpu_to_fec32(0);
                if (!skb) {
                        bdp = fec_enet_get_nextdesc(bdp, fep, queue_id);
                        continue;
@@ -1264,7 +1270,7 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
                        struct skb_shared_hwtstamps shhwtstamps;
                        struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
 
-                       fec_enet_hwtstamp(fep, ebdp->ts, &shhwtstamps);
+                       fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), &shhwtstamps);
                        skb_tstamp_tx(skb, &shhwtstamps);
                }
 
@@ -1324,10 +1330,8 @@ fec_enet_new_rxbdp(struct net_device *ndev, struct bufdesc *bdp, struct sk_buff
        if (off)
                skb_reserve(skb, fep->rx_align + 1 - off);
 
-       bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, skb->data,
-                                         FEC_ENET_RX_FRSIZE - fep->rx_align,
-                                         DMA_FROM_DEVICE);
-       if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+       bdp->cbd_bufaddr = cpu_to_fec32(dma_map_single(&fep->pdev->dev, skb->data, FEC_ENET_RX_FRSIZE - fep->rx_align, DMA_FROM_DEVICE));
+       if (dma_mapping_error(&fep->pdev->dev, fec32_to_cpu(bdp->cbd_bufaddr))) {
                if (net_ratelimit())
                        netdev_err(ndev, "Rx DMA memory map failed\n");
                return -ENOMEM;
@@ -1349,7 +1353,8 @@ static bool fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb,
        if (!new_skb)
                return false;
 
-       dma_sync_single_for_cpu(&fep->pdev->dev, bdp->cbd_bufaddr,
+       dma_sync_single_for_cpu(&fep->pdev->dev,
+                               fec32_to_cpu(bdp->cbd_bufaddr),
                                FEC_ENET_RX_FRSIZE - fep->rx_align,
                                DMA_FROM_DEVICE);
        if (!swap)
@@ -1396,7 +1401,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
         */
        bdp = rxq->cur_rx;
 
-       while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) {
+       while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) {
 
                if (pkt_received >= budget)
                        break;
@@ -1438,7 +1443,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
 
                /* Process the incoming frame. */
                ndev->stats.rx_packets++;
-               pkt_len = bdp->cbd_datlen;
+               pkt_len = fec16_to_cpu(bdp->cbd_datlen);
                ndev->stats.rx_bytes += pkt_len;
 
                index = fec_enet_get_bd_index(rxq->rx_bd_base, bdp, fep);
@@ -1456,7 +1461,8 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
                                ndev->stats.rx_dropped++;
                                goto rx_processing_done;
                        }
-                       dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+                       dma_unmap_single(&fep->pdev->dev,
+                                        fec32_to_cpu(bdp->cbd_bufaddr),
                                         FEC_ENET_RX_FRSIZE - fep->rx_align,
                                         DMA_FROM_DEVICE);
                }
@@ -1475,7 +1481,8 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
                /* If this is a VLAN packet remove the VLAN Tag */
                vlan_packet_rcvd = false;
                if ((ndev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
-                       fep->bufdesc_ex && (ebdp->cbd_esc & BD_ENET_RX_VLAN)) {
+                   fep->bufdesc_ex &&
+                   (ebdp->cbd_esc & cpu_to_fec32(BD_ENET_RX_VLAN))) {
                        /* Push and remove the vlan tag */
                        struct vlan_hdr *vlan_header =
                                        (struct vlan_hdr *) (data + ETH_HLEN);
@@ -1491,12 +1498,12 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
 
                /* Get receive timestamp from the skb */
                if (fep->hwts_rx_en && fep->bufdesc_ex)
-                       fec_enet_hwtstamp(fep, ebdp->ts,
+                       fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts),
                                          skb_hwtstamps(skb));
 
                if (fep->bufdesc_ex &&
                    (fep->csum_flags & FLAG_RX_CSUM_ENABLED)) {
-                       if (!(ebdp->cbd_esc & FLAG_RX_CSUM_ERROR)) {
+                       if (!(ebdp->cbd_esc & cpu_to_fec32(FLAG_RX_CSUM_ERROR))) {
                                /* don't check it */
                                skb->ip_summed = CHECKSUM_UNNECESSARY;
                        } else {
@@ -1513,7 +1520,8 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
                napi_gro_receive(&fep->napi, skb);
 
                if (is_copybreak) {
-                       dma_sync_single_for_device(&fep->pdev->dev, bdp->cbd_bufaddr,
+                       dma_sync_single_for_device(&fep->pdev->dev,
+                                                  fec32_to_cpu(bdp->cbd_bufaddr),
                                                   FEC_ENET_RX_FRSIZE - fep->rx_align,
                                                   DMA_FROM_DEVICE);
                } else {
@@ -1527,12 +1535,12 @@ rx_processing_done:
 
                /* Mark the buffer empty */
                status |= BD_ENET_RX_EMPTY;
-               bdp->cbd_sc = status;
+               bdp->cbd_sc = cpu_to_fec16(status);
 
                if (fep->bufdesc_ex) {
                        struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
 
-                       ebdp->cbd_esc = BD_ENET_RX_INT;
+                       ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT);
                        ebdp->cbd_prot = 0;
                        ebdp->cbd_bdu = 0;
                }
@@ -2145,8 +2153,7 @@ static int fec_enet_get_regs_len(struct net_device *ndev)
 
 /* List of registers that can be safety be read to dump them with ethtool */
 #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
-       defined(CONFIG_M520x) || defined(CONFIG_M532x) ||               \
-       defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
+       defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM)
 static u32 fec_enet_register_offset[] = {
        FEC_IEVENT, FEC_IMASK, FEC_R_DES_ACTIVE_0, FEC_X_DES_ACTIVE_0,
        FEC_ECNTRL, FEC_MII_DATA, FEC_MII_SPEED, FEC_MIB_CTRLSTAT, FEC_R_CNTRL,
@@ -2662,7 +2669,7 @@ static void fec_enet_free_buffers(struct net_device *ndev)
                        rxq->rx_skbuff[i] = NULL;
                        if (skb) {
                                dma_unmap_single(&fep->pdev->dev,
-                                                bdp->cbd_bufaddr,
+                                                fec32_to_cpu(bdp->cbd_bufaddr),
                                                 FEC_ENET_RX_FRSIZE - fep->rx_align,
                                                 DMA_FROM_DEVICE);
                                dev_kfree_skb(skb);
@@ -2777,11 +2784,11 @@ fec_enet_alloc_rxq_buffers(struct net_device *ndev, unsigned int queue)
                }
 
                rxq->rx_skbuff[i] = skb;
-               bdp->cbd_sc = BD_ENET_RX_EMPTY;
+               bdp->cbd_sc = cpu_to_fec16(BD_ENET_RX_EMPTY);
 
                if (fep->bufdesc_ex) {
                        struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
-                       ebdp->cbd_esc = BD_ENET_RX_INT;
+                       ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT);
                }
 
                bdp = fec_enet_get_nextdesc(bdp, fep, queue);
@@ -2789,7 +2796,7 @@ fec_enet_alloc_rxq_buffers(struct net_device *ndev, unsigned int queue)
 
        /* Set the last buffer to wrap. */
        bdp = fec_enet_get_prevdesc(bdp, fep, queue);
-       bdp->cbd_sc |= BD_SC_WRAP;
+       bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);
        return 0;
 
  err_alloc:
@@ -2812,12 +2819,12 @@ fec_enet_alloc_txq_buffers(struct net_device *ndev, unsigned int queue)
                if (!txq->tx_bounce[i])
                        goto err_alloc;
 
-               bdp->cbd_sc = 0;
-               bdp->cbd_bufaddr = 0;
+               bdp->cbd_sc = cpu_to_fec16(0);
+               bdp->cbd_bufaddr = cpu_to_fec32(0);
 
                if (fep->bufdesc_ex) {
                        struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
-                       ebdp->cbd_esc = BD_ENET_TX_INT;
+                       ebdp->cbd_esc = cpu_to_fec32(BD_ENET_TX_INT);
                }
 
                bdp = fec_enet_get_nextdesc(bdp, fep, queue);
@@ -2825,7 +2832,7 @@ fec_enet_alloc_txq_buffers(struct net_device *ndev, unsigned int queue)
 
        /* Set the last buffer to wrap. */
        bdp = fec_enet_get_prevdesc(bdp, fep, queue);
-       bdp->cbd_sc |= BD_SC_WRAP;
+       bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);
 
        return 0;
 
index 52e0091..1ba359f 100644 (file)
@@ -552,7 +552,7 @@ static void tx_restart(struct net_device *dev)
        cbd_t __iomem *prev_bd;
        cbd_t __iomem *last_tx_bd;
 
-       last_tx_bd = fep->tx_bd_base + ((fpi->tx_ring - 1) * sizeof(cbd_t));
+       last_tx_bd = fep->tx_bd_base + (fpi->tx_ring - 1);
 
        /* get the current bd held in TBPTR  and scan back from this point */
        recheck_bd = curr_tbptr = (cbd_t __iomem *)
index b364529..3bfe36f 100644 (file)
@@ -95,21 +95,17 @@ static struct hnae_buf_ops hnae_bops = {
+/*
+ * class_find_device() match callback used by find_ae(): @data is the
+ * device_node being searched for.  Returns nonzero when the AE device behind
+ * @dev has that node as its dev->of_node, zero otherwise.
+ */
 static int __ae_match(struct device *dev, const void *data)
 {
        struct hnae_ae_dev *hdev = cls_to_ae_dev(dev);
-       const char *ae_id = data;
 
-       if (!strncmp(ae_id, hdev->name, AE_NAME_SIZE))
-               return 1;
-
-       return 0;
+       return hdev->dev->of_node == data;
 }
 
+/*
+ * Look up the AE device whose of_node is @ae_node by scanning hnae_class
+ * with __ae_match().  A NULL @ae_node triggers WARN_ON() but the lookup is
+ * still performed.  Returns the matching hnae_ae_dev, or NULL when
+ * class_find_device() finds nothing.
+ */
-static struct hnae_ae_dev *find_ae(const char *ae_id)
+static struct hnae_ae_dev *find_ae(const struct device_node *ae_node)
 {
        struct device *dev;
 
-       WARN_ON(!ae_id);
+       WARN_ON(!ae_node);
 
-       dev = class_find_device(hnae_class, NULL, ae_id, __ae_match);
+       dev = class_find_device(hnae_class, NULL, ae_node, __ae_match);
 
        return dev ? cls_to_ae_dev(dev) : NULL;
 }
@@ -316,7 +312,8 @@ EXPORT_SYMBOL(hnae_reinit_handle);
  * return handle ptr or ERR_PTR
  */
 struct hnae_handle *hnae_get_handle(struct device *owner_dev,
-                                   const char *ae_id, u32 port_id,
+                                   const struct device_node *ae_node,
+                                   u32 port_id,
                                    struct hnae_buf_ops *bops)
 {
        struct hnae_ae_dev *dev;
@@ -324,7 +321,7 @@ struct hnae_handle *hnae_get_handle(struct device *owner_dev,
        int i, j;
        int ret;
 
-       dev = find_ae(ae_id);
+       dev = find_ae(ae_node);
        if (!dev)
                return ERR_PTR(-ENODEV);
 
index 6ca94dc..1cbcb9f 100644 (file)
@@ -524,8 +524,11 @@ struct hnae_handle {
 
 #define ring_to_dev(ring) ((ring)->q->dev->dev)
 
-struct hnae_handle *hnae_get_handle(struct device *owner_dev, const char *ae_id,
-                                   u32 port_id, struct hnae_buf_ops *bops);
+struct hnae_handle *hnae_get_handle(struct device *owner_dev,
+                                   const struct device_node *ae_node,
+                                   u32 port_id,
+                                   struct hnae_buf_ops *bops);
+
 void hnae_put_handle(struct hnae_handle *handle);
 int hnae_ae_register(struct hnae_ae_dev *dev, struct module *owner);
 void hnae_ae_unregister(struct hnae_ae_dev *dev);
index 522b264..a0070d0 100644 (file)
@@ -847,6 +847,7 @@ static struct hnae_ae_ops hns_dsaf_ops = {
 int hns_dsaf_ae_init(struct dsaf_device *dsaf_dev)
 {
        struct hnae_ae_dev *ae_dev = &dsaf_dev->ae_dev;
+       static atomic_t id = ATOMIC_INIT(-1);
 
        switch (dsaf_dev->dsaf_ver) {
        case AE_VERSION_1:
@@ -858,6 +859,9 @@ int hns_dsaf_ae_init(struct dsaf_device *dsaf_dev)
        default:
                break;
        }
+
+       snprintf(ae_dev->name, AE_NAME_SIZE, "%s%d", DSAF_DEVICE_NAME,
+                (int)atomic_inc_return(&id));
        ae_dev->ops = &hns_dsaf_ops;
        ae_dev->dev = dsaf_dev->dev;
 
index 1c33bd0..9439f04 100644 (file)
@@ -35,7 +35,7 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
        int ret, i;
        u32 desc_num;
        u32 buf_size;
-       const char *name, *mode_str;
+       const char *mode_str;
        struct device_node *np = dsaf_dev->dev->of_node;
 
        if (of_device_is_compatible(np, "hisilicon,hns-dsaf-v1"))
@@ -43,14 +43,6 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
        else
                dsaf_dev->dsaf_ver = AE_VERSION_2;
 
-       ret = of_property_read_string(np, "dsa_name", &name);
-       if (ret) {
-               dev_err(dsaf_dev->dev, "get dsaf name fail, ret=%d!\n", ret);
-               return ret;
-       }
-       strncpy(dsaf_dev->ae_dev.name, name, AE_NAME_SIZE);
-       dsaf_dev->ae_dev.name[AE_NAME_SIZE - 1] = '\0';
-
        ret = of_property_read_string(np, "mode", &mode_str);
        if (ret) {
                dev_err(dsaf_dev->dev, "get dsaf mode fail, ret=%d!\n", ret);
index 31c312f..40205b9 100644 (file)
@@ -18,6 +18,7 @@ struct hns_mac_cb;
 
 #define DSAF_DRV_NAME "hns_dsaf"
 #define DSAF_MOD_VERSION "v1.0"
+#define DSAF_DEVICE_NAME "dsaf"
 
 #define HNS_DSAF_DEBUG_NW_REG_OFFSET 0x100000
 
index 0e30846..3f77ff7 100644 (file)
@@ -1802,7 +1802,7 @@ static int hns_nic_try_get_ae(struct net_device *ndev)
        int ret;
 
        h = hnae_get_handle(&priv->netdev->dev,
-                           priv->ae_name, priv->port_id, NULL);
+                           priv->ae_node, priv->port_id, NULL);
        if (IS_ERR_OR_NULL(h)) {
                ret = PTR_ERR(h);
                dev_dbg(priv->dev, "has not handle, register notifier!\n");
@@ -1880,13 +1880,16 @@ static int hns_nic_dev_probe(struct platform_device *pdev)
        else
                priv->enet_ver = AE_VERSION_2;
 
-       ret = of_property_read_string(node, "ae-name", &priv->ae_name);
-       if (ret)
-               goto out_read_string_fail;
+       priv->ae_node = (void *)of_parse_phandle(node, "ae-handle", 0);
+       if (IS_ERR_OR_NULL(priv->ae_node)) {
+               ret = PTR_ERR(priv->ae_node);
+               dev_err(dev, "not find ae-handle\n");
+               goto out_read_prop_fail;
+       }
 
        ret = of_property_read_u32(node, "port-id", &priv->port_id);
        if (ret)
-               goto out_read_string_fail;
+               goto out_read_prop_fail;
 
        hns_init_mac_addr(ndev);
 
@@ -1945,7 +1948,7 @@ static int hns_nic_dev_probe(struct platform_device *pdev)
 
 out_notify_fail:
        (void)cancel_work_sync(&priv->service_task);
-out_read_string_fail:
+out_read_prop_fail:
        free_netdev(ndev);
        return ret;
 }
index 4b75270..c68ab3d 100644 (file)
@@ -51,7 +51,7 @@ struct hns_nic_ops {
 };
 
 struct hns_nic_priv {
-       const char *ae_name;
+       const struct device_node *ae_node;
        u32 enet_ver;
        u32 port_id;
        int phy_mode;
index 1d5c3e1..3daf2d4 100644 (file)
@@ -194,7 +194,6 @@ static const char *hp100_isa_tbl[] = {
 };
 #endif
 
-#ifdef CONFIG_EISA
 static struct eisa_device_id hp100_eisa_tbl[] = {
        { "HWPF180" }, /* HP J2577 rev A */
        { "HWP1920" }, /* HP 27248B */
@@ -205,9 +204,7 @@ static struct eisa_device_id hp100_eisa_tbl[] = {
        { "" }         /* Mandatory final entry ! */
 };
 MODULE_DEVICE_TABLE(eisa, hp100_eisa_tbl);
-#endif
 
-#ifdef CONFIG_PCI
 static const struct pci_device_id hp100_pci_tbl[] = {
        {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2585A, PCI_ANY_ID, PCI_ANY_ID,},
        {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2585B, PCI_ANY_ID, PCI_ANY_ID,},
@@ -219,7 +216,6 @@ static const struct pci_device_id hp100_pci_tbl[] = {
        {}                      /* Terminating entry */
 };
 MODULE_DEVICE_TABLE(pci, hp100_pci_tbl);
-#endif
 
 static int hp100_rx_ratio = HP100_DEFAULT_RX_RATIO;
 static int hp100_priority_tx = HP100_DEFAULT_PRIORITY_TX;
@@ -2842,7 +2838,6 @@ static void cleanup_dev(struct net_device *d)
        free_netdev(d);
 }
 
-#ifdef CONFIG_EISA
 static int hp100_eisa_probe(struct device *gendev)
 {
        struct net_device *dev = alloc_etherdev(sizeof(struct hp100_private));
@@ -2884,9 +2879,7 @@ static struct eisa_driver hp100_eisa_driver = {
                .remove  = hp100_eisa_remove,
         }
 };
-#endif
 
-#ifdef CONFIG_PCI
 static int hp100_pci_probe(struct pci_dev *pdev,
                           const struct pci_device_id *ent)
 {
@@ -2955,7 +2948,6 @@ static struct pci_driver hp100_pci_driver = {
        .probe          = hp100_pci_probe,
        .remove         = hp100_pci_remove,
 };
-#endif
 
 /*
  *  module section
@@ -3032,23 +3024,17 @@ static int __init hp100_module_init(void)
        err = hp100_isa_init();
        if (err && err != -ENODEV)
                goto out;
-#ifdef CONFIG_EISA
        err = eisa_driver_register(&hp100_eisa_driver);
        if (err && err != -ENODEV)
                goto out2;
-#endif
-#ifdef CONFIG_PCI
        err = pci_register_driver(&hp100_pci_driver);
        if (err && err != -ENODEV)
                goto out3;
-#endif
  out:
        return err;
  out3:
-#ifdef CONFIG_EISA
        eisa_driver_unregister (&hp100_eisa_driver);
  out2:
-#endif
        hp100_isa_cleanup();
        goto out;
 }
@@ -3057,12 +3043,8 @@ static int __init hp100_module_init(void)
 static void __exit hp100_module_exit(void)
 {
        hp100_isa_cleanup();
-#ifdef CONFIG_EISA
        eisa_driver_unregister (&hp100_eisa_driver);
-#endif
-#ifdef CONFIG_PCI
        pci_unregister_driver (&hp100_pci_driver);
-#endif
 }
 
 module_init(hp100_module_init)
index bb4612c..8f3b53e 100644 (file)
@@ -7117,9 +7117,7 @@ static void i40e_service_task(struct work_struct *work)
        i40e_watchdog_subtask(pf);
        i40e_fdir_reinit_subtask(pf);
        i40e_sync_filters_subtask(pf);
-#if IS_ENABLED(CONFIG_VXLAN) || IS_ENABLED(CONFIG_GENEVE)
        i40e_sync_udp_filters_subtask(pf);
-#endif
        i40e_clean_adminq_subtask(pf);
 
        i40e_service_event_complete(pf);
@@ -8515,6 +8513,8 @@ static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, __be16 port)
 }
 
 #endif
+
+#if IS_ENABLED(CONFIG_VXLAN)
 /**
  * i40e_add_vxlan_port - Get notifications about VXLAN ports that come up
  * @netdev: This physical port's netdev
@@ -8524,7 +8524,6 @@ static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, __be16 port)
 static void i40e_add_vxlan_port(struct net_device *netdev,
                                sa_family_t sa_family, __be16 port)
 {
-#if IS_ENABLED(CONFIG_VXLAN)
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
@@ -8557,7 +8556,6 @@ static void i40e_add_vxlan_port(struct net_device *netdev,
        pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_VXLAN;
        pf->pending_udp_bitmap |= BIT_ULL(next_idx);
        pf->flags |= I40E_FLAG_UDP_FILTER_SYNC;
-#endif
 }
 
 /**
@@ -8569,7 +8567,6 @@ static void i40e_add_vxlan_port(struct net_device *netdev,
 static void i40e_del_vxlan_port(struct net_device *netdev,
                                sa_family_t sa_family, __be16 port)
 {
-#if IS_ENABLED(CONFIG_VXLAN)
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
@@ -8592,9 +8589,10 @@ static void i40e_del_vxlan_port(struct net_device *netdev,
                netdev_warn(netdev, "vxlan port %d was not found, not deleting\n",
                            ntohs(port));
        }
-#endif
 }
+#endif
 
+#if IS_ENABLED(CONFIG_GENEVE)
 /**
  * i40e_add_geneve_port - Get notifications about GENEVE ports that come up
  * @netdev: This physical port's netdev
@@ -8604,7 +8602,6 @@ static void i40e_del_vxlan_port(struct net_device *netdev,
 static void i40e_add_geneve_port(struct net_device *netdev,
                                 sa_family_t sa_family, __be16 port)
 {
-#if IS_ENABLED(CONFIG_GENEVE)
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
@@ -8639,7 +8636,6 @@ static void i40e_add_geneve_port(struct net_device *netdev,
        pf->flags |= I40E_FLAG_UDP_FILTER_SYNC;
 
        dev_info(&pf->pdev->dev, "adding geneve port %d\n", ntohs(port));
-#endif
 }
 
 /**
@@ -8651,7 +8647,6 @@ static void i40e_add_geneve_port(struct net_device *netdev,
 static void i40e_del_geneve_port(struct net_device *netdev,
                                 sa_family_t sa_family, __be16 port)
 {
-#if IS_ENABLED(CONFIG_GENEVE)
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
@@ -8677,8 +8672,8 @@ static void i40e_del_geneve_port(struct net_device *netdev,
                netdev_warn(netdev, "geneve port %d was not found, not deleting\n",
                            ntohs(port));
        }
-#endif
 }
+#endif
 
 static int i40e_get_phys_port_id(struct net_device *netdev,
                                 struct netdev_phys_item_id *ppid)
index 720516b..47bd8b3 100644 (file)
@@ -2313,8 +2313,8 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
        struct iphdr *this_ip_hdr;
        u32 network_hdr_len;
        u8 l4_hdr = 0;
-       struct udphdr *oudph;
-       struct iphdr *oiph;
+       struct udphdr *oudph = NULL;
+       struct iphdr *oiph = NULL;
        u32 l4_tunnel = 0;
 
        if (skb->encapsulation) {
index a0c0383..5583118 100644 (file)
@@ -762,10 +762,10 @@ txq_put_data_tso(struct net_device *dev, struct tx_queue *txq,
 
        if (length <= 8 && (uintptr_t)data & 0x7) {
                /* Copy unaligned small data fragment to TSO header data area */
-               memcpy(txq->tso_hdrs + txq->tx_curr_desc * TSO_HEADER_SIZE,
+               memcpy(txq->tso_hdrs + tx_index * TSO_HEADER_SIZE,
                       data, length);
                desc->buf_ptr = txq->tso_hdrs_dma
-                       + txq->tx_curr_desc * TSO_HEADER_SIZE;
+                       + tx_index * TSO_HEADER_SIZE;
        } else {
                /* Alignment is okay, map buffer and hand off to hardware */
                txq->tx_desc_mapping[tx_index] = DESC_DMA_MAP_SINGLE;
index fabc8df..662c2ee 100644 (file)
  * warranty of any kind, whether express or implied.
  */
 
-#include <linux/kernel.h>
-#include <linux/netdevice.h>
+#include <linux/clk.h>
+#include <linux/cpu.h>
 #include <linux/etherdevice.h>
-#include <linux/platform_device.h>
-#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
 #include <linux/inetdevice.h>
-#include <linux/mbus.h>
-#include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/if_vlan.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
 #include <linux/io.h>
-#include <net/tso.h>
+#include <linux/kernel.h>
+#include <linux/mbus.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_mdio.h>
 #include <linux/of_net.h>
-#include <linux/of_address.h>
 #include <linux/phy.h>
-#include <linux/clk.h>
-#include <linux/cpu.h>
+#include <linux/platform_device.h>
+#include <linux/skbuff.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/tso.h>
 
 /* Registers */
 #define MVNETA_RXQ_CONFIG_REG(q)                (0x1400 + ((q) << 2))
@@ -373,6 +373,8 @@ struct mvneta_port {
 
        /* Core clock */
        struct clk *clk;
+       /* AXI clock */
+       struct clk *clk_bus;
        u8 mcast_count[256];
        u16 tx_ring_size;
        u16 rx_ring_size;
@@ -3242,26 +3244,25 @@ static void mvneta_ethtool_update_stats(struct mvneta_port *pp)
        const struct mvneta_statistic *s;
        void __iomem *base = pp->base;
        u32 high, low, val;
+       u64 val64;
        int i;
 
        for (i = 0, s = mvneta_statistics;
             s < mvneta_statistics + ARRAY_SIZE(mvneta_statistics);
             s++, i++) {
-               val = 0;
-
                switch (s->type) {
                case T_REG_32:
                        val = readl_relaxed(base + s->offset);
+                       pp->ethtool_stats[i] += val;
                        break;
                case T_REG_64:
                        /* Docs say to read low 32-bit then high */
                        low = readl_relaxed(base + s->offset);
                        high = readl_relaxed(base + s->offset + 4);
-                       val = (u64)high << 32 | low;
+                       val64 = (u64)high << 32 | low;
+                       pp->ethtool_stats[i] += val64;
                        break;
                }
-
-               pp->ethtool_stats[i] += val;
        }
 }
 
@@ -3605,7 +3606,9 @@ static int mvneta_probe(struct platform_device *pdev)
 
        pp->indir[0] = rxq_def;
 
-       pp->clk = devm_clk_get(&pdev->dev, NULL);
+       pp->clk = devm_clk_get(&pdev->dev, "core");
+       if (IS_ERR(pp->clk))
+               pp->clk = devm_clk_get(&pdev->dev, NULL);
        if (IS_ERR(pp->clk)) {
                err = PTR_ERR(pp->clk);
                goto err_put_phy_node;
@@ -3613,6 +3616,10 @@ static int mvneta_probe(struct platform_device *pdev)
 
        clk_prepare_enable(pp->clk);
 
+       pp->clk_bus = devm_clk_get(&pdev->dev, "bus");
+       if (!IS_ERR(pp->clk_bus))
+               clk_prepare_enable(pp->clk_bus);
+
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        pp->base = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(pp->base)) {
@@ -3724,6 +3731,7 @@ err_free_stats:
 err_free_ports:
        free_percpu(pp->ports);
 err_clk:
+       clk_disable_unprepare(pp->clk_bus);
        clk_disable_unprepare(pp->clk);
 err_put_phy_node:
        of_node_put(phy_node);
@@ -3741,6 +3749,7 @@ static int mvneta_remove(struct platform_device *pdev)
        struct mvneta_port *pp = netdev_priv(dev);
 
        unregister_netdev(dev);
+       clk_disable_unprepare(pp->clk_bus);
        clk_disable_unprepare(pp->clk);
        free_percpu(pp->ports);
        free_percpu(pp->stats);
index 2c2baab..d66c690 100644 (file)
@@ -157,6 +157,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
                [29] = "802.1ad offload support",
                [31] = "Modifying loopback source checks using UPDATE_QP support",
                [32] = "Loopback source checks support",
+               [33] = "RoCEv2 support"
        };
        int i;
 
@@ -626,6 +627,8 @@ out:
        return err;
 }
 
+static void disable_unsupported_roce_caps(void *buf);
+
 int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 {
        struct mlx4_cmd_mailbox *mailbox;
@@ -738,6 +741,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
        if (err)
                goto out;
 
+       if (mlx4_is_mfunc(dev))
+               disable_unsupported_roce_caps(outbox);
        MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET);
        dev_cap->reserved_qps = 1 << (field & 0xf);
        MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET);
@@ -905,6 +910,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
                dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
        MLX4_GET(dev_cap->bmme_flags, outbox,
                 QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+       if (dev_cap->bmme_flags & MLX4_FLAG_ROCE_V1_V2)
+               dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ROCE_V1_V2;
        if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP)
                dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP;
        MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET);
@@ -1161,6 +1168,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
        if (err)
                return err;
 
+       disable_unsupported_roce_caps(outbox->buf);
        /* add port mng change event capability and disable mw type 1
         * unconditionally to slaves
         */
@@ -1258,6 +1266,21 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
        return 0;
 }
 
+/*
+ * Clear three capability bits, in place, in a QUERY_DEV_CAP output buffer:
+ * bit 31 of the EXT_FLAGS word, bit 24 of the EXT_2_FLAGS word, and
+ * MLX4_FLAG_ROCE_V1_V2 in the BMME flags word.
+ *
+ * @buf: QUERY_DEV_CAP output mailbox buffer; modified in place.
+ *
+ * Called by mlx4_QUERY_DEV_CAP() when mlx4_is_mfunc(dev) is true, and by
+ * mlx4_QUERY_DEV_CAP_wrapper() for every request it handles.
+ *
+ * NOTE(review): bits 31 and 24 are unnamed here; going by the function name
+ * they are presumably RoCE-related capabilities -- confirm against the
+ * firmware interface definition.
+ */
+static void disable_unsupported_roce_caps(void *buf)
+{
+       u32 flags;
+
+       MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+       flags &= ~(1UL << 31);
+       MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+       MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
+       flags &= ~(1UL << 24);
+       MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
+       MLX4_GET(flags, buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+       flags &= ~(MLX4_FLAG_ROCE_V1_V2);
+       MLX4_PUT(buf, flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+}
+
 int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
                            struct mlx4_vhcr *vhcr,
                            struct mlx4_cmd_mailbox *inbox,
@@ -2239,7 +2262,8 @@ struct mlx4_config_dev {
        __be32  rsvd1[3];
        __be16  vxlan_udp_dport;
        __be16  rsvd2;
-       __be32  rsvd3;
+       __be16  roce_v2_entropy;
+       __be16  roce_v2_udp_dport;
        __be32  roce_flags;
        __be32  rsvd4[25];
        __be16  rsvd5;
@@ -2248,6 +2272,7 @@ struct mlx4_config_dev {
 };
 
 #define MLX4_VXLAN_UDP_DPORT (1 << 0)
+#define MLX4_ROCE_V2_UDP_DPORT BIT(3)
 #define MLX4_DISABLE_RX_PORT BIT(18)
 
 static int mlx4_CONFIG_DEV_set(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev)
@@ -2365,6 +2390,18 @@ int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis)
        return mlx4_CONFIG_DEV_set(dev, &config_dev);
 }
 
+/*
+ * Program the RoCE v2 UDP destination port through CONFIG_DEV.
+ *
+ * @dev:      mlx4 device to configure.
+ * @udp_port: UDP destination port, in CPU byte order; converted to
+ *            big-endian before being placed in the command.
+ *
+ * Every other field of the CONFIG_DEV structure is sent as zero and only
+ * the MLX4_ROCE_V2_UDP_DPORT bit is set in update_flags.
+ *
+ * Returns whatever mlx4_CONFIG_DEV_set() returns.
+ */
+int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port)
+{
+       struct mlx4_config_dev config_dev;
+
+       memset(&config_dev, 0, sizeof(config_dev));
+       config_dev.update_flags    = cpu_to_be32(MLX4_ROCE_V2_UDP_DPORT);
+       config_dev.roce_v2_udp_dport = cpu_to_be16(udp_port);
+
+       return mlx4_CONFIG_DEV_set(dev, &config_dev);
+}
+EXPORT_SYMBOL_GPL(mlx4_config_roce_v2_port);
+
 int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2)
 {
        struct mlx4_cmd_mailbox *mailbox;
index 2404c22..7baef52 100644 (file)
@@ -780,7 +780,10 @@ struct mlx4_set_port_general_context {
        u16 reserved1;
        u8 v_ignore_fcs;
        u8 flags;
-       u8 ignore_fcs;
+       union {
+               u8 ignore_fcs;
+               u8 roce_mode;
+       };
        u8 reserved2;
        __be16 mtu;
        u8 pptx;
index f255042..787b7bb 100644 (file)
@@ -1520,6 +1520,8 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz)
        return err;
 }
 
+#define SET_PORT_ROCE_2_FLAGS          0x10
+#define MLX4_SET_PORT_ROCE_V1_V2       0x2
 int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
                          u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx)
 {
@@ -1539,6 +1541,11 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
        context->pprx = (pprx * (!pfcrx)) << 7;
        context->pfcrx = pfcrx;
 
+       if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+               context->flags |= SET_PORT_ROCE_2_FLAGS;
+               context->roce_mode |=
+                       MLX4_SET_PORT_ROCE_V1_V2 << 4;
+       }
        in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
        err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
                       MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
index 168823d..d1cd9c3 100644 (file)
@@ -167,6 +167,12 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
                context->log_page_size   = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
        }
 
+       if ((cur_state == MLX4_QP_STATE_RTR) &&
+           (new_state == MLX4_QP_STATE_RTS) &&
+           dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+               context->roce_entropy =
+                       cpu_to_be16(mlx4_qp_roce_entropy(dev, qp->qpn));
+
        *(__be32 *) mailbox->buf = cpu_to_be32(optpar);
        memcpy(mailbox->buf + 8, context, sizeof *context);
 
@@ -921,3 +927,23 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
        return 0;
 }
 EXPORT_SYMBOL_GPL(mlx4_qp_to_ready);
+
+/*
+ * Derive the 16-bit RoCE entropy value for QP number @qpn.
+ *
+ * The QP context is read back with mlx4_qp_query() to obtain the remote QP
+ * number (low 24 bits of context.remote_qpn).  Local and remote numbers are
+ * each reduced with folded_qp(); when they differ the two folded values are
+ * XORed, otherwise the folded local value is used alone.  In both cases the
+ * result is ORed with 0xC000, so the top two bits are always set.
+ *
+ * Returns the entropy value, or 0xdead when the query fails.
+ *
+ * __mlx4_qp_modify() stores this value (as big-endian) in
+ * context->roce_entropy on the RTR -> RTS transition when the device has
+ * MLX4_DEV_CAP_FLAG2_ROCE_V1_V2.
+ */
+u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn)
+{
+       struct mlx4_qp_context context;
+       struct mlx4_qp qp;
+       int err;
+
+       /* NOTE(review): only qp.qpn is initialised; presumably that is the
+        * only field mlx4_qp_query() reads -- confirm.
+        */
+       qp.qpn = qpn;
+       err = mlx4_qp_query(dev, &qp, &context);
+       if (!err) {
+               u32 dest_qpn = be32_to_cpu(context.remote_qpn) & 0xffffff;
+               u16 folded_dst = folded_qp(dest_qpn);
+               u16 folded_src = folded_qp(qpn);
+
+               return (dest_qpn != qpn) ?
+                       ((folded_dst ^ folded_src) | 0xC000) :
+                       folded_src | 0xC000;
+       }
+       return 0xdead;
+}
index 9ea49a8..aac071a 100644 (file)
@@ -39,8 +39,8 @@
 #include <linux/mlx5/qp.h>
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/vport.h>
+#include <linux/mlx5/transobj.h>
 #include "wq.h"
-#include "transobj.h"
 #include "mlx5_core.h"
 
 #define MLX5E_MAX_NUM_TC       8
index c56d91a..6a3e430 100644 (file)
@@ -2241,7 +2241,7 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev)
                goto err_unmap_free_uar;
        }
 
-       err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
+       err = mlx5_core_alloc_transport_domain(mdev, &priv->tdn);
        if (err) {
                mlx5_core_err(mdev, "alloc td failed, %d\n", err);
                goto err_dealloc_pd;
@@ -2324,7 +2324,7 @@ err_destroy_mkey:
        mlx5_core_destroy_mkey(mdev, &priv->mr);
 
 err_dealloc_transport_domain:
-       mlx5_dealloc_transport_domain(mdev, priv->tdn);
+       mlx5_core_dealloc_transport_domain(mdev, priv->tdn);
 
 err_dealloc_pd:
        mlx5_core_dealloc_pd(mdev, priv->pdn);
@@ -2356,7 +2356,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv)
        mlx5e_close_drop_rq(priv);
        mlx5e_destroy_tises(priv);
        mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
-       mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
+       mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn);
        mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
        mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
        free_netdev(netdev);
index 23c244a..647a3ca 100644 (file)
@@ -230,6 +230,7 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
                case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
                case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
                        rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+                       rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
                        mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
                                      eqe_type_str(eqe->type), eqe->type, rsn);
                        mlx5_rsc_event(dev, rsn, eqe->type);
index b37749a..1545a94 100644 (file)
@@ -78,6 +78,11 @@ struct mlx5_device_context {
        void                   *context;
 };
 
+/*
+ * Atomic requestor endianness modes.  handle_hca_cap_atomic() compares the
+ * supported_atomic_req_8B_endianess_mode_1 capability against
+ * MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS and, on a match, writes that value
+ * into the atomic_req_8B_endianess_mode capability field.
+ */
+enum {
+       MLX5_ATOMIC_REQ_MODE_BE = 0x0,
+       MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
+};
+
 static struct mlx5_profile profile[] = {
        [0] = {
                .mask           = 0,
@@ -387,7 +392,7 @@ query_ex:
        return err;
 }
 
-static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
+static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
 {
        u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)];
        int err;
@@ -395,6 +400,7 @@ static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
        memset(out, 0, sizeof(out));
 
        MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+       MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
        err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
        if (err)
                return err;
@@ -404,6 +410,46 @@ static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
        return err;
 }
 
+/*
+ * Switch the 8-byte atomic requestor to host endianness when the device
+ * reports that mode as supported.
+ *
+ * Steps:
+ *  - if the general capability "atomic" is not set, do nothing;
+ *  - otherwise fetch the current atomic capabilities;
+ *  - if supported_atomic_req_8B_endianess_mode_1 is not
+ *    MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS, do nothing;
+ *  - otherwise issue SET_HCA_CAP (atomic op_mod) with
+ *    atomic_req_8B_endianess_mode set to host endianness and every other
+ *    field of the zero-allocated command left at zero.
+ *
+ * Returns 0 on success or when no change is needed, -ENOMEM if the command
+ * buffer cannot be allocated, or the error returned by
+ * mlx5_core_get_caps() / set_caps().
+ */
+static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
+{
+       void *set_ctx;
+       void *set_hca_cap;
+       int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+       int req_endianness;
+       int err;
+
+       if (MLX5_CAP_GEN(dev, atomic)) {
+               err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC,
+                                        HCA_CAP_OPMOD_GET_CUR);
+               if (err)
+                       return err;
+       } else {
+               return 0;
+       }
+
+       req_endianness =
+               MLX5_CAP_ATOMIC(dev,
+                               supported_atomic_req_8B_endianess_mode_1);
+
+       /* Nothing to configure unless host-endianness mode is reported. */
+       if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
+               return 0;
+
+       set_ctx = kzalloc(set_sz, GFP_KERNEL);
+       if (!set_ctx)
+               return -ENOMEM;
+
+       set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+
+       /* Set requestor to host endianness */
+       MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianess_mode,
+                MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
+
+       err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
+
+       kfree(set_ctx);
+       return err;
+}
+
 static int handle_hca_cap(struct mlx5_core_dev *dev)
 {
        void *set_ctx = NULL;
@@ -445,7 +491,8 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
 
        MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);
 
-       err = set_caps(dev, set_ctx, set_sz);
+       err = set_caps(dev, set_ctx, set_sz,
+                      MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
 
 query_ex:
        kfree(set_ctx);
@@ -667,7 +714,6 @@ clean:
        return err;
 }
 
-#ifdef CONFIG_MLX5_CORE_EN
 static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
 {
        u32 query_in[MLX5_ST_SZ_DW(query_issi_in)];
@@ -720,7 +766,6 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
 
        return -ENOTSUPP;
 }
-#endif
 
 static int map_bf_area(struct mlx5_core_dev *dev)
 {
@@ -966,13 +1011,11 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
                goto err_pagealloc_cleanup;
        }
 
-#ifdef CONFIG_MLX5_CORE_EN
        err = mlx5_core_set_issi(dev);
        if (err) {
                dev_err(&pdev->dev, "failed to set issi\n");
                goto err_disable_hca;
        }
-#endif
 
        err = mlx5_satisfy_startup_pages(dev, 1);
        if (err) {
@@ -992,6 +1035,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
                goto reclaim_boot_pages;
        }
 
+       err = handle_hca_cap_atomic(dev);
+       if (err) {
+               dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n");
+               goto reclaim_boot_pages;
+       }
+
        err = mlx5_satisfy_startup_pages(dev, 0);
        if (err) {
                dev_err(&pdev->dev, "failed to allocate init pages\n");
index 30e2ba3..def2893 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/mlx5/cmd.h>
 #include <linux/mlx5/qp.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/transobj.h>
 
 #include "mlx5_core.h"
 
@@ -67,6 +68,52 @@ void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common)
                complete(&common->free);
 }
 
+/* Bitmask, indexed by event type, of the events accepted for a QP. */
+static u64 qp_allowed_event_types(void)
+{
+       return BIT(MLX5_EVENT_TYPE_PATH_MIG) |
+              BIT(MLX5_EVENT_TYPE_COMM_EST) |
+              BIT(MLX5_EVENT_TYPE_SQ_DRAINED) |
+              BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+              BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR) |
+              BIT(MLX5_EVENT_TYPE_PATH_MIG_FAILED) |
+              BIT(MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) |
+              BIT(MLX5_EVENT_TYPE_WQ_ACCESS_ERROR);
+}
+
+/* Bitmask, indexed by event type, of the events accepted for an RQ. */
+static u64 rq_allowed_event_types(void)
+{
+       return BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+              BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+}
+
+/* Bitmask, indexed by event type, of the events accepted for an SQ. */
+static u64 sq_allowed_event_types(void)
+{
+       u64 mask = BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+
+       return mask;
+}
+
+/*
+ * Tell whether @event_type may be delivered to a resource of @rsc_type.
+ * An unrecognised resource type triggers a WARN and is never allowed.
+ */
+static bool is_event_type_allowed(int rsc_type, int event_type)
+{
+       u64 allowed;
+
+       switch (rsc_type) {
+       case MLX5_EVENT_QUEUE_TYPE_QP:
+               allowed = qp_allowed_event_types();
+               break;
+       case MLX5_EVENT_QUEUE_TYPE_RQ:
+               allowed = rq_allowed_event_types();
+               break;
+       case MLX5_EVENT_QUEUE_TYPE_SQ:
+               allowed = sq_allowed_event_types();
+               break;
+       default:
+               WARN(1, "Event arrived for unknown resource type");
+               return false;
+       }
+
+       return BIT(event_type) & allowed;
+}
+
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
 {
        struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
@@ -75,8 +122,16 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
        if (!common)
                return;
 
+       if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) {
+               mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n",
+                              event_type, rsn);
+               return;
+       }
+
        switch (common->res) {
        case MLX5_RES_QP:
+       case MLX5_RES_RQ:
+       case MLX5_RES_SQ:
                qp = (struct mlx5_core_qp *)common;
                qp->event(qp, event_type);
                break;
@@ -177,27 +232,56 @@ void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
 }
 #endif
 
+/*
+ * Common tail of QP/RQ/SQ creation: set up the resource bookkeeping of @qp
+ * and insert it into the device's QP table.
+ *
+ * The table key is qp->qpn combined with @rsc_type shifted left by
+ * MLX5_USER_INDEX_LEN, so the caller must have stored the object number in
+ * qp->qpn beforehand.
+ *
+ * Returns 0 on success or the error from radix_tree_insert().
+ */
+static int create_qprqsq_common(struct mlx5_core_dev *dev,
+                               struct mlx5_core_qp *qp,
+                               int rsc_type)
+{
+       struct mlx5_qp_table *table = &dev->priv.qp_table;
+       int err;
+
+       /*
+        * Finish initialising the object before it becomes reachable through
+        * the table. Once inserted it can be looked up, so the refcount and
+        * completion must not be (re)set afterwards.
+        */
+       qp->common.res = rsc_type;
+       atomic_set(&qp->common.refcount, 1);
+       init_completion(&qp->common.free);
+       qp->pid = current->pid;
+
+       spin_lock_irq(&table->lock);
+       err = radix_tree_insert(&table->tree,
+                               qp->qpn | (rsc_type << MLX5_USER_INDEX_LEN),
+                               qp);
+       spin_unlock_irq(&table->lock);
+
+       return err;
+}
+
+/*
+ * Common head of QP/RQ/SQ destruction: remove @qp from the device's QP
+ * table, drop the reference taken at creation and wait until the object's
+ * "free" completion is signalled.
+ */
+static void destroy_qprqsq_common(struct mlx5_core_dev *dev,
+                                 struct mlx5_core_qp *qp)
+{
+       struct mlx5_qp_table *table = &dev->priv.qp_table;
+       unsigned long key = qp->qpn | (qp->common.res << MLX5_USER_INDEX_LEN);
+       unsigned long irq_flags;
+
+       spin_lock_irqsave(&table->lock, irq_flags);
+       radix_tree_delete(&table->tree, key);
+       spin_unlock_irqrestore(&table->lock, irq_flags);
+
+       mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
+       wait_for_completion(&qp->common.free);
+}
+
 int mlx5_core_create_qp(struct mlx5_core_dev *dev,
                        struct mlx5_core_qp *qp,
                        struct mlx5_create_qp_mbox_in *in,
                        int inlen)
 {
-       struct mlx5_qp_table *table = &dev->priv.qp_table;
        struct mlx5_create_qp_mbox_out out;
        struct mlx5_destroy_qp_mbox_in din;
        struct mlx5_destroy_qp_mbox_out dout;
        int err;
-       void *qpc;
 
        memset(&out, 0, sizeof(out));
        in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP);
 
-       if (dev->issi) {
-               qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
-               /* 0xffffff means we ask to work with cqe version 0 */
-               MLX5_SET(qpc, qpc, user_index, 0xffffff);
-       }
-
        err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
        if (err) {
                mlx5_core_warn(dev, "ret %d\n", err);
@@ -213,24 +297,16 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
        qp->qpn = be32_to_cpu(out.qpn) & 0xffffff;
        mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
 
-       qp->common.res = MLX5_RES_QP;
-       spin_lock_irq(&table->lock);
-       err = radix_tree_insert(&table->tree, qp->qpn, qp);
-       spin_unlock_irq(&table->lock);
-       if (err) {
-               mlx5_core_warn(dev, "err %d\n", err);
+       err = create_qprqsq_common(dev, qp, MLX5_RES_QP);
+       if (err)
                goto err_cmd;
-       }
 
        err = mlx5_debug_qp_add(dev, qp);
        if (err)
                mlx5_core_dbg(dev, "failed adding QP 0x%x to debug file system\n",
                              qp->qpn);
 
-       qp->pid = current->pid;
-       atomic_set(&qp->common.refcount, 1);
        atomic_inc(&dev->num_qps);
-       init_completion(&qp->common.free);
 
        return 0;
 
@@ -250,18 +326,11 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
 {
        struct mlx5_destroy_qp_mbox_in in;
        struct mlx5_destroy_qp_mbox_out out;
-       struct mlx5_qp_table *table = &dev->priv.qp_table;
-       unsigned long flags;
        int err;
 
        mlx5_debug_qp_remove(dev, qp);
 
-       spin_lock_irqsave(&table->lock, flags);
-       radix_tree_delete(&table->tree, qp->qpn);
-       spin_unlock_irqrestore(&table->lock, flags);
-
-       mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
-       wait_for_completion(&qp->common.free);
+       destroy_qprqsq_common(dev, qp);
 
        memset(&in, 0, sizeof(in));
        memset(&out, 0, sizeof(out));
@@ -279,59 +348,15 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp);
 
-int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state,
-                       enum mlx5_qp_state new_state,
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation,
                        struct mlx5_modify_qp_mbox_in *in, int sqd_event,
                        struct mlx5_core_qp *qp)
 {
-       static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
-               [MLX5_QP_STATE_RST] = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-                       [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_RST2INIT_QP,
-               },
-               [MLX5_QP_STATE_INIT]  = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-                       [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_INIT2INIT_QP,
-                       [MLX5_QP_STATE_RTR]     = MLX5_CMD_OP_INIT2RTR_QP,
-               },
-               [MLX5_QP_STATE_RTR]   = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTR2RTS_QP,
-               },
-               [MLX5_QP_STATE_RTS]   = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTS2RTS_QP,
-               },
-               [MLX5_QP_STATE_SQD] = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-               },
-               [MLX5_QP_STATE_SQER] = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_SQERR2RTS_QP,
-               },
-               [MLX5_QP_STATE_ERR] = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-               }
-       };
-
        struct mlx5_modify_qp_mbox_out out;
        int err = 0;
-       u16 op;
-
-       if (cur_state >= MLX5_QP_NUM_STATE || new_state >= MLX5_QP_NUM_STATE ||
-           !optab[cur_state][new_state])
-               return -EINVAL;
 
        memset(&out, 0, sizeof(out));
-       op = optab[cur_state][new_state];
-       in->hdr.opcode = cpu_to_be16(op);
+       in->hdr.opcode = cpu_to_be16(operation);
        in->qpn = cpu_to_be32(qp->qpn);
        err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
        if (err)
@@ -449,3 +474,67 @@ int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
 }
 EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
 #endif
+
+/*
+ * Create an RQ and register it in the device's QP table as an MLX5_RES_RQ
+ * resource. On success rq->qpn holds the new RQ number. If the registration
+ * fails the RQ is destroyed again and the error is returned.
+ */
+int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                               struct mlx5_core_qp *rq)
+{
+       u32 rqn;
+       int err;
+
+       err = mlx5_core_create_rq(dev, in, inlen, &rqn);
+       if (err)
+               return err;
+
+       rq->qpn = rqn;
+       err = create_qprqsq_common(dev, rq, MLX5_RES_RQ);
+       if (err)
+               mlx5_core_destroy_rq(dev, rq->qpn);
+
+       return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_rq_tracked);
+
+/*
+ * Tear down an RQ created by mlx5_core_create_rq_tracked(): unregister it
+ * from the QP table first (destroy_qprqsq_common() also waits for the
+ * object's completion), then destroy the RQ itself.
+ */
+void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
+                                 struct mlx5_core_qp *rq)
+{
+       destroy_qprqsq_common(dev, rq);
+       mlx5_core_destroy_rq(dev, rq->qpn);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked);
+
+/*
+ * Create an SQ and register it in the device's QP table as an MLX5_RES_SQ
+ * resource. On success sq->qpn holds the new SQ number. If the registration
+ * fails the SQ is destroyed again and the error is returned.
+ */
+int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                               struct mlx5_core_qp *sq)
+{
+       u32 sqn;
+       int err;
+
+       err = mlx5_core_create_sq(dev, in, inlen, &sqn);
+       if (err)
+               return err;
+
+       sq->qpn = sqn;
+       err = create_qprqsq_common(dev, sq, MLX5_RES_SQ);
+       if (err)
+               mlx5_core_destroy_sq(dev, sq->qpn);
+
+       return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_sq_tracked);
+
+/*
+ * Tear down an SQ created by mlx5_core_create_sq_tracked(): unregister it
+ * from the QP table first (destroy_qprqsq_common() also waits for the
+ * object's completion), then destroy the SQ itself.
+ */
+void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
+                                 struct mlx5_core_qp *sq)
+{
+       destroy_qprqsq_common(dev, sq);
+       mlx5_core_destroy_sq(dev, sq->qpn);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked);
index ffada80..04bc522 100644 (file)
@@ -37,7 +37,7 @@
 #include <linux/mlx5/srq.h>
 #include <rdma/ib_verbs.h>
 #include "mlx5_core.h"
-#include "transobj.h"
+#include <linux/mlx5/transobj.h>
 
 void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
 {
@@ -241,8 +241,6 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
 
        memcpy(xrc_srqc, srqc, MLX5_ST_SZ_BYTES(srqc));
        memcpy(pas, in->pas, pas_size);
-       /* 0xffffff means we ask to work with cqe version 0 */
-       MLX5_SET(xrc_srqc,          xrc_srqc,  user_index, 0xffffff);
        MLX5_SET(create_xrc_srq_in, create_in, opcode,
                 MLX5_CMD_OP_CREATE_XRC_SRQ);
 
index d7068f5..03a5093 100644 (file)
@@ -32,9 +32,9 @@
 
 #include <linux/mlx5/driver.h>
 #include "mlx5_core.h"
-#include "transobj.h"
+#include <linux/mlx5/transobj.h>
 
-int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
+int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
 {
        u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)];
        u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)];
@@ -53,8 +53,9 @@ int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
 
        return err;
 }
+EXPORT_SYMBOL(mlx5_core_alloc_transport_domain);
 
-void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
+void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
 {
        u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)];
        u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)];
@@ -68,6 +69,7 @@ void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
 
        mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
 }
+EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain);
 
 int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn)
 {
@@ -94,6 +96,7 @@ int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen)
        memset(out, 0, sizeof(out));
        return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
 }
+EXPORT_SYMBOL(mlx5_core_modify_rq);
 
 void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
 {
@@ -108,6 +111,18 @@ void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
        mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
 }
 
+/*
+ * Issue QUERY_RQ for @rqn. The command output, of size
+ * MLX5_ST_SZ_BYTES(query_rq_out), is written to @out.
+ * Returns the result of mlx5_cmd_exec_check_status().
+ */
+int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out)
+{
+       u32 in[MLX5_ST_SZ_DW(query_rq_in)] = {0};
+
+       MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ);
+       MLX5_SET(query_rq_in, in, rqn, rqn);
+
+       return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
+                                         MLX5_ST_SZ_BYTES(query_rq_out));
+}
+EXPORT_SYMBOL(mlx5_core_query_rq);
+
 int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn)
 {
        u32 out[MLX5_ST_SZ_DW(create_sq_out)];
@@ -133,6 +148,7 @@ int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen)
        memset(out, 0, sizeof(out));
        return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
 }
+EXPORT_SYMBOL(mlx5_core_modify_sq);
 
 void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
 {
@@ -147,6 +163,18 @@ void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
        mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
 }
 
+/*
+ * Issue QUERY_SQ for @sqn. The command output, of size
+ * MLX5_ST_SZ_BYTES(query_sq_out), is written to @out.
+ * Returns the result of mlx5_cmd_exec_check_status().
+ */
+int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out)
+{
+       u32 in[MLX5_ST_SZ_DW(query_sq_in)] = {0};
+
+       MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ);
+       MLX5_SET(query_sq_in, in, sqn, sqn);
+
+       return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
+                                         MLX5_ST_SZ_BYTES(query_sq_out));
+}
+EXPORT_SYMBOL(mlx5_core_query_sq);
+
 int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
                         u32 *tirn)
 {
@@ -162,6 +190,7 @@ int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
 
        return err;
 }
+EXPORT_SYMBOL(mlx5_core_create_tir);
 
 int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
                         int inlen)
@@ -187,6 +216,7 @@ void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn)
 
        mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
 }
+EXPORT_SYMBOL(mlx5_core_destroy_tir);
 
 int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
                         u32 *tisn)
@@ -203,6 +233,19 @@ int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
 
        return err;
 }
+EXPORT_SYMBOL(mlx5_core_create_tis);
+
+/*
+ * Issue MODIFY_TIS for @tisn. The caller supplies the command input in @in
+ * (@inlen bytes); this helper fills in the opcode and tisn fields before
+ * executing it. Returns the result of mlx5_cmd_exec_check_status().
+ */
+int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
+                        int inlen)
+{
+       u32 out[MLX5_ST_SZ_DW(modify_tis_out)] = {0};
+
+       MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS);
+       MLX5_SET(modify_tis_in, in, tisn, tisn);
+
+       return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_modify_tis);
 
 void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
 {
@@ -216,6 +259,7 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
 
        mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
 }
+EXPORT_SYMBOL(mlx5_core_destroy_tis);
 
 int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
                         u32 *rmpn)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h b/drivers/net/ethernet/mellanox/mlx5/core/transobj.h
deleted file mode 100644 (file)
index 74cae51..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __TRANSOBJ_H__
-#define __TRANSOBJ_H__
-
-int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn);
-void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn);
-int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                       u32 *rqn);
-int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen);
-void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn);
-int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                       u32 *sqn);
-int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen);
-void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn);
-int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                        u32 *tirn);
-int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
-                        int inlen);
-void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn);
-int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                        u32 *tisn);
-void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn);
-int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                        u32 *rmpn);
-int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen);
-int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn);
-int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
-int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
-int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                         u32 *rmpn);
-int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn);
-int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
-int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
-
-int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                        u32 *rqtn);
-int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
-                        int inlen);
-void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn);
-
-#endif /* __TRANSOBJ_H__ */
index 076197e..c7398b9 100644 (file)
@@ -76,7 +76,7 @@ u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
 
        return MLX5_GET(query_vport_state_out, out, admin_state);
 }
-EXPORT_SYMBOL(mlx5_query_vport_admin_state);
+EXPORT_SYMBOL_GPL(mlx5_query_vport_admin_state);
 
 int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
                                  u16 vport, u8 state)
@@ -104,7 +104,7 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
 
        return err;
 }
-EXPORT_SYMBOL(mlx5_modify_vport_admin_state);
+EXPORT_SYMBOL_GPL(mlx5_modify_vport_admin_state);
 
 static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
                                        u32 *out, int outlen)
@@ -151,12 +151,9 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
                                nic_vport_context.permanent_address);
 
        err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
-       if (err)
-               goto out;
-
-       ether_addr_copy(addr, &out_addr[2]);
+       if (!err)
+               ether_addr_copy(addr, &out_addr[2]);
 
-out:
        kvfree(out);
        return err;
 }
@@ -197,7 +194,7 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
 
        return err;
 }
-EXPORT_SYMBOL(mlx5_modify_nic_vport_mac_address);
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_address);
 
 int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
                                  u32 vport,
@@ -430,6 +427,68 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans);
 
+/*
+ * Read the system image GUID from the NIC vport context of vport 0.
+ *
+ * Returns 0 and stores the GUID in *system_image_guid on success, -ENOMEM if
+ * the output buffer cannot be allocated, or the error from the vport context
+ * query. *system_image_guid is left untouched on failure.
+ */
+int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
+                                          u64 *system_image_guid)
+{
+       u32 *out;
+       int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+       int err;
+
+       out = mlx5_vzalloc(outlen);
+       if (!out)
+               return -ENOMEM;
+
+       /* Only report a value the device actually returned. */
+       err = mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+       if (!err)
+               *system_image_guid =
+                       MLX5_GET64(query_nic_vport_context_out, out,
+                                  nic_vport_context.system_image_guid);
+
+       /* mlx5_vzalloc() buffers are released with kvfree(), not kfree(). */
+       kvfree(out);
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid);
+
+/*
+ * Read the node GUID from the NIC vport context of vport 0.
+ *
+ * Returns 0 and stores the GUID in *node_guid on success, -ENOMEM if the
+ * output buffer cannot be allocated, or the error from the vport context
+ * query. *node_guid is left untouched on failure.
+ */
+int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
+{
+       u32 *out;
+       int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+       int err;
+
+       out = mlx5_vzalloc(outlen);
+       if (!out)
+               return -ENOMEM;
+
+       /* Only report a value the device actually returned. */
+       err = mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+       if (!err)
+               *node_guid = MLX5_GET64(query_nic_vport_context_out, out,
+                                       nic_vport_context.node_guid);
+
+       /* mlx5_vzalloc() buffers are released with kvfree(), not kfree(). */
+       kvfree(out);
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid);
+
+/*
+ * Read the Q_Key violation counter from the NIC vport context of vport 0.
+ *
+ * Returns 0 and stores the counter in *qkey_viol_cntr on success, -ENOMEM if
+ * the output buffer cannot be allocated, or the error from the vport context
+ * query. *qkey_viol_cntr is left untouched on failure.
+ */
+int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
+                                       u16 *qkey_viol_cntr)
+{
+       u32 *out;
+       int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+       int err;
+
+       out = mlx5_vzalloc(outlen);
+       if (!out)
+               return -ENOMEM;
+
+       /* Only report a value the device actually returned. */
+       err = mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+       if (!err)
+               *qkey_viol_cntr =
+                       MLX5_GET(query_nic_vport_context_out, out,
+                                nic_vport_context.qkey_violation_counter);
+
+       /* mlx5_vzalloc() buffers are released with kvfree(), not kfree(). */
+       kvfree(out);
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_qkey_viol_cntr);
+
 int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
                             u8 port_num, u16  vf_num, u16 gid_index,
                             union ib_gid *gid)
@@ -750,3 +809,44 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev,
        return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc);
+
+/*
+ * Values written to nic_vport_context.roce_en by
+ * mlx5_nic_vport_update_roce_state().
+ */
+enum mlx5_vport_roce_state {
+       MLX5_VPORT_ROCE_DISABLED = 0,
+       MLX5_VPORT_ROCE_ENABLED  = 1,
+};
+
+/*
+ * Write @state into the roce_en field of the NIC vport context, selecting
+ * only that field for modification.
+ *
+ * Returns -ENOMEM if the command input buffer cannot be allocated, otherwise
+ * the result of mlx5_modify_nic_vport_context().
+ */
+static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev,
+                                           enum mlx5_vport_roce_state state)
+{
+       int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+       void *in;
+       int ret;
+
+       in = mlx5_vzalloc(inlen);
+       if (!in) {
+               mlx5_core_warn(mdev, "failed to allocate inbox\n");
+               return -ENOMEM;
+       }
+
+       /* field_select.roce_en marks roce_en as the field being changed. */
+       MLX5_SET(modify_nic_vport_context_in, in, field_select.roce_en, 1);
+       MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.roce_en,
+                state);
+
+       ret = mlx5_modify_nic_vport_context(mdev, in, inlen);
+       kvfree(in);
+
+       return ret;
+}
+
+/*
+ * Set roce_en in the NIC vport context to MLX5_VPORT_ROCE_ENABLED.
+ * Returns 0 on success or the error from the vport context update.
+ */
+int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev)
+{
+       return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce);
+
+/*
+ * Set roce_en in the NIC vport context to MLX5_VPORT_ROCE_DISABLED.
+ * Returns 0 on success or the error from the vport context update.
+ */
+int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
+{
+       return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
index 0c52372..bb77e22 100644 (file)
@@ -1044,6 +1044,92 @@ static inline void mlxsw_reg_sftr_pack(char *payload,
        mlxsw_reg_sftr_port_mask_set(payload, port, 1);
 }
 
+/* SFDF - Switch Filtering DB Flush
+ * --------------------------------
+ * The switch filtering DB flush register is used to flush the FDB.
+ * Note that FDB notifications are flushed as well.
+ */
+#define MLXSW_REG_SFDF_ID 0x2013
+#define MLXSW_REG_SFDF_LEN 0x14
+
+static const struct mlxsw_reg_info mlxsw_reg_sfdf = {
+       .id = MLXSW_REG_SFDF_ID,
+       .len = MLXSW_REG_SFDF_LEN,
+};
+
+/* reg_sfdf_swid
+ * Switch partition ID.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sfdf, swid, 0x00, 24, 8);
+
+/*
+ * Values for the SFDF flush_type field. The enumerators take the implicit
+ * values 0..5, in the order the flush types are listed for that field.
+ */
+enum mlxsw_reg_sfdf_flush_type {
+       MLXSW_REG_SFDF_FLUSH_PER_SWID,
+       MLXSW_REG_SFDF_FLUSH_PER_FID,
+       MLXSW_REG_SFDF_FLUSH_PER_PORT,
+       MLXSW_REG_SFDF_FLUSH_PER_PORT_AND_FID,
+       MLXSW_REG_SFDF_FLUSH_PER_LAG,
+       MLXSW_REG_SFDF_FLUSH_PER_LAG_AND_FID,
+};
+
+/* reg_sfdf_flush_type
+ * Flush type.
+ * 0 - All SWID dynamic entries are flushed.
+ * 1 - All FID dynamic entries are flushed.
+ * 2 - All dynamic entries pointing to port are flushed.
+ * 3 - All FID dynamic entries pointing to port are flushed.
+ * 4 - All dynamic entries pointing to LAG are flushed.
+ * 5 - All FID dynamic entries pointing to LAG are flushed.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, flush_type, 0x04, 28, 4);
+
+/* reg_sfdf_flush_static
+ * Static.
+ * 0 - Flush only dynamic entries.
+ * 1 - Flush both dynamic and static entries.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, flush_static, 0x04, 24, 1);
+
+/*
+ * Prepare an SFDF payload: zero it, select the flush @type and set
+ * flush_static, so static entries are flushed along with dynamic ones.
+ * The caller sets the type-specific fields (FID, port, LAG ID) afterwards.
+ */
+static inline void mlxsw_reg_sfdf_pack(char *payload,
+                                      enum mlxsw_reg_sfdf_flush_type type)
+{
+       MLXSW_REG_ZERO(sfdf, payload);
+       mlxsw_reg_sfdf_flush_type_set(payload, type);
+       mlxsw_reg_sfdf_flush_static_set(payload, true);
+}
+
+/* reg_sfdf_fid
+ * FID to flush.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, fid, 0x0C, 0, 16);
+
+/* reg_sfdf_system_port
+ * Port to flush.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, system_port, 0x0C, 0, 16);
+
+/* reg_sfdf_port_fid_system_port
+ * Port to flush, pointed to by FID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, port_fid_system_port, 0x08, 0, 16);
+
+/* reg_sfdf_lag_id
+ * LAG ID to flush.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, lag_id, 0x0C, 0, 10);
+
+/* reg_sfdf_lag_fid_lag_id
+ * LAG ID to flush, pointed to by FID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, lag_fid_lag_id, 0x08, 0, 10);
+
 /* SLDR - Switch LAG Descriptor Register
  * -----------------------------------------
  * The switch LAG descriptor register is populated by LAG descriptors.
@@ -1701,20 +1787,20 @@ MLXSW_ITEM32(reg, pmlp, width, 0x00, 0, 8);
  * Module number.
  * Access: RW
  */
-MLXSW_ITEM32_INDEXED(reg, pmlp, module, 0x04, 0, 8, 0x04, 0, false);
+MLXSW_ITEM32_INDEXED(reg, pmlp, module, 0x04, 0, 8, 0x04, 0x00, false);
 
 /* reg_pmlp_tx_lane
  * Tx Lane. When rxtx field is cleared, this field is used for Rx as well.
  * Access: RW
  */
-MLXSW_ITEM32_INDEXED(reg, pmlp, tx_lane, 0x04, 16, 2, 0x04, 16, false);
+MLXSW_ITEM32_INDEXED(reg, pmlp, tx_lane, 0x04, 16, 2, 0x04, 0x00, false);
 
 /* reg_pmlp_rx_lane
  * Rx Lane. When rxtx field is cleared, this field is ignored and Rx lane is
  * equal to Tx lane.
  * Access: RW
  */
-MLXSW_ITEM32_INDEXED(reg, pmlp, rx_lane, 0x04, 24, 2, 0x04, 24, false);
+MLXSW_ITEM32_INDEXED(reg, pmlp, rx_lane, 0x04, 24, 2, 0x04, 0x00, false);
 
 static inline void mlxsw_reg_pmlp_pack(char *payload, u8 local_port)
 {
@@ -3121,6 +3207,8 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id)
                return "SFGC";
        case MLXSW_REG_SFTR_ID:
                return "SFTR";
+       case MLXSW_REG_SFDF_ID:
+               return "SFDF";
        case MLXSW_REG_SLDR_ID:
                return "SLDR";
        case MLXSW_REG_SLCR_ID:
index ce6845d..217856b 100644 (file)
@@ -1979,6 +1979,115 @@ static struct mlxsw_driver mlxsw_sp_driver = {
        .profile                = &mlxsw_sp_config_profile,
 };
 
+static int
+mlxsw_sp_port_fdb_flush_by_port(const struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char sfdf_pl[MLXSW_REG_SFDF_LEN];
+
+       mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_PORT);
+       mlxsw_reg_sfdf_system_port_set(sfdf_pl, mlxsw_sp_port->local_port);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+static int
+mlxsw_sp_port_fdb_flush_by_port_fid(const struct mlxsw_sp_port *mlxsw_sp_port,
+                                   u16 fid)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char sfdf_pl[MLXSW_REG_SFDF_LEN];
+
+       mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_PORT_AND_FID);
+       mlxsw_reg_sfdf_fid_set(sfdf_pl, fid);
+       mlxsw_reg_sfdf_port_fid_system_port_set(sfdf_pl,
+                                               mlxsw_sp_port->local_port);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+static int
+mlxsw_sp_port_fdb_flush_by_lag_id(const struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char sfdf_pl[MLXSW_REG_SFDF_LEN];
+
+       mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_LAG);
+       mlxsw_reg_sfdf_lag_id_set(sfdf_pl, mlxsw_sp_port->lag_id);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+static int
+mlxsw_sp_port_fdb_flush_by_lag_id_fid(const struct mlxsw_sp_port *mlxsw_sp_port,
+                                     u16 fid)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char sfdf_pl[MLXSW_REG_SFDF_LEN];
+
+       mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_LAG_AND_FID);
+       mlxsw_reg_sfdf_fid_set(sfdf_pl, fid);
+       mlxsw_reg_sfdf_lag_fid_lag_id_set(sfdf_pl, mlxsw_sp_port->lag_id);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+static int
+__mlxsw_sp_port_fdb_flush(const struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       int err, last_err = 0;
+       u16 vid;
+
+       for (vid = 1; vid < VLAN_N_VID - 1; vid++) {
+               err = mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_port, vid);
+               if (err)
+                       last_err = err;
+       }
+
+       return last_err;
+}
+
+static int
+__mlxsw_sp_port_fdb_flush_lagged(const struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       int err, last_err = 0;
+       u16 vid;
+
+       for (vid = 1; vid < VLAN_N_VID - 1; vid++) {
+               err = mlxsw_sp_port_fdb_flush_by_lag_id_fid(mlxsw_sp_port, vid);
+               if (err)
+                       last_err = err;
+       }
+
+       return last_err;
+}
+
+static int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       if (!list_empty(&mlxsw_sp_port->vports_list))
+               if (mlxsw_sp_port->lagged)
+                       return __mlxsw_sp_port_fdb_flush_lagged(mlxsw_sp_port);
+               else
+                       return __mlxsw_sp_port_fdb_flush(mlxsw_sp_port);
+       else
+               if (mlxsw_sp_port->lagged)
+                       return mlxsw_sp_port_fdb_flush_by_lag_id(mlxsw_sp_port);
+               else
+                       return mlxsw_sp_port_fdb_flush_by_port(mlxsw_sp_port);
+}
+
+static int mlxsw_sp_vport_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_vport)
+{
+       u16 vfid = mlxsw_sp_vport_vfid_get(mlxsw_sp_vport);
+       u16 fid = mlxsw_sp_vfid_to_fid(vfid);
+
+       if (mlxsw_sp_vport->lagged)
+               return mlxsw_sp_port_fdb_flush_by_lag_id_fid(mlxsw_sp_vport,
+                                                            fid);
+       else
+               return mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_vport, fid);
+}
+
 static bool mlxsw_sp_port_dev_check(const struct net_device *dev)
 {
        return dev->netdev_ops == &mlxsw_sp_port_netdev_ops;
@@ -2006,10 +2115,14 @@ static int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port)
        return 0;
 }
 
-static int mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port)
+static int mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
+                                     bool flush_fdb)
 {
        struct net_device *dev = mlxsw_sp_port->dev;
 
+       if (flush_fdb && mlxsw_sp_port_fdb_flush(mlxsw_sp_port))
+               netdev_err(mlxsw_sp_port->dev, "Failed to flush FDB\n");
+
        mlxsw_sp_port->learning = 0;
        mlxsw_sp_port->learning_sync = 0;
        mlxsw_sp_port->uc_flood = 0;
@@ -2200,10 +2313,15 @@ err_col_port_enable:
        return err;
 }
 
+static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport,
+                                      struct net_device *br_dev,
+                                      bool flush_fdb);
+
 static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port,
                                   struct net_device *lag_dev)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       struct mlxsw_sp_port *mlxsw_sp_vport;
        struct mlxsw_sp_upper *lag;
        u16 lag_id = mlxsw_sp_port->lag_id;
        int err;
@@ -2220,7 +2338,32 @@ static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port,
        if (err)
                return err;
 
+       /* In case we leave a LAG device that has bridges built on top,
+        * then their teardown sequence is never issued and we need to
+        * invoke the necessary cleanup routines ourselves.
+        */
+       list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list,
+                           vport.list) {
+               struct net_device *br_dev;
+
+               if (!mlxsw_sp_vport->bridged)
+                       continue;
+
+               br_dev = mlxsw_sp_vport_br_get(mlxsw_sp_vport);
+               mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev, false);
+       }
+
+       if (mlxsw_sp_port->bridged) {
+               mlxsw_sp_port_active_vlans_del(mlxsw_sp_port);
+               mlxsw_sp_port_bridge_leave(mlxsw_sp_port, false);
+
+               if (lag->ref_count == 1)
+                       mlxsw_sp_master_bridge_dec(mlxsw_sp, NULL);
+       }
+
        if (lag->ref_count == 1) {
+               if (mlxsw_sp_port_fdb_flush_by_lag_id(mlxsw_sp_port))
+                       netdev_err(mlxsw_sp_port->dev, "Failed to flush FDB\n");
                err = mlxsw_sp_lag_destroy(mlxsw_sp, lag_id);
                if (err)
                        return err;
@@ -2272,9 +2415,6 @@ static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port,
        return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled);
 }
 
-static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport,
-                                      struct net_device *br_dev);
-
 static int mlxsw_sp_port_vlan_link(struct mlxsw_sp_port *mlxsw_sp_port,
                                   struct net_device *vlan_dev)
 {
@@ -2312,7 +2452,7 @@ static int mlxsw_sp_port_vlan_unlink(struct mlxsw_sp_port *mlxsw_sp_port,
                struct net_device *br_dev;
 
                br_dev = mlxsw_sp_vport_br_get(mlxsw_sp_vport);
-               mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev);
+               mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev, true);
        }
 
        mlxsw_sp_vport->dev = mlxsw_sp_port->dev;
@@ -2374,7 +2514,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev,
                                }
                                mlxsw_sp_master_bridge_inc(mlxsw_sp, upper_dev);
                        } else {
-                               err = mlxsw_sp_port_bridge_leave(mlxsw_sp_port);
+                               err = mlxsw_sp_port_bridge_leave(mlxsw_sp_port,
+                                                                true);
                                mlxsw_sp_master_bridge_dec(mlxsw_sp, upper_dev);
                                if (err) {
                                        netdev_err(dev, "Failed to leave bridge\n");
@@ -2541,7 +2682,8 @@ static void mlxsw_sp_br_vfid_destroy(struct mlxsw_sp *mlxsw_sp,
 }
 
 static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport,
-                                      struct net_device *br_dev)
+                                      struct net_device *br_dev,
+                                      bool flush_fdb)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
        u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
@@ -2604,6 +2746,9 @@ static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport,
                goto err_vport_flood_set;
        }
 
+       if (flush_fdb && mlxsw_sp_vport_fdb_flush(mlxsw_sp_vport))
+               netdev_err(dev, "Failed to flush FDB\n");
+
        /* Switch between the vFIDs and destroy the old one if needed. */
        new_vfid->nr_vports++;
        mlxsw_sp_vport->vport.vfid = new_vfid;
@@ -2777,7 +2922,7 @@ static int mlxsw_sp_netdevice_vport_event(struct net_device *dev,
                        if (!mlxsw_sp_vport)
                                return NOTIFY_DONE;
                        err = mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport,
-                                                         upper_dev);
+                                                         upper_dev, true);
                        if (err) {
                                netdev_err(dev, "Failed to leave bridge\n");
                                return NOTIFY_BAD;
index a23dc61..7f42eb1 100644 (file)
@@ -120,7 +120,6 @@ struct mlxsw_sp {
        } fdb_notify;
 #define MLXSW_SP_DEFAULT_AGEING_TIME 300
        u32 ageing_time;
-       struct mutex fdb_lock;  /* Make sure FDB sessions are atomic. */
        struct mlxsw_sp_upper master_bridge;
        struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX];
 };
@@ -254,5 +253,6 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev,
                           __be16 __always_unused proto, u16 vid);
 int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 vfid,
                             bool set, bool only_uc);
+void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port);
 
 #endif
index 45479ef..e492ca2 100644 (file)
@@ -45,6 +45,7 @@
 #include <linux/if_bridge.h>
 #include <linux/workqueue.h>
 #include <linux/jiffies.h>
+#include <linux/rtnetlink.h>
 #include <net/switchdev.h>
 
 #include "spectrum.h"
@@ -124,14 +125,14 @@ static int mlxsw_sp_port_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port,
        int err;
 
        switch (state) {
-       case BR_STATE_DISABLED: /* fall-through */
        case BR_STATE_FORWARDING:
                spms_state = MLXSW_REG_SPMS_STATE_FORWARDING;
                break;
-       case BR_STATE_LISTENING: /* fall-through */
        case BR_STATE_LEARNING:
                spms_state = MLXSW_REG_SPMS_STATE_LEARNING;
                break;
+       case BR_STATE_LISTENING: /* fall-through */
+       case BR_STATE_DISABLED: /* fall-through */
        case BR_STATE_BLOCKING:
                spms_state = MLXSW_REG_SPMS_STATE_DISCARDING;
                break;
@@ -936,6 +937,14 @@ static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
                                         vlan->vid_begin, vlan->vid_end, false);
 }
 
+void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       u16 vid;
+
+       for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID)
+               __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid, false);
+}
+
 static int
 mlxsw_sp_port_fdb_static_del(struct mlxsw_sp_port *mlxsw_sp_port,
                             const struct switchdev_obj_port_fdb *fdb)
@@ -1040,10 +1049,12 @@ static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp,
 
 static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
                                  struct switchdev_obj_port_fdb *fdb,
-                                 switchdev_obj_dump_cb_t *cb)
+                                 switchdev_obj_dump_cb_t *cb,
+                                 struct net_device *orig_dev)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-       u16 vport_vid = 0, vport_fid = 0;
+       struct mlxsw_sp_port *tmp;
+       u16 vport_fid = 0;
        char *sfd_pl;
        char mac[ETH_ALEN];
        u16 fid;
@@ -1058,13 +1069,11 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
        if (!sfd_pl)
                return -ENOMEM;
 
-       mutex_lock(&mlxsw_sp_port->mlxsw_sp->fdb_lock);
        if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) {
                u16 tmp;
 
                tmp = mlxsw_sp_vport_vfid_get(mlxsw_sp_port);
                vport_fid = mlxsw_sp_vfid_to_fid(tmp);
-               vport_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port);
        }
 
        mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_QUERY_DUMP, 0);
@@ -1088,12 +1097,13 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
                                mlxsw_reg_sfd_uc_unpack(sfd_pl, i, mac, &fid,
                                                        &local_port);
                                if (local_port == mlxsw_sp_port->local_port) {
-                                       if (vport_fid && vport_fid != fid)
-                                               continue;
-                                       else if (vport_fid)
-                                               fdb->vid = vport_vid;
-                                       else
+                                       if (vport_fid && vport_fid == fid)
+                                               fdb->vid = 0;
+                                       else if (!vport_fid &&
+                                                !mlxsw_sp_fid_is_vfid(fid))
                                                fdb->vid = fid;
+                                       else
+                                               continue;
                                        ether_addr_copy(fdb->addr, mac);
                                        fdb->ndm_state = NUD_REACHABLE;
                                        err = cb(&fdb->obj);
@@ -1104,14 +1114,22 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
                        case MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG:
                                mlxsw_reg_sfd_uc_lag_unpack(sfd_pl, i,
                                                            mac, &fid, &lag_id);
-                               if (mlxsw_sp_port ==
-                                   mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id)) {
-                                       if (vport_fid && vport_fid != fid)
+                               tmp = mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id);
+                               if (tmp && tmp->local_port ==
+                                   mlxsw_sp_port->local_port) {
+                                       /* LAG records can only point to LAG
+                                        * devices or VLAN devices on top.
+                                        */
+                                       if (!netif_is_lag_master(orig_dev) &&
+                                           !is_vlan_dev(orig_dev))
                                                continue;
-                                       else if (vport_fid)
-                                               fdb->vid = vport_vid;
-                                       else
+                                       if (vport_fid && vport_fid == fid)
+                                               fdb->vid = 0;
+                                       else if (!vport_fid &&
+                                                !mlxsw_sp_fid_is_vfid(fid))
                                                fdb->vid = fid;
+                                       else
+                                               continue;
                                        ether_addr_copy(fdb->addr, mac);
                                        fdb->ndm_state = NUD_REACHABLE;
                                        err = cb(&fdb->obj);
@@ -1124,7 +1142,6 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
        } while (num_rec == MLXSW_REG_SFD_REC_MAX_COUNT);
 
 out:
-       mutex_unlock(&mlxsw_sp_port->mlxsw_sp->fdb_lock);
        kfree(sfd_pl);
        return stored_err ? stored_err : err;
 }
@@ -1176,7 +1193,8 @@ static int mlxsw_sp_port_obj_dump(struct net_device *dev,
                break;
        case SWITCHDEV_OBJ_ID_PORT_FDB:
                err = mlxsw_sp_port_fdb_dump(mlxsw_sp_port,
-                                            SWITCHDEV_OBJ_PORT_FDB(obj), cb);
+                                            SWITCHDEV_OBJ_PORT_FDB(obj), cb,
+                                            obj->orig_dev);
                break;
        default:
                err = -EOPNOTSUPP;
@@ -1194,14 +1212,14 @@ static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = {
        .switchdev_port_obj_dump        = mlxsw_sp_port_obj_dump,
 };
 
-static void mlxsw_sp_fdb_call_notifiers(bool learning, bool learning_sync,
-                                       bool adding, char *mac, u16 vid,
+static void mlxsw_sp_fdb_call_notifiers(bool learning_sync, bool adding,
+                                       char *mac, u16 vid,
                                        struct net_device *dev)
 {
        struct switchdev_notifier_fdb_info info;
        unsigned long notifier_type;
 
-       if (learning && learning_sync) {
+       if (learning_sync) {
                info.addr = mac;
                info.vid = vid;
                notifier_type = adding ? SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL;
@@ -1237,7 +1255,7 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
                        netdev_err(mlxsw_sp_port->dev, "Failed to find a matching vPort following FDB notification\n");
                        goto just_remove;
                }
-               vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
+               vid = 0;
                /* Override the physical port with the vPort. */
                mlxsw_sp_port = mlxsw_sp_vport;
        } else {
@@ -1257,8 +1275,7 @@ do_fdb_op:
 
        if (!do_notification)
                return;
-       mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning,
-                                   mlxsw_sp_port->learning_sync,
+       mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning_sync,
                                    adding, mac, vid, mlxsw_sp_port->dev);
        return;
 
@@ -1273,6 +1290,7 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp,
                                                bool adding)
 {
        struct mlxsw_sp_port *mlxsw_sp_port;
+       struct net_device *dev;
        char mac[ETH_ALEN];
        u16 lag_vid = 0;
        u16 lag_id;
@@ -1298,11 +1316,13 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp,
                        goto just_remove;
                }
 
-               vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
-               lag_vid = vid;
+               lag_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
+               dev = mlxsw_sp_vport->dev;
+               vid = 0;
                /* Override the physical port with the vPort. */
                mlxsw_sp_port = mlxsw_sp_vport;
        } else {
+               dev = mlxsw_sp_lag_get(mlxsw_sp, lag_id)->dev;
                vid = fid;
        }
 
@@ -1319,10 +1339,8 @@ do_fdb_op:
 
        if (!do_notification)
                return;
-       mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning,
-                                   mlxsw_sp_port->learning_sync,
-                                   adding, mac, vid,
-                                   mlxsw_sp_lag_get(mlxsw_sp, lag_id)->dev);
+       mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning_sync, adding, mac,
+                                   vid, dev);
        return;
 
 just_remove:
@@ -1374,7 +1392,7 @@ static void mlxsw_sp_fdb_notify_work(struct work_struct *work)
 
        mlxsw_sp = container_of(work, struct mlxsw_sp, fdb_notify.dw.work);
 
-       mutex_lock(&mlxsw_sp->fdb_lock);
+       rtnl_lock();
        do {
                mlxsw_reg_sfn_pack(sfn_pl);
                err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl);
@@ -1387,7 +1405,7 @@ static void mlxsw_sp_fdb_notify_work(struct work_struct *work)
                        mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i);
 
        } while (num_rec);
-       mutex_unlock(&mlxsw_sp->fdb_lock);
+       rtnl_unlock();
 
        kfree(sfn_pl);
        mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp);
@@ -1402,7 +1420,6 @@ static int mlxsw_sp_fdb_init(struct mlxsw_sp *mlxsw_sp)
                dev_err(mlxsw_sp->bus_info->dev, "Failed to set default ageing time\n");
                return err;
        }
-       mutex_init(&mlxsw_sp->fdb_lock);
        INIT_DELAYED_WORK(&mlxsw_sp->fdb_notify.dw, mlxsw_sp_fdb_notify_work);
        mlxsw_sp->fdb_notify.interval = MLXSW_SP_DEFAULT_LEARNING_INTERVAL;
        mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp);
index a10c928..00cfd95 100644 (file)
 
 #include "moxart_ether.h"
 
+static inline void moxart_desc_write(u32 data, u32 *desc)
+{
+       *desc = cpu_to_le32(data);
+}
+
+static inline u32 moxart_desc_read(u32 *desc)
+{
+       return le32_to_cpu(*desc);
+}
+
 static inline void moxart_emac_write(struct net_device *ndev,
                                     unsigned int reg, unsigned long value)
 {
@@ -112,7 +122,7 @@ static void moxart_mac_enable(struct net_device *ndev)
 static void moxart_mac_setup_desc_ring(struct net_device *ndev)
 {
        struct moxart_mac_priv_t *priv = netdev_priv(ndev);
-       void __iomem *desc;
+       void *desc;
        int i;
 
        for (i = 0; i < TX_DESC_NUM; i++) {
@@ -121,7 +131,7 @@ static void moxart_mac_setup_desc_ring(struct net_device *ndev)
 
                priv->tx_buf[i] = priv->tx_buf_base + priv->tx_buf_size * i;
        }
-       writel(TX_DESC1_END, desc + TX_REG_OFFSET_DESC1);
+       moxart_desc_write(TX_DESC1_END, desc + TX_REG_OFFSET_DESC1);
 
        priv->tx_head = 0;
        priv->tx_tail = 0;
@@ -129,8 +139,8 @@ static void moxart_mac_setup_desc_ring(struct net_device *ndev)
        for (i = 0; i < RX_DESC_NUM; i++) {
                desc = priv->rx_desc_base + i * RX_REG_DESC_SIZE;
                memset(desc, 0, RX_REG_DESC_SIZE);
-               writel(RX_DESC0_DMA_OWN, desc + RX_REG_OFFSET_DESC0);
-               writel(RX_BUF_SIZE & RX_DESC1_BUF_SIZE_MASK,
+               moxart_desc_write(RX_DESC0_DMA_OWN, desc + RX_REG_OFFSET_DESC0);
+               moxart_desc_write(RX_BUF_SIZE & RX_DESC1_BUF_SIZE_MASK,
                       desc + RX_REG_OFFSET_DESC1);
 
                priv->rx_buf[i] = priv->rx_buf_base + priv->rx_buf_size * i;
@@ -141,12 +151,12 @@ static void moxart_mac_setup_desc_ring(struct net_device *ndev)
                if (dma_mapping_error(&ndev->dev, priv->rx_mapping[i]))
                        netdev_err(ndev, "DMA mapping error\n");
 
-               writel(priv->rx_mapping[i],
+               moxart_desc_write(priv->rx_mapping[i],
                       desc + RX_REG_OFFSET_DESC2 + RX_DESC2_ADDRESS_PHYS);
-               writel(priv->rx_buf[i],
+               moxart_desc_write((uintptr_t)priv->rx_buf[i],
                       desc + RX_REG_OFFSET_DESC2 + RX_DESC2_ADDRESS_VIRT);
        }
-       writel(RX_DESC1_END, desc + RX_REG_OFFSET_DESC1);
+       moxart_desc_write(RX_DESC1_END, desc + RX_REG_OFFSET_DESC1);
 
        priv->rx_head = 0;
 
@@ -201,14 +211,15 @@ static int moxart_rx_poll(struct napi_struct *napi, int budget)
                                                      napi);
        struct net_device *ndev = priv->ndev;
        struct sk_buff *skb;
-       void __iomem *desc;
+       void *desc;
        unsigned int desc0, len;
        int rx_head = priv->rx_head;
        int rx = 0;
 
        while (rx < budget) {
                desc = priv->rx_desc_base + (RX_REG_DESC_SIZE * rx_head);
-               desc0 = readl(desc + RX_REG_OFFSET_DESC0);
+               desc0 = moxart_desc_read(desc + RX_REG_OFFSET_DESC0);
+               rmb(); /* ensure desc0 is up to date */
 
                if (desc0 & RX_DESC0_DMA_OWN)
                        break;
@@ -250,7 +261,8 @@ static int moxart_rx_poll(struct napi_struct *napi, int budget)
                        priv->stats.multicast++;
 
 rx_next:
-               writel(RX_DESC0_DMA_OWN, desc + RX_REG_OFFSET_DESC0);
+               wmb(); /* prevent setting ownership back too early */
+               moxart_desc_write(RX_DESC0_DMA_OWN, desc + RX_REG_OFFSET_DESC0);
 
                rx_head = RX_NEXT(rx_head);
                priv->rx_head = rx_head;
@@ -310,7 +322,7 @@ static irqreturn_t moxart_mac_interrupt(int irq, void *dev_id)
 static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
        struct moxart_mac_priv_t *priv = netdev_priv(ndev);
-       void __iomem *desc;
+       void *desc;
        unsigned int len;
        unsigned int tx_head = priv->tx_head;
        u32 txdes1;
@@ -319,11 +331,12 @@ static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        desc = priv->tx_desc_base + (TX_REG_DESC_SIZE * tx_head);
 
        spin_lock_irq(&priv->txlock);
-       if (readl(desc + TX_REG_OFFSET_DESC0) & TX_DESC0_DMA_OWN) {
+       if (moxart_desc_read(desc + TX_REG_OFFSET_DESC0) & TX_DESC0_DMA_OWN) {
                net_dbg_ratelimited("no TX space for packet\n");
                priv->stats.tx_dropped++;
                goto out_unlock;
        }
+       rmb(); /* ensure data is only read that had TX_DESC0_DMA_OWN cleared */
 
        len = skb->len > TX_BUF_SIZE ? TX_BUF_SIZE : skb->len;
 
@@ -337,9 +350,9 @@ static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        priv->tx_len[tx_head] = len;
        priv->tx_skb[tx_head] = skb;
 
-       writel(priv->tx_mapping[tx_head],
+       moxart_desc_write(priv->tx_mapping[tx_head],
               desc + TX_REG_OFFSET_DESC2 + TX_DESC2_ADDRESS_PHYS);
-       writel(skb->data,
+       moxart_desc_write((uintptr_t)skb->data,
               desc + TX_REG_OFFSET_DESC2 + TX_DESC2_ADDRESS_VIRT);
 
        if (skb->len < ETH_ZLEN) {
@@ -354,8 +367,9 @@ static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        txdes1 = TX_DESC1_LTS | TX_DESC1_FTS | (len & TX_DESC1_BUF_SIZE_MASK);
        if (tx_head == TX_DESC_NUM_MASK)
                txdes1 |= TX_DESC1_END;
-       writel(txdes1, desc + TX_REG_OFFSET_DESC1);
-       writel(TX_DESC0_DMA_OWN, desc + TX_REG_OFFSET_DESC0);
+       moxart_desc_write(txdes1, desc + TX_REG_OFFSET_DESC1);
+       wmb(); /* flush descriptor before transferring ownership */
+       moxart_desc_write(TX_DESC0_DMA_OWN, desc + TX_REG_OFFSET_DESC0);
 
        /* start to send packet */
        writel(0xffffffff, priv->base + REG_TX_POLL_DEMAND);
index 2be9280..93a9563 100644 (file)
@@ -300,7 +300,7 @@ struct moxart_mac_priv_t {
 
        dma_addr_t rx_base;
        dma_addr_t rx_mapping[RX_DESC_NUM];
-       void __iomem *rx_desc_base;
+       void *rx_desc_base;
        unsigned char *rx_buf_base;
        unsigned char *rx_buf[RX_DESC_NUM];
        unsigned int rx_head;
@@ -308,7 +308,7 @@ struct moxart_mac_priv_t {
 
        dma_addr_t tx_base;
        dma_addr_t tx_mapping[TX_DESC_NUM];
-       void __iomem *tx_desc_base;
+       void *tx_desc_base;
        unsigned char *tx_buf_base;
        unsigned char *tx_buf[RX_DESC_NUM];
        unsigned int tx_head;
index 50d5604..e0993eb 100644 (file)
@@ -2223,8 +2223,6 @@ static irqreturn_t vxge_isr_napi(int irq, void *dev_id)
        return IRQ_NONE;
 }
 
-#ifdef CONFIG_PCI_MSI
-
 static irqreturn_t vxge_tx_msix_handle(int irq, void *dev_id)
 {
        struct vxge_fifo *fifo = (struct vxge_fifo *)dev_id;
@@ -2442,16 +2440,13 @@ static void vxge_rem_msix_isr(struct vxgedev *vdev)
        if (vdev->config.intr_type == MSI_X)
                pci_disable_msix(vdev->pdev);
 }
-#endif
 
 static void vxge_rem_isr(struct vxgedev *vdev)
 {
-#ifdef CONFIG_PCI_MSI
-       if (vdev->config.intr_type == MSI_X) {
+       if (IS_ENABLED(CONFIG_PCI_MSI) &&
+           vdev->config.intr_type == MSI_X) {
                vxge_rem_msix_isr(vdev);
-       } else
-#endif
-       if (vdev->config.intr_type == INTA) {
+       } else if (vdev->config.intr_type == INTA) {
                        synchronize_irq(vdev->pdev->irq);
                        free_irq(vdev->pdev->irq, vdev);
        }
@@ -2460,11 +2455,10 @@ static void vxge_rem_isr(struct vxgedev *vdev)
 static int vxge_add_isr(struct vxgedev *vdev)
 {
        int ret = 0;
-#ifdef CONFIG_PCI_MSI
        int vp_idx = 0, intr_idx = 0, intr_cnt = 0, msix_idx = 0, irq_req = 0;
        int pci_fun = PCI_FUNC(vdev->pdev->devfn);
 
-       if (vdev->config.intr_type == MSI_X)
+       if (IS_ENABLED(CONFIG_PCI_MSI) && vdev->config.intr_type == MSI_X)
                ret = vxge_enable_msix(vdev);
 
        if (ret) {
@@ -2475,7 +2469,7 @@ static int vxge_add_isr(struct vxgedev *vdev)
                vdev->config.intr_type = INTA;
        }
 
-       if (vdev->config.intr_type == MSI_X) {
+       if (IS_ENABLED(CONFIG_PCI_MSI) && vdev->config.intr_type == MSI_X) {
                for (intr_idx = 0;
                     intr_idx < (vdev->no_of_vpath *
                        VXGE_HW_VPATH_MSIX_ACTIVE); intr_idx++) {
@@ -2576,9 +2570,8 @@ static int vxge_add_isr(struct vxgedev *vdev)
                vdev->vxge_entries[intr_cnt].in_use = 1;
                vdev->vxge_entries[intr_cnt].arg = &vdev->vpaths[0];
        }
-INTA_MODE:
-#endif
 
+INTA_MODE:
        if (vdev->config.intr_type == INTA) {
                snprintf(vdev->desc[0], VXGE_INTR_STRLEN,
                        "%s:vxge:INTA", vdev->ndev->name);
@@ -3889,12 +3882,12 @@ static void vxge_device_config_init(struct vxge_hw_device_config *device_config,
        if (max_mac_vpath > VXGE_MAX_MAC_ADDR_COUNT)
                max_mac_vpath = VXGE_MAX_MAC_ADDR_COUNT;
 
-#ifndef CONFIG_PCI_MSI
-       vxge_debug_init(VXGE_ERR,
-               "%s: This Kernel does not support "
-               "MSI-X. Defaulting to INTA", VXGE_DRIVER_NAME);
-       *intr_type = INTA;
-#endif
+       if (!IS_ENABLED(CONFIG_PCI_MSI)) {
+               vxge_debug_init(VXGE_ERR,
+                       "%s: This Kernel does not support "
+                       "MSI-X. Defaulting to INTA", VXGE_DRIVER_NAME);
+               *intr_type = INTA;
+       }
 
        /* Configure whether MSI-X or IRQL. */
        switch (*intr_type) {
index a4ab71d..166a7fc 100644 (file)
@@ -3531,12 +3531,14 @@ static void rocker_port_fdb_learn_work(struct work_struct *work)
        info.addr = lw->addr;
        info.vid = lw->vid;
 
+       rtnl_lock();
        if (learned && removing)
                call_switchdev_notifiers(SWITCHDEV_FDB_DEL,
                                         lw->rocker_port->dev, &info.info);
        else if (learned && !removing)
                call_switchdev_notifiers(SWITCHDEV_FDB_ADD,
                                         lw->rocker_port->dev, &info.info);
+       rtnl_unlock();
 
        rocker_port_kfree(lw->trans, work);
 }
index cc106d8..23fa298 100644 (file)
@@ -389,17 +389,27 @@ static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc)
        if (vio_version_after_eq(&port->vio, 1, 8)) {
                struct vio_net_dext *dext = vio_net_ext(desc);
 
+               skb_reset_network_header(skb);
+
                if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM) {
                        if (skb->protocol == ETH_P_IP) {
-                               struct iphdr *iph = (struct iphdr *)skb->data;
+                               struct iphdr *iph = ip_hdr(skb);
 
                                iph->check = 0;
                                ip_send_check(iph);
                        }
                }
                if ((dext->flags & VNET_PKT_HCK_FULLCKSUM) &&
-                   skb->ip_summed == CHECKSUM_NONE)
-                       vnet_fullcsum(skb);
+                   skb->ip_summed == CHECKSUM_NONE) {
+                       if (skb->protocol == htons(ETH_P_IP)) {
+                               struct iphdr *iph = ip_hdr(skb);
+                               int ihl = iph->ihl * 4;
+
+                               skb_reset_transport_header(skb);
+                               skb_set_transport_header(skb, ihl);
+                               vnet_fullcsum(skb);
+                       }
+               }
                if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) {
                        skb->ip_summed = CHECKSUM_PARTIAL;
                        skb->csum_level = 0;
index 657b65b..18bf3a8 100644 (file)
@@ -82,7 +82,7 @@ struct cpdma_desc {
 
 struct cpdma_desc_pool {
        phys_addr_t             phys;
-       u32                     hw_addr;
+       dma_addr_t              hw_addr;
        void __iomem            *iomap;         /* ioremap map */
        void                    *cpumap;        /* dma_alloc map */
        int                     desc_size, mem_size;
@@ -152,7 +152,7 @@ struct cpdma_chan {
  * abstract out these details
  */
 static struct cpdma_desc_pool *
-cpdma_desc_pool_create(struct device *dev, u32 phys, u32 hw_addr,
+cpdma_desc_pool_create(struct device *dev, u32 phys, dma_addr_t hw_addr,
                                int size, int align)
 {
        int bitmap_size;
@@ -176,13 +176,13 @@ cpdma_desc_pool_create(struct device *dev, u32 phys, u32 hw_addr,
 
        if (phys) {
                pool->phys  = phys;
-               pool->iomap = ioremap(phys, size);
+               pool->iomap = ioremap(phys, size); /* should be memremap? */
                pool->hw_addr = hw_addr;
        } else {
-               pool->cpumap = dma_alloc_coherent(dev, size, &pool->phys,
+               pool->cpumap = dma_alloc_coherent(dev, size, &pool->hw_addr,
                                                  GFP_KERNEL);
-               pool->iomap = pool->cpumap;
-               pool->hw_addr = pool->phys;
+               pool->iomap = (void __iomem __force *)pool->cpumap;
+               pool->phys = pool->hw_addr; /* assumes no IOMMU, don't use this value */
        }
 
        if (pool->iomap)
index 7f975a2..b0de8ec 100644 (file)
@@ -533,8 +533,8 @@ static int dfx_register(struct device *bdev)
        const char *print_name = dev_name(bdev);
        struct net_device *dev;
        DFX_board_t       *bp;                  /* board pointer */
-       resource_size_t bar_start[3];           /* pointers to ports */
-       resource_size_t bar_len[3];             /* resource length */
+       resource_size_t bar_start[3] = {0};     /* pointers to ports */
+       resource_size_t bar_len[3] = {0};       /* resource length */
        int alloc_size;                         /* total buffer size used */
        struct resource *region;
        int err = 0;
@@ -3697,8 +3697,8 @@ static void dfx_unregister(struct device *bdev)
        int dfx_bus_pci = dev_is_pci(bdev);
        int dfx_bus_tc = DFX_BUS_TC(bdev);
        int dfx_use_mmio = DFX_MMIO || dfx_bus_tc;
-       resource_size_t bar_start[3];           /* pointers to ports */
-       resource_size_t bar_len[3];             /* resource lengths */
+       resource_size_t bar_start[3] = {0};     /* pointers to ports */
+       resource_size_t bar_len[3] = {0};       /* resource lengths */
        int             alloc_size;             /* total buffer size used */
 
        unregister_netdev(dev);
index 7456569..0b14ac3 100644 (file)
@@ -980,9 +980,9 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
                        opts = ip_tunnel_info_opts(info);
 
                if (key->tun_flags & TUNNEL_CSUM)
-                       flags |= GENEVE_F_UDP_CSUM;
+                       flags &= ~GENEVE_F_UDP_ZERO_CSUM6_TX;
                else
-                       flags &= ~GENEVE_F_UDP_CSUM;
+                       flags |= GENEVE_F_UDP_ZERO_CSUM6_TX;
 
                err = geneve6_build_skb(dst, skb, key->tun_flags, vni,
                                        info->options_len, opts,
index f4130af..fcb92c0 100644 (file)
@@ -624,6 +624,7 @@ struct nvsp_message {
 #define RNDIS_PKT_ALIGN_DEFAULT 8
 
 struct multi_send_data {
+       struct sk_buff *skb; /* skb containing the pkt */
        struct hv_netvsc_packet *pkt; /* netvsc pkt pending */
        u32 count; /* counter of batched packets */
 };
index 059fc52..ec313fc 100644 (file)
@@ -841,6 +841,18 @@ static inline int netvsc_send_pkt(
        return ret;
 }
 
+/* Move packet out of multi send data (msd), and clear msd */
+static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
+                               struct sk_buff **msd_skb,
+                               struct multi_send_data *msdp)
+{
+       *msd_skb = msdp->skb;
+       *msd_send = msdp->pkt;
+       msdp->skb = NULL;
+       msdp->pkt = NULL;
+       msdp->count = 0;
+}
+
 int netvsc_send(struct hv_device *device,
                struct hv_netvsc_packet *packet,
                struct rndis_message *rndis_msg,
@@ -855,6 +867,7 @@ int netvsc_send(struct hv_device *device,
        unsigned int section_index = NETVSC_INVALID_INDEX;
        struct multi_send_data *msdp;
        struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
+       struct sk_buff *msd_skb = NULL;
        bool try_batch;
        bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
 
@@ -897,10 +910,8 @@ int netvsc_send(struct hv_device *device,
                   net_device->send_section_size) {
                section_index = netvsc_get_next_send_section(net_device);
                if (section_index != NETVSC_INVALID_INDEX) {
-                               msd_send = msdp->pkt;
-                               msdp->pkt = NULL;
-                               msdp->count = 0;
-                               msd_len = 0;
+                       move_pkt_msd(&msd_send, &msd_skb, msdp);
+                       msd_len = 0;
                }
        }
 
@@ -919,31 +930,31 @@ int netvsc_send(struct hv_device *device,
                        packet->total_data_buflen += msd_len;
                }
 
-               if (msdp->pkt)
-                       dev_kfree_skb_any(skb);
+               if (msdp->skb)
+                       dev_kfree_skb_any(msdp->skb);
 
                if (xmit_more && !packet->cp_partial) {
+                       msdp->skb = skb;
                        msdp->pkt = packet;
                        msdp->count++;
                } else {
                        cur_send = packet;
+                       msdp->skb = NULL;
                        msdp->pkt = NULL;
                        msdp->count = 0;
                }
        } else {
-               msd_send = msdp->pkt;
-               msdp->pkt = NULL;
-               msdp->count = 0;
+               move_pkt_msd(&msd_send, &msd_skb, msdp);
                cur_send = packet;
        }
 
        if (msd_send) {
-               m_ret = netvsc_send_pkt(msd_send, net_device, pb, skb);
+               m_ret = netvsc_send_pkt(msd_send, net_device, NULL, msd_skb);
 
                if (m_ret != 0) {
                        netvsc_free_send_slot(net_device,
                                              msd_send->send_buf_index);
-                       dev_kfree_skb_any(skb);
+                       dev_kfree_skb_any(msd_skb);
                }
        }
 
index 1c8db9a..1d3a665 100644 (file)
@@ -196,65 +196,6 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
        return ppi;
 }
 
-union sub_key {
-       u64 k;
-       struct {
-               u8 pad[3];
-               u8 kb;
-               u32 ka;
-       };
-};
-
-/* Toeplitz hash function
- * data: network byte order
- * return: host byte order
- */
-static u32 comp_hash(u8 *key, int klen, void *data, int dlen)
-{
-       union sub_key subk;
-       int k_next = 4;
-       u8 dt;
-       int i, j;
-       u32 ret = 0;
-
-       subk.k = 0;
-       subk.ka = ntohl(*(u32 *)key);
-
-       for (i = 0; i < dlen; i++) {
-               subk.kb = key[k_next];
-               k_next = (k_next + 1) % klen;
-               dt = ((u8 *)data)[i];
-               for (j = 0; j < 8; j++) {
-                       if (dt & 0x80)
-                               ret ^= subk.ka;
-                       dt <<= 1;
-                       subk.k <<= 1;
-               }
-       }
-
-       return ret;
-}
-
-static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb)
-{
-       struct flow_keys flow;
-       int data_len;
-
-       if (!skb_flow_dissect_flow_keys(skb, &flow, 0) ||
-           !(flow.basic.n_proto == htons(ETH_P_IP) ||
-             flow.basic.n_proto == htons(ETH_P_IPV6)))
-               return false;
-
-       if (flow.basic.ip_proto == IPPROTO_TCP)
-               data_len = 12;
-       else
-               data_len = 8;
-
-       *hash = comp_hash(netvsc_hash_key, HASH_KEYLEN, &flow, data_len);
-
-       return true;
-}
-
 static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
                        void *accel_priv, select_queue_fallback_t fallback)
 {
@@ -267,11 +208,9 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
        if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1)
                return 0;
 
-       if (netvsc_set_hash(&hash, skb)) {
-               q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
-                       ndev->real_num_tx_queues;
-               skb_set_hash(skb, hash, PKT_HASH_TYPE_L3);
-       }
+       hash = skb_get_hash(skb);
+       q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
+               ndev->real_num_tx_queues;
 
        if (!nvsc_dev->chn_table[q_idx])
                q_idx = 0;
index 29cbde8..d47cf14 100644 (file)
@@ -82,9 +82,6 @@ struct bfin_sir_self {
 
 #define DRIVER_NAME "bfin_sir"
 
-#define port_membase(port)     (((struct bfin_sir_port *)(port))->membase)
-#define get_lsr_cache(port)    (((struct bfin_sir_port *)(port))->lsr)
-#define put_lsr_cache(port, v) (((struct bfin_sir_port *)(port))->lsr = (v))
 #include <asm/bfin_serial.h>
 
 static const unsigned short per[][4] = {
index 6a57a00..94e6888 100644 (file)
@@ -1323,6 +1323,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 
        list_add_tail_rcu(&vlan->list, &port->vlans);
        netif_stacked_transfer_operstate(lowerdev, dev);
+       linkwatch_fire_event(dev);
 
        return 0;
 
@@ -1522,6 +1523,7 @@ static int macvlan_device_event(struct notifier_block *unused,
        port = macvlan_port_get_rtnl(dev);
 
        switch (event) {
+       case NETDEV_UP:
        case NETDEV_CHANGE:
                list_for_each_entry(vlan, &port->vlans, list)
                        netif_stacked_transfer_operstate(vlan->lowerdev,
index 60994a8..f0a7702 100644 (file)
@@ -186,6 +186,7 @@ config MDIO_GPIO
 config MDIO_OCTEON
        tristate "Support for MDIO buses on Octeon and ThunderX SOCs"
        depends on 64BIT
+       depends on HAS_IOMEM
        help
 
          This module provides a driver for the Octeon and ThunderX MDIO
index 180f699..7a240fc 100644 (file)
@@ -846,6 +846,11 @@ static void decode_rxts(struct dp83640_private *dp83640,
        struct skb_shared_hwtstamps *shhwtstamps = NULL;
        struct sk_buff *skb;
        unsigned long flags;
+       u8 overflow;
+
+       overflow = (phy_rxts->ns_hi >> 14) & 0x3;
+       if (overflow)
+               pr_debug("rx timestamp queue overflow, count %d\n", overflow);
 
        spin_lock_irqsave(&dp83640->rx_lock, flags);
 
@@ -888,6 +893,7 @@ static void decode_txts(struct dp83640_private *dp83640,
        struct skb_shared_hwtstamps shhwtstamps;
        struct sk_buff *skb;
        u64 ns;
+       u8 overflow;
 
        /* We must already have the skb that triggered this. */
 
@@ -897,6 +903,17 @@ static void decode_txts(struct dp83640_private *dp83640,
                pr_debug("have timestamp but tx_queue empty\n");
                return;
        }
+
+       overflow = (phy_txts->ns_hi >> 14) & 0x3;
+       if (overflow) {
+               pr_debug("tx timestamp queue overflow, count %d\n", overflow);
+               while (skb) {
+                       skb_complete_tx_timestamp(skb, NULL);
+                       skb = skb_dequeue(&dp83640->tx_queue);
+               }
+               return;
+       }
+
        ns = phy2txts(phy_txts);
        memset(&shhwtstamps, 0, sizeof(shhwtstamps));
        shhwtstamps.hwtstamp = ns_to_ktime(ns);
index 8763bb2..5590b9c 100644 (file)
@@ -692,25 +692,29 @@ void phy_change(struct work_struct *work)
        struct phy_device *phydev =
                container_of(work, struct phy_device, phy_queue);
 
-       if (phydev->drv->did_interrupt &&
-           !phydev->drv->did_interrupt(phydev))
-               goto ignore;
+       if (phy_interrupt_is_valid(phydev)) {
+               if (phydev->drv->did_interrupt &&
+                   !phydev->drv->did_interrupt(phydev))
+                       goto ignore;
 
-       if (phy_disable_interrupts(phydev))
-               goto phy_err;
+               if (phy_disable_interrupts(phydev))
+                       goto phy_err;
+       }
 
        mutex_lock(&phydev->lock);
        if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state))
                phydev->state = PHY_CHANGELINK;
        mutex_unlock(&phydev->lock);
 
-       atomic_dec(&phydev->irq_disable);
-       enable_irq(phydev->irq);
+       if (phy_interrupt_is_valid(phydev)) {
+               atomic_dec(&phydev->irq_disable);
+               enable_irq(phydev->irq);
 
-       /* Reenable interrupts */
-       if (PHY_HALTED != phydev->state &&
-           phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED))
-               goto irq_enable_err;
+               /* Reenable interrupts */
+               if (PHY_HALTED != phydev->state &&
+                   phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED))
+                       goto irq_enable_err;
+       }
 
        /* reschedule state queue work to run as soon as possible */
        cancel_delayed_work_sync(&phydev->state_queue);
@@ -905,10 +909,10 @@ void phy_state_machine(struct work_struct *work)
                phydev->adjust_link(phydev->attached_dev);
                break;
        case PHY_RUNNING:
-               /* Only register a CHANGE if we are polling or ignoring
-                * interrupts and link changed since latest checking.
+               /* Only register a CHANGE if we are polling and link changed
+                * since latest checking.
                 */
-               if (!phy_interrupt_is_valid(phydev)) {
+               if (phydev->irq == PHY_POLL) {
                        old_link = phydev->link;
                        err = phy_read_status(phydev);
                        if (err)
@@ -1000,15 +1004,21 @@ void phy_state_machine(struct work_struct *work)
                   phy_state_to_str(old_state),
                   phy_state_to_str(phydev->state));
 
-       queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,
-                          PHY_STATE_TIME * HZ);
+       /* Only re-schedule a PHY state machine change if we are polling the
+        * PHY, if PHY_IGNORE_INTERRUPT is set, then we will be moving
+        * between states from phy_mac_interrupt()
+        */
+       if (phydev->irq == PHY_POLL)
+               queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,
+                                  PHY_STATE_TIME * HZ);
 }
 
 void phy_mac_interrupt(struct phy_device *phydev, int new_link)
 {
-       cancel_work_sync(&phydev->phy_queue);
        phydev->link = new_link;
-       schedule_work(&phydev->phy_queue);
+
+       /* Trigger a state machine change */
+       queue_work(system_power_efficient_wq, &phydev->phy_queue);
 }
 EXPORT_SYMBOL(phy_mac_interrupt);
 
index e485f26..2e21e93 100644 (file)
 #include <linux/netdevice.h>
 #include <linux/smscphy.h>
 
+struct smsc_phy_priv {
+       bool energy_enable;
+};
+
 static int smsc_phy_config_intr(struct phy_device *phydev)
 {
        int rc = phy_write (phydev, MII_LAN83C185_IM,
@@ -43,19 +47,14 @@ static int smsc_phy_ack_interrupt(struct phy_device *phydev)
 
 static int smsc_phy_config_init(struct phy_device *phydev)
 {
-       int __maybe_unused len;
-       struct device *dev __maybe_unused = &phydev->mdio.dev;
-       struct device_node *of_node __maybe_unused = dev->of_node;
+       struct smsc_phy_priv *priv = phydev->priv;
+
        int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS);
-       int enable_energy = 1;
 
        if (rc < 0)
                return rc;
 
-       if (of_find_property(of_node, "smsc,disable-energy-detect", &len))
-               enable_energy = 0;
-
-       if (enable_energy) {
+       if (priv->energy_enable) {
                /* Enable energy detect mode for this SMSC Transceivers */
                rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS,
                               rc | MII_LAN83C185_EDPWRDOWN);
@@ -110,10 +109,13 @@ static int lan911x_config_init(struct phy_device *phydev)
  */
 static int lan87xx_read_status(struct phy_device *phydev)
 {
+       struct smsc_phy_priv *priv = phydev->priv;
+
        int err = genphy_read_status(phydev);
-       int i;
 
-       if (!phydev->link) {
+       if (!phydev->link && priv->energy_enable) {
+               int i;
+
                /* Disable EDPD to wake up PHY */
                int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS);
                if (rc < 0)
@@ -149,6 +151,26 @@ static int lan87xx_read_status(struct phy_device *phydev)
        return err;
 }
 
+static int smsc_phy_probe(struct phy_device *phydev)
+{
+       struct device *dev = &phydev->mdio.dev;
+       struct device_node *of_node = dev->of_node;
+       struct smsc_phy_priv *priv;
+
+       priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       priv->energy_enable = true;
+
+       if (of_property_read_bool(of_node, "smsc,disable-energy-detect"))
+               priv->energy_enable = false;
+
+       phydev->priv = priv;
+
+       return 0;
+}
+
 static struct phy_driver smsc_phy_driver[] = {
 {
        .phy_id         = 0x0007c0a0, /* OUI=0x00800f, Model#=0x0a */
@@ -159,6 +181,8 @@ static struct phy_driver smsc_phy_driver[] = {
                                | SUPPORTED_Asym_Pause),
        .flags          = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+       .probe          = smsc_phy_probe,
+
        /* basic functions */
        .config_aneg    = genphy_config_aneg,
        .read_status    = genphy_read_status,
@@ -180,6 +204,8 @@ static struct phy_driver smsc_phy_driver[] = {
                                | SUPPORTED_Asym_Pause),
        .flags          = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+       .probe          = smsc_phy_probe,
+
        /* basic functions */
        .config_aneg    = genphy_config_aneg,
        .read_status    = genphy_read_status,
@@ -201,6 +227,8 @@ static struct phy_driver smsc_phy_driver[] = {
                                | SUPPORTED_Asym_Pause),
        .flags          = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+       .probe          = smsc_phy_probe,
+
        /* basic functions */
        .config_aneg    = genphy_config_aneg,
        .read_status    = lan87xx_read_status,
@@ -222,6 +250,8 @@ static struct phy_driver smsc_phy_driver[] = {
                                | SUPPORTED_Asym_Pause),
        .flags          = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+       .probe          = smsc_phy_probe,
+
        /* basic functions */
        .config_aneg    = genphy_config_aneg,
        .read_status    = genphy_read_status,
@@ -242,6 +272,8 @@ static struct phy_driver smsc_phy_driver[] = {
                                | SUPPORTED_Asym_Pause),
        .flags          = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+       .probe          = smsc_phy_probe,
+
        /* basic functions */
        .config_aneg    = genphy_config_aneg,
        .read_status    = lan87xx_read_status,
@@ -263,6 +295,8 @@ static struct phy_driver smsc_phy_driver[] = {
                                | SUPPORTED_Asym_Pause),
        .flags          = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+       .probe          = smsc_phy_probe,
+
        /* basic functions */
        .config_aneg    = genphy_config_aneg,
        .read_status    = lan87xx_read_status,
index 90868ca..ae0905e 100644 (file)
@@ -129,24 +129,27 @@ static int lookup_chan_dst(u16 call_id, __be32 d_addr)
        return i < MAX_CALLID;
 }
 
-static int add_chan(struct pppox_sock *sock)
+static int add_chan(struct pppox_sock *sock,
+                   struct pptp_addr *sa)
 {
        static int call_id;
 
        spin_lock(&chan_lock);
-       if (!sock->proto.pptp.src_addr.call_id) {
+       if (!sa->call_id)       {
                call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, call_id + 1);
                if (call_id == MAX_CALLID) {
                        call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, 1);
                        if (call_id == MAX_CALLID)
                                goto out_err;
                }
-               sock->proto.pptp.src_addr.call_id = call_id;
-       } else if (test_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap))
+               sa->call_id = call_id;
+       } else if (test_bit(sa->call_id, callid_bitmap)) {
                goto out_err;
+       }
 
-       set_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap);
-       rcu_assign_pointer(callid_sock[sock->proto.pptp.src_addr.call_id], sock);
+       sock->proto.pptp.src_addr = *sa;
+       set_bit(sa->call_id, callid_bitmap);
+       rcu_assign_pointer(callid_sock[sa->call_id], sock);
        spin_unlock(&chan_lock);
 
        return 0;
@@ -416,7 +419,6 @@ static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr,
        struct sock *sk = sock->sk;
        struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr;
        struct pppox_sock *po = pppox_sk(sk);
-       struct pptp_opt *opt = &po->proto.pptp;
        int error = 0;
 
        if (sockaddr_len < sizeof(struct sockaddr_pppox))
@@ -424,10 +426,22 @@ static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr,
 
        lock_sock(sk);
 
-       opt->src_addr = sp->sa_addr.pptp;
-       if (add_chan(po))
+       if (sk->sk_state & PPPOX_DEAD) {
+               error = -EALREADY;
+               goto out;
+       }
+
+       if (sk->sk_state & PPPOX_BOUND) {
                error = -EBUSY;
+               goto out;
+       }
+
+       if (add_chan(po, &sp->sa_addr.pptp))
+               error = -EBUSY;
+       else
+               sk->sk_state |= PPPOX_BOUND;
 
+out:
        release_sock(sk);
        return error;
 }
@@ -498,7 +512,7 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr,
        }
 
        opt->dst_addr = sp->sa_addr.pptp;
-       sk->sk_state = PPPOX_CONNECTED;
+       sk->sk_state |= PPPOX_CONNECTED;
 
  end:
        release_sock(sk);
index 2ed5333..1c299b8 100644 (file)
@@ -36,7 +36,7 @@
 #define DRIVER_AUTHOR  "WOOJUNG HUH <woojung.huh@microchip.com>"
 #define DRIVER_DESC    "LAN78XX USB 3.0 Gigabit Ethernet Devices"
 #define DRIVER_NAME    "lan78xx"
-#define DRIVER_VERSION "1.0.1"
+#define DRIVER_VERSION "1.0.2"
 
 #define TX_TIMEOUT_JIFFIES             (5 * HZ)
 #define THROTTLE_JIFFIES               (HZ / 8)
@@ -462,32 +462,53 @@ static int lan78xx_read_raw_eeprom(struct lan78xx_net *dev, u32 offset,
                                   u32 length, u8 *data)
 {
        u32 val;
+       u32 saved;
        int i, ret;
+       int retval;
 
-       ret = lan78xx_eeprom_confirm_not_busy(dev);
-       if (ret)
-               return ret;
+       /* depends on chip, some EEPROM pins are muxed with LED function.
+        * disable & restore LED function to access EEPROM.
+        */
+       ret = lan78xx_read_reg(dev, HW_CFG, &val);
+       saved = val;
+       if ((dev->devid & ID_REV_CHIP_ID_MASK_) == 0x78000000) {
+               val &= ~(HW_CFG_LED1_EN_ | HW_CFG_LED0_EN_);
+               ret = lan78xx_write_reg(dev, HW_CFG, val);
+       }
+
+       retval = lan78xx_eeprom_confirm_not_busy(dev);
+       if (retval)
+               return retval;
 
        for (i = 0; i < length; i++) {
                val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_READ_;
                val |= (offset & E2P_CMD_EPC_ADDR_MASK_);
                ret = lan78xx_write_reg(dev, E2P_CMD, val);
-               if (unlikely(ret < 0))
-                       return -EIO;
+               if (unlikely(ret < 0)) {
+                       retval = -EIO;
+                       goto exit;
+               }
 
-               ret = lan78xx_wait_eeprom(dev);
-               if (ret < 0)
-                       return ret;
+               retval = lan78xx_wait_eeprom(dev);
+               if (retval < 0)
+                       goto exit;
 
                ret = lan78xx_read_reg(dev, E2P_DATA, &val);
-               if (unlikely(ret < 0))
-                       return -EIO;
+               if (unlikely(ret < 0)) {
+                       retval = -EIO;
+                       goto exit;
+               }
 
                data[i] = val & 0xFF;
                offset++;
        }
 
-       return 0;
+       retval = 0;
+exit:
+       if ((dev->devid & ID_REV_CHIP_ID_MASK_) == 0x78000000)
+               ret = lan78xx_write_reg(dev, HW_CFG, saved);
+
+       return retval;
 }
 
 static int lan78xx_read_eeprom(struct lan78xx_net *dev, u32 offset,
@@ -509,44 +530,67 @@ static int lan78xx_write_raw_eeprom(struct lan78xx_net *dev, u32 offset,
                                    u32 length, u8 *data)
 {
        u32 val;
+       u32 saved;
        int i, ret;
+       int retval;
 
-       ret = lan78xx_eeprom_confirm_not_busy(dev);
-       if (ret)
-               return ret;
+       /* depends on chip, some EEPROM pins are muxed with LED function.
+        * disable & restore LED function to access EEPROM.
+        */
+       ret = lan78xx_read_reg(dev, HW_CFG, &val);
+       saved = val;
+       if ((dev->devid & ID_REV_CHIP_ID_MASK_) == 0x78000000) {
+               val &= ~(HW_CFG_LED1_EN_ | HW_CFG_LED0_EN_);
+               ret = lan78xx_write_reg(dev, HW_CFG, val);
+       }
+
+       retval = lan78xx_eeprom_confirm_not_busy(dev);
+       if (retval)
+               goto exit;
 
        /* Issue write/erase enable command */
        val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_EWEN_;
        ret = lan78xx_write_reg(dev, E2P_CMD, val);
-       if (unlikely(ret < 0))
-               return -EIO;
+       if (unlikely(ret < 0)) {
+               retval = -EIO;
+               goto exit;
+       }
 
-       ret = lan78xx_wait_eeprom(dev);
-       if (ret < 0)
-               return ret;
+       retval = lan78xx_wait_eeprom(dev);
+       if (retval < 0)
+               goto exit;
 
        for (i = 0; i < length; i++) {
                /* Fill data register */
                val = data[i];
                ret = lan78xx_write_reg(dev, E2P_DATA, val);
-               if (ret < 0)
-                       return ret;
+               if (ret < 0) {
+                       retval = -EIO;
+                       goto exit;
+               }
 
                /* Send "write" command */
                val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_WRITE_;
                val |= (offset & E2P_CMD_EPC_ADDR_MASK_);
                ret = lan78xx_write_reg(dev, E2P_CMD, val);
-               if (ret < 0)
-                       return ret;
+               if (ret < 0) {
+                       retval = -EIO;
+                       goto exit;
+               }
 
-               ret = lan78xx_wait_eeprom(dev);
-               if (ret < 0)
-                       return ret;
+               retval = lan78xx_wait_eeprom(dev);
+               if (retval < 0)
+                       goto exit;
 
                offset++;
        }
 
-       return 0;
+       retval = 0;
+exit:
+       if ((dev->devid & ID_REV_CHIP_ID_MASK_) == 0x78000000)
+               ret = lan78xx_write_reg(dev, HW_CFG, saved);
+
+       return retval;
 }
 
 static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset,
@@ -904,7 +948,6 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
 
        if (!phydev->link && dev->link_on) {
                dev->link_on = false;
-               netif_carrier_off(dev->net);
 
                /* reset MAC */
                ret = lan78xx_read_reg(dev, MAC_CR, &buf);
@@ -914,6 +957,8 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
                ret = lan78xx_write_reg(dev, MAC_CR, buf);
                if (unlikely(ret < 0))
                        return -EIO;
+
+               phy_mac_interrupt(phydev, 0);
        } else if (phydev->link && !dev->link_on) {
                dev->link_on = true;
 
@@ -953,7 +998,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
                          ethtool_cmd_speed(&ecmd), ecmd.duplex, ladv, radv);
 
                ret = lan78xx_update_flowcontrol(dev, ecmd.duplex, ladv, radv);
-               netif_carrier_on(dev->net);
+               phy_mac_interrupt(phydev, 1);
        }
 
        return ret;
@@ -1495,7 +1540,6 @@ done:
 static int lan78xx_mdio_init(struct lan78xx_net *dev)
 {
        int ret;
-       int i;
 
        dev->mdiobus = mdiobus_alloc();
        if (!dev->mdiobus) {
@@ -1511,10 +1555,6 @@ static int lan78xx_mdio_init(struct lan78xx_net *dev)
        snprintf(dev->mdiobus->id, MII_BUS_ID_SIZE, "usb-%03d:%03d",
                 dev->udev->bus->busnum, dev->udev->devnum);
 
-       /* handle our own interrupt */
-       for (i = 0; i < PHY_MAX_ADDR; i++)
-               dev->mdiobus->irq[i] = PHY_IGNORE_INTERRUPT;
-
        switch (dev->devid & ID_REV_CHIP_ID_MASK_) {
        case 0x78000000:
        case 0x78500000:
@@ -1558,6 +1598,16 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
                return -EIO;
        }
 
+       /* Enable PHY interrupts.
+        * We handle our own interrupt
+        */
+       ret = phy_read(phydev, LAN88XX_INT_STS);
+       ret = phy_write(phydev, LAN88XX_INT_MASK,
+                       LAN88XX_INT_MASK_MDINTPIN_EN_ |
+                       LAN88XX_INT_MASK_LINK_CHANGE_);
+
+       phydev->irq = PHY_IGNORE_INTERRUPT;
+
        ret = phy_connect_direct(dev->net, phydev,
                                 lan78xx_link_status_change,
                                 PHY_INTERFACE_MODE_GMII);
@@ -1580,14 +1630,6 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
                              SUPPORTED_Pause | SUPPORTED_Asym_Pause);
        genphy_config_aneg(phydev);
 
-       /* Workaround to enable PHY interrupt.
-        * phy_start_interrupts() is API for requesting and enabling
-        * PHY interrupt. However, USB-to-Ethernet device can't use
-        * request_irq() called in phy_start_interrupts().
-        * Set PHY to PHY_HALTED and call phy_start()
-        * to make a call to phy_enable_interrupts()
-        */
-       phy_stop(phydev);
        phy_start(phydev);
 
        netif_dbg(dev, ifup, dev->net, "phy initialised successfully");
@@ -2221,7 +2263,9 @@ netdev_tx_t lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net)
        if (skb2) {
                skb_queue_tail(&dev->txq_pend, skb2);
 
-               if (skb_queue_len(&dev->txq_pend) > 10)
+               /* throttle TX path when slower than SUPER SPEED USB */
+               if ((dev->udev->speed < USB_SPEED_SUPER) &&
+                   (skb_queue_len(&dev->txq_pend) > 10))
                        netif_stop_queue(net);
        } else {
                netif_dbg(dev, tx_err, dev->net,
index 2d88c79..6543918 100644 (file)
@@ -73,7 +73,7 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 static int vxlan_net_id;
 static struct rtnl_link_ops vxlan_link_ops;
 
-static const u8 all_zeros_mac[ETH_ALEN];
+static const u8 all_zeros_mac[ETH_ALEN + 2];
 
 static int vxlan_sock_add(struct vxlan_dev *vxlan);
 
@@ -1985,11 +1985,6 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                                     vxlan->cfg.port_max, true);
 
        if (info) {
-               if (info->key.tun_flags & TUNNEL_CSUM)
-                       flags |= VXLAN_F_UDP_CSUM;
-               else
-                       flags &= ~VXLAN_F_UDP_CSUM;
-
                ttl = info->key.ttl;
                tos = info->key.tos;
 
@@ -2004,8 +1999,15 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                        goto drop;
                sk = vxlan->vn4_sock->sock->sk;
 
-               if (info && (info->key.tun_flags & TUNNEL_DONT_FRAGMENT))
-                       df = htons(IP_DF);
+               if (info) {
+                       if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
+                               df = htons(IP_DF);
+
+                       if (info->key.tun_flags & TUNNEL_CSUM)
+                               flags |= VXLAN_F_UDP_CSUM;
+                       else
+                               flags &= ~VXLAN_F_UDP_CSUM;
+               }
 
                memset(&fl4, 0, sizeof(fl4));
                fl4.flowi4_oif = rdst ? rdst->remote_ifindex : 0;
@@ -2101,6 +2103,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                        return;
                }
 
+               if (info) {
+                       if (info->key.tun_flags & TUNNEL_CSUM)
+                               flags &= ~VXLAN_F_UDP_ZERO_CSUM6_TX;
+                       else
+                               flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
+               }
+
                ttl = ttl ? : ip6_dst_hoplimit(ndst);
                err = vxlan6_xmit_skb(ndst, sk, skb, dev, &saddr, &dst->sin6.sin6_addr,
                                      0, ttl, src_port, dst_port, htonl(vni << 8), md,
index a7afdee..73fb423 100644 (file)
@@ -150,18 +150,18 @@ int ath9k_hw_nvram_swap_data(struct ath_hw *ah, bool *swap_needed, int size)
                return -EIO;
        }
 
-       if (magic == AR5416_EEPROM_MAGIC) {
-               *swap_needed = false;
-       } else if (swab16(magic) == AR5416_EEPROM_MAGIC) {
+       *swap_needed = false;
+       if (swab16(magic) == AR5416_EEPROM_MAGIC) {
                if (ah->ah_flags & AH_NO_EEP_SWAP) {
                        ath_info(common,
                                 "Ignoring endianness difference in EEPROM magic bytes.\n");
-
-                       *swap_needed = false;
                } else {
                        *swap_needed = true;
                }
-       } else {
+       } else if (magic != AR5416_EEPROM_MAGIC) {
+               if (ath9k_hw_use_flash(ah))
+                       return 0;
+
                ath_err(common,
                        "Invalid EEPROM Magic (0x%04x).\n", magic);
                return -EINVAL;
index 5363739..b98db8a 100644 (file)
@@ -879,11 +879,24 @@ int brcmf_sdiod_abort(struct brcmf_sdio_dev *sdiodev, uint fn)
        return 0;
 }
 
-static void brcmf_sdiod_sgtable_alloc(struct brcmf_sdio_dev *sdiodev)
+void brcmf_sdiod_sgtable_alloc(struct brcmf_sdio_dev *sdiodev)
 {
+       struct sdio_func *func;
+       struct mmc_host *host;
+       uint max_blocks;
        uint nents;
        int err;
 
+       func = sdiodev->func[2];
+       host = func->card->host;
+       sdiodev->sg_support = host->max_segs > 1;
+       max_blocks = min_t(uint, host->max_blk_count, 511u);
+       sdiodev->max_request_size = min_t(uint, host->max_req_size,
+                                         max_blocks * func->cur_blksize);
+       sdiodev->max_segment_count = min_t(uint, host->max_segs,
+                                          SG_MAX_SINGLE_ALLOC);
+       sdiodev->max_segment_size = host->max_seg_size;
+
        if (!sdiodev->sg_support)
                return;
 
@@ -1021,9 +1034,6 @@ static void brcmf_sdiod_host_fixup(struct mmc_host *host)
 
 static int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev)
 {
-       struct sdio_func *func;
-       struct mmc_host *host;
-       uint max_blocks;
        int ret = 0;
 
        sdiodev->num_funcs = 2;
@@ -1054,26 +1064,6 @@ static int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev)
                goto out;
        }
 
-       /*
-        * determine host related variables after brcmf_sdiod_probe()
-        * as func->cur_blksize is properly set and F2 init has been
-        * completed successfully.
-        */
-       func = sdiodev->func[2];
-       host = func->card->host;
-       sdiodev->sg_support = host->max_segs > 1;
-       max_blocks = min_t(uint, host->max_blk_count, 511u);
-       sdiodev->max_request_size = min_t(uint, host->max_req_size,
-                                         max_blocks * func->cur_blksize);
-       sdiodev->max_segment_count = min_t(uint, host->max_segs,
-                                          SG_MAX_SINGLE_ALLOC);
-       sdiodev->max_segment_size = host->max_seg_size;
-
-       /* allocate scatter-gather table. sg support
-        * will be disabled upon allocation failure.
-        */
-       brcmf_sdiod_sgtable_alloc(sdiodev);
-
        ret = brcmf_sdiod_freezer_attach(sdiodev);
        if (ret)
                goto out;
@@ -1084,7 +1074,7 @@ static int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev)
                ret = -ENODEV;
                goto out;
        }
-       brcmf_sdiod_host_fixup(host);
+       brcmf_sdiod_host_fixup(sdiodev->func[2]->card->host);
 out:
        if (ret)
                brcmf_sdiod_remove(sdiodev);
index 4265b50..cfee477 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/netdevice.h>
+#include <linux/module.h>
 #include <brcmu_wifi.h>
 #include <brcmu_utils.h>
 #include "core.h"
index dd66143..a14d9d9 100644 (file)
@@ -4114,6 +4114,11 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
                goto fail;
        }
 
+       /* allocate scatter-gather table. sg support
+        * will be disabled upon allocation failure.
+        */
+       brcmf_sdiod_sgtable_alloc(bus->sdiodev);
+
        /* Query the F2 block size, set roundup accordingly */
        bus->blocksize = bus->sdiodev->func[2]->cur_blksize;
        bus->roundup = min(max_roundup, bus->blocksize);
index 5ec7a6d..23f2231 100644 (file)
@@ -342,6 +342,7 @@ int brcmf_sdiod_ramrw(struct brcmf_sdio_dev *sdiodev, bool write, u32 address,
 
 /* Issue an abort to the specified function */
 int brcmf_sdiod_abort(struct brcmf_sdio_dev *sdiodev, uint fn);
+void brcmf_sdiod_sgtable_alloc(struct brcmf_sdio_dev *sdiodev);
 void brcmf_sdiod_change_state(struct brcmf_sdio_dev *sdiodev,
                              enum brcmf_sdiod_state state);
 #ifdef CONFIG_PM_SLEEP
index e60cf14..fa41a5e 100644 (file)
 #define IWL7260_UCODE_API_MAX  17
 #define IWL7265_UCODE_API_MAX  17
 #define IWL7265D_UCODE_API_MAX 20
+#define IWL3168_UCODE_API_MAX  20
 
 /* Oldest version we won't warn about */
 #define IWL7260_UCODE_API_OK   13
 #define IWL7265_UCODE_API_OK   13
 #define IWL7265D_UCODE_API_OK  13
+#define IWL3168_UCODE_API_OK   20
 
 /* Lowest firmware API version supported */
 #define IWL7260_UCODE_API_MIN  13
 #define IWL7265_UCODE_API_MIN  13
 #define IWL7265D_UCODE_API_MIN 13
+#define IWL3168_UCODE_API_MIN  20
 
 /* NVM versions */
 #define IWL7260_NVM_VERSION            0x0a1d
@@ -92,6 +95,8 @@
 #define IWL3160_TX_POWER_VERSION       0xffff /* meaningless */
 #define IWL3165_NVM_VERSION            0x709
 #define IWL3165_TX_POWER_VERSION       0xffff /* meaningless */
+#define IWL3168_NVM_VERSION            0xd01
+#define IWL3168_TX_POWER_VERSION       0xffff /* meaningless */
 #define IWL7265_NVM_VERSION            0x0a1d
 #define IWL7265_TX_POWER_VERSION       0xffff /* meaningless */
 #define IWL7265D_NVM_VERSION           0x0c11
 #define IWL3160_FW_PRE "iwlwifi-3160-"
 #define IWL3160_MODULE_FIRMWARE(api) IWL3160_FW_PRE __stringify(api) ".ucode"
 
+#define IWL3168_FW_PRE "iwlwifi-3168-"
+#define IWL3168_MODULE_FIRMWARE(api) IWL3168_FW_PRE __stringify(api) ".ucode"
+
 #define IWL7265_FW_PRE "iwlwifi-7265-"
 #define IWL7265_MODULE_FIRMWARE(api) IWL7265_FW_PRE __stringify(api) ".ucode"
 
@@ -180,6 +188,12 @@ static const struct iwl_ht_params iwl7000_ht_params = {
        .ucode_api_ok = IWL7265_UCODE_API_OK,                   \
        .ucode_api_min = IWL7265_UCODE_API_MIN
 
+#define IWL_DEVICE_3008                                                \
+       IWL_DEVICE_7000_COMMON,                                 \
+       .ucode_api_max = IWL3168_UCODE_API_MAX,                 \
+       .ucode_api_ok = IWL3168_UCODE_API_OK,                   \
+       .ucode_api_min = IWL3168_UCODE_API_MIN
+
 #define IWL_DEVICE_7005D                                       \
        IWL_DEVICE_7000_COMMON,                                 \
        .ucode_api_max = IWL7265D_UCODE_API_MAX,                \
@@ -299,11 +313,11 @@ const struct iwl_cfg iwl3165_2ac_cfg = {
 
 const struct iwl_cfg iwl3168_2ac_cfg = {
        .name = "Intel(R) Dual Band Wireless AC 3168",
-       .fw_name_pre = IWL7265D_FW_PRE,
-       IWL_DEVICE_7000,
+       .fw_name_pre = IWL3168_FW_PRE,
+       IWL_DEVICE_3008,
        .ht_params = &iwl7000_ht_params,
-       .nvm_ver = IWL3165_NVM_VERSION,
-       .nvm_calib_ver = IWL3165_TX_POWER_VERSION,
+       .nvm_ver = IWL3168_NVM_VERSION,
+       .nvm_calib_ver = IWL3168_TX_POWER_VERSION,
        .pwr_tx_backoffs = iwl7265_pwr_tx_backoffs,
        .dccm_len = IWL7265_DCCM_LEN,
 };
@@ -376,5 +390,6 @@ const struct iwl_cfg iwl7265d_n_cfg = {
 
 MODULE_FIRMWARE(IWL7260_MODULE_FIRMWARE(IWL7260_UCODE_API_OK));
 MODULE_FIRMWARE(IWL3160_MODULE_FIRMWARE(IWL7260_UCODE_API_OK));
+MODULE_FIRMWARE(IWL3168_MODULE_FIRMWARE(IWL3168_UCODE_API_OK));
 MODULE_FIRMWARE(IWL7265_MODULE_FIRMWARE(IWL7265_UCODE_API_OK));
 MODULE_FIRMWARE(IWL7265D_MODULE_FIRMWARE(IWL7265D_UCODE_API_OK));
index 0036d18..ba3f0bb 100644 (file)
@@ -510,6 +510,9 @@ struct iwl_mvm_tx_resp {
  * @scd_ssn: the index of the last contiguously sent packet
  * @txed: number of Txed frames in this batch
  * @txed_2_done: number of Acked frames in this batch
+ * @reduced_txp: power reduced according to TPC. This is the actual value and
+ *     not a copy from the LQ command. Thus, if a rate other than the first
+ *     one was used for Tx-ing, this value will be set to 0 by FW.
  */
 struct iwl_mvm_ba_notif {
        __le32 sta_addr_lo32;
@@ -524,7 +527,8 @@ struct iwl_mvm_ba_notif {
        __le16 scd_ssn;
        u8 txed;
        u8 txed_2_done;
-       __le16 reserved1;
+       u8 reduced_txp;
+       u8 reserved1;
 } __packed;
 
 /*
index 7bb6fd0..94caa88 100644 (file)
@@ -2,6 +2,7 @@
  *
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -724,14 +725,28 @@ static int _rs_collect_tx_data(struct iwl_mvm *mvm,
        return 0;
 }
 
-static int rs_collect_tx_data(struct iwl_mvm *mvm,
-                             struct iwl_lq_sta *lq_sta,
-                             struct iwl_scale_tbl_info *tbl,
-                             int scale_index, int attempts, int successes,
-                             u8 reduced_txp)
+static int rs_collect_tpc_data(struct iwl_mvm *mvm,
+                              struct iwl_lq_sta *lq_sta,
+                              struct iwl_scale_tbl_info *tbl,
+                              int scale_index, int attempts, int successes,
+                              u8 reduced_txp)
+{
+       struct iwl_rate_scale_data *window = NULL;
+
+       if (WARN_ON_ONCE(reduced_txp > TPC_MAX_REDUCTION))
+               return -EINVAL;
+
+       window = &tbl->tpc_win[reduced_txp];
+       return  _rs_collect_tx_data(mvm, tbl, scale_index, attempts, successes,
+                                   window);
+}
+
+static int rs_collect_tlc_data(struct iwl_mvm *mvm,
+                              struct iwl_lq_sta *lq_sta,
+                              struct iwl_scale_tbl_info *tbl,
+                              int scale_index, int attempts, int successes)
 {
        struct iwl_rate_scale_data *window = NULL;
-       int ret;
 
        if (scale_index < 0 || scale_index >= IWL_RATE_COUNT)
                return -EINVAL;
@@ -745,16 +760,6 @@ static int rs_collect_tx_data(struct iwl_mvm *mvm,
 
        /* Select window for current tx bit rate */
        window = &(tbl->win[scale_index]);
-
-       ret = _rs_collect_tx_data(mvm, tbl, scale_index, attempts, successes,
-                                 window);
-       if (ret)
-               return ret;
-
-       if (WARN_ON_ONCE(reduced_txp > TPC_MAX_REDUCTION))
-               return -EINVAL;
-
-       window = &tbl->tpc_win[reduced_txp];
        return _rs_collect_tx_data(mvm, tbl, scale_index, attempts, successes,
                                   window);
 }
@@ -1301,17 +1306,30 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
         * first index into rate scale table.
         */
        if (info->flags & IEEE80211_TX_STAT_AMPDU) {
-               /* ampdu_ack_len = 0 marks no BA was received. In this case
-                * treat it as a single frame loss as we don't want the success
-                * ratio to dip too quickly because a BA wasn't received
+               rs_collect_tpc_data(mvm, lq_sta, curr_tbl, lq_rate.index,
+                                   info->status.ampdu_len,
+                                   info->status.ampdu_ack_len,
+                                   reduced_txp);
+
+               /* ampdu_ack_len = 0 marks no BA was received. For TLC, treat
+                * it as a single frame loss as we don't want the success ratio
+                * to dip too quickly because a BA wasn't received.
+                * For TPC, there's no need for this optimisation since we want
+                * to recover very quickly from a bad power reduction and,
+                * therefore we'd like the success ratio to get an immediate hit
+                * when failing to get a BA, so we'd switch back to a lower or
+                * zero power reduction. When FW transmits agg with a rate
+                * different from the initial rate, it will not use reduced txp
+                * and will send BA notification twice (one empty with reduced
+                * txp equal to the value from LQ and one with reduced txp 0).
+                * We need to update counters for each txp level accordingly.
                 */
                if (info->status.ampdu_ack_len == 0)
                        info->status.ampdu_len = 1;
 
-               rs_collect_tx_data(mvm, lq_sta, curr_tbl, lq_rate.index,
-                                  info->status.ampdu_len,
-                                  info->status.ampdu_ack_len,
-                                  reduced_txp);
+               rs_collect_tlc_data(mvm, lq_sta, curr_tbl, lq_rate.index,
+                                   info->status.ampdu_len,
+                                   info->status.ampdu_ack_len);
 
                /* Update success/fail counts if not searching for new mode */
                if (lq_sta->rs_state == RS_STATE_STAY_IN_COLUMN) {
@@ -1344,9 +1362,13 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
                        else
                                continue;
 
-                       rs_collect_tx_data(mvm, lq_sta, tmp_tbl, lq_rate.index,
-                                          1, i < retries ? 0 : legacy_success,
-                                          reduced_txp);
+                       rs_collect_tpc_data(mvm, lq_sta, tmp_tbl,
+                                           lq_rate.index, 1,
+                                           i < retries ? 0 : legacy_success,
+                                           reduced_txp);
+                       rs_collect_tlc_data(mvm, lq_sta, tmp_tbl,
+                                           lq_rate.index, 1,
+                                           i < retries ? 0 : legacy_success);
                }
 
                /* Update success/fail counts if not searching for new mode */
index 8bf48a7..0914ec2 100644 (file)
@@ -1029,7 +1029,6 @@ static void iwl_mvm_rx_tx_cmd_agg(struct iwl_mvm *mvm,
                struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
                mvmsta->tid_data[tid].rate_n_flags =
                        le32_to_cpu(tx_resp->initial_rate);
-               mvmsta->tid_data[tid].reduced_tpc = tx_resp->reduced_tpc;
                mvmsta->tid_data[tid].tx_time =
                        le16_to_cpu(tx_resp->wireless_media_time);
        }
@@ -1060,7 +1059,7 @@ static void iwl_mvm_tx_info_from_ba_notif(struct ieee80211_tx_info *info,
        /* TODO: not accounted if the whole A-MPDU failed */
        info->status.tx_time = tid_data->tx_time;
        info->status.status_driver_data[0] =
-               (void *)(uintptr_t)tid_data->reduced_tpc;
+               (void *)(uintptr_t)ba_notif->reduced_txp;
        info->status.status_driver_data[1] =
                (void *)(uintptr_t)tid_data->rate_n_flags;
 }
@@ -1133,6 +1132,8 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
                           scd_flow, ba_resp_scd_ssn, ba_notif->txed,
                           ba_notif->txed_2_done);
 
+       IWL_DEBUG_TX_REPLY(mvm, "reduced txp from ba notif %d\n",
+                          ba_notif->reduced_txp);
        tid_data->next_reclaimed = ba_resp_scd_ssn;
 
        iwl_mvm_check_ratid_empty(mvm, sta, tid);
index 6261a68..00335ea 100644 (file)
@@ -378,7 +378,10 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
        {IWL_PCI_DEVICE(0x3165, 0x8110, iwl3165_2ac_cfg)},
 
 /* 3168 Series */
+       {IWL_PCI_DEVICE(0x24FB, 0x2010, iwl3168_2ac_cfg)},
        {IWL_PCI_DEVICE(0x24FB, 0x2110, iwl3168_2ac_cfg)},
+       {IWL_PCI_DEVICE(0x24FB, 0x2050, iwl3168_2ac_cfg)},
+       {IWL_PCI_DEVICE(0x24FB, 0x2150, iwl3168_2ac_cfg)},
        {IWL_PCI_DEVICE(0x24FB, 0x0000, iwl3168_2ac_cfg)},
 
 /* 7265 Series */
@@ -475,6 +478,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
        {IWL_PCI_DEVICE(0x24F3, 0x0000, iwl8265_2ac_cfg)},
        {IWL_PCI_DEVICE(0x24FD, 0x0010, iwl8265_2ac_cfg)},
        {IWL_PCI_DEVICE(0x24FD, 0x8010, iwl8265_2ac_cfg)},
+       {IWL_PCI_DEVICE(0x24FD, 0x0810, iwl8265_2ac_cfg)},
 
 /* 9000 Series */
        {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl5165_2ac_cfg)},
index c32889a..a28414c 100644 (file)
@@ -991,7 +991,8 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw,
                goto nla_put_failure;
        }
 
-       if (nla_put(skb, HWSIM_ATTR_ADDR_TRANSMITTER, ETH_ALEN, hdr->addr2))
+       if (nla_put(skb, HWSIM_ATTR_ADDR_TRANSMITTER,
+                   ETH_ALEN, data->addresses[1].addr))
                goto nla_put_failure;
 
        /* We get the skb->data */
@@ -2736,7 +2737,7 @@ static struct mac80211_hwsim_data *get_hwsim_data_ref_from_addr(const u8 *addr)
 
        spin_lock_bh(&hwsim_radio_lock);
        list_for_each_entry(data, &hwsim_radios, list) {
-               if (mac80211_hwsim_addr_match(data, addr)) {
+               if (memcmp(data->addresses[1].addr, addr, ETH_ALEN) == 0) {
                        _found = true;
                        break;
                }
index 9a3966c..155f343 100644 (file)
@@ -273,8 +273,10 @@ static void rt2400pci_config_filter(struct rt2x00_dev *rt2x00dev,
                           !(filter_flags & FIF_PLCPFAIL));
        rt2x00_set_field32(&reg, RXCSR0_DROP_CONTROL,
                           !(filter_flags & FIF_CONTROL));
-       rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME, 1);
+       rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
        rt2x00_set_field32(&reg, RXCSR0_DROP_TODS,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
                           !rt2x00dev->intf_ap_count);
        rt2x00_set_field32(&reg, RXCSR0_DROP_VERSION_ERROR, 1);
        rt2x00mmio_register_write(rt2x00dev, RXCSR0, reg);
index 1a6740b..2553cdd 100644 (file)
@@ -274,8 +274,10 @@ static void rt2500pci_config_filter(struct rt2x00_dev *rt2x00dev,
                           !(filter_flags & FIF_PLCPFAIL));
        rt2x00_set_field32(&reg, RXCSR0_DROP_CONTROL,
                           !(filter_flags & FIF_CONTROL));
-       rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME, 1);
+       rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
        rt2x00_set_field32(&reg, RXCSR0_DROP_TODS,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
                           !rt2x00dev->intf_ap_count);
        rt2x00_set_field32(&reg, RXCSR0_DROP_VERSION_ERROR, 1);
        rt2x00_set_field32(&reg, RXCSR0_DROP_MCAST,
index d26018f..2d64611 100644 (file)
@@ -437,8 +437,10 @@ static void rt2500usb_config_filter(struct rt2x00_dev *rt2x00dev,
                           !(filter_flags & FIF_PLCPFAIL));
        rt2x00_set_field16(&reg, TXRX_CSR2_DROP_CONTROL,
                           !(filter_flags & FIF_CONTROL));
-       rt2x00_set_field16(&reg, TXRX_CSR2_DROP_NOT_TO_ME, 1);
+       rt2x00_set_field16(&reg, TXRX_CSR2_DROP_NOT_TO_ME,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
        rt2x00_set_field16(&reg, TXRX_CSR2_DROP_TODS,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
                           !rt2x00dev->intf_ap_count);
        rt2x00_set_field16(&reg, TXRX_CSR2_DROP_VERSION_ERROR, 1);
        rt2x00_set_field16(&reg, TXRX_CSR2_DROP_MULTICAST,
index 9733b31..a26afca 100644 (file)
@@ -1490,7 +1490,8 @@ void rt2800_config_filter(struct rt2x00_dev *rt2x00dev,
                           !(filter_flags & FIF_FCSFAIL));
        rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_PHY_ERROR,
                           !(filter_flags & FIF_PLCPFAIL));
-       rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_NOT_TO_ME, 1);
+       rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_NOT_TO_ME,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
        rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_NOT_MY_BSSD, 0);
        rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_VER_ERROR, 1);
        rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_MULTICAST,
index 3282ddb..2642714 100644 (file)
@@ -669,6 +669,7 @@ enum rt2x00_state_flags {
        CONFIG_POWERSAVING,
        CONFIG_HT_DISABLED,
        CONFIG_QOS_DISABLED,
+       CONFIG_MONITORING,
 
        /*
         * Mark we currently are sequentially reading TX_STA_FIFO register
index 7e8bb11..6a1f508 100644 (file)
@@ -277,6 +277,11 @@ void rt2x00lib_config(struct rt2x00_dev *rt2x00dev,
        else
                clear_bit(CONFIG_POWERSAVING, &rt2x00dev->flags);
 
+       if (conf->flags & IEEE80211_CONF_MONITOR)
+               set_bit(CONFIG_MONITORING, &rt2x00dev->flags);
+       else
+               clear_bit(CONFIG_MONITORING, &rt2x00dev->flags);
+
        rt2x00dev->curr_band = conf->chandef.chan->band;
        rt2x00dev->curr_freq = conf->chandef.chan->center_freq;
        rt2x00dev->tx_power = conf->power_level;
index 3c26ee6..13da95a 100644 (file)
@@ -385,11 +385,6 @@ void rt2x00mac_configure_filter(struct ieee80211_hw *hw,
                        *total_flags |= FIF_PSPOLL;
        }
 
-       /*
-        * Check if there is any work left for us.
-        */
-       if (rt2x00dev->packet_filter == *total_flags)
-               return;
        rt2x00dev->packet_filter = *total_flags;
 
        rt2x00dev->ops->lib->config_filter(rt2x00dev, *total_flags);
index c0e730e..24a3436 100644 (file)
@@ -530,8 +530,10 @@ static void rt61pci_config_filter(struct rt2x00_dev *rt2x00dev,
                           !(filter_flags & FIF_PLCPFAIL));
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_CONTROL,
                           !(filter_flags & (FIF_CONTROL | FIF_PSPOLL)));
-       rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME, 1);
+       rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_TO_DS,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
                           !rt2x00dev->intf_ap_count);
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_VERSION_ERROR, 1);
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_MULTICAST,
index 7081e13..7bbc869 100644 (file)
@@ -480,8 +480,10 @@ static void rt73usb_config_filter(struct rt2x00_dev *rt2x00dev,
                           !(filter_flags & FIF_PLCPFAIL));
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_CONTROL,
                           !(filter_flags & (FIF_CONTROL | FIF_PSPOLL)));
-       rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME, 1);
+       rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_TO_DS,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
                           !rt2x00dev->intf_ap_count);
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_VERSION_ERROR, 1);
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_MULTICAST,
index a62bf0a..5be3411 100644 (file)
@@ -351,7 +351,6 @@ static const struct ieee80211_regdomain *_rtl_regdomain_select(
        case COUNTRY_CODE_SPAIN:
        case COUNTRY_CODE_FRANCE:
        case COUNTRY_CODE_ISRAEL:
-       case COUNTRY_CODE_WORLD_WIDE_13:
                return &rtl_regdom_12_13;
        case COUNTRY_CODE_MKK:
        case COUNTRY_CODE_MKK1:
@@ -360,6 +359,7 @@ static const struct ieee80211_regdomain *_rtl_regdomain_select(
                return &rtl_regdom_14_60_64;
        case COUNTRY_CODE_GLOBAL_DOMAIN:
                return &rtl_regdom_14;
+       case COUNTRY_CODE_WORLD_WIDE_13:
        case COUNTRY_CODE_WORLD_WIDE_13_5G_ALL:
                return &rtl_regdom_12_13_5g_all;
        default:
index d6abf19..96ccd4e 100644 (file)
@@ -364,6 +364,7 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
        RING_IDX cons, prod;
        unsigned short id;
        struct sk_buff *skb;
+       bool more_to_do;
 
        BUG_ON(!netif_carrier_ok(queue->info->netdev));
 
@@ -398,18 +399,8 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
 
                queue->tx.rsp_cons = prod;
 
-               /*
-                * Set a new event, then check for race with update of tx_cons.
-                * Note that it is essential to schedule a callback, no matter
-                * how few buffers are pending. Even if there is space in the
-                * transmit ring, higher layers may be blocked because too much
-                * data is outstanding: in such cases notification from Xen is
-                * likely to be the only kick that we'll get.
-                */
-               queue->tx.sring->rsp_event =
-                       prod + ((queue->tx.sring->req_prod - prod) >> 1) + 1;
-               mb();           /* update shared area */
-       } while ((cons == prod) && (prod != queue->tx.sring->rsp_prod));
+               RING_FINAL_CHECK_FOR_RESPONSES(&queue->tx, more_to_do);
+       } while (more_to_do);
 
        xennet_maybe_wake_tx(queue);
 }
index 4d5535c..7116472 100644 (file)
@@ -1 +1,2 @@
+source "drivers/ntb/hw/amd/Kconfig"
 source "drivers/ntb/hw/intel/Kconfig"
index 175d7c9..532e085 100644 (file)
@@ -1 +1,2 @@
+obj-$(CONFIG_NTB_AMD)  += amd/
 obj-$(CONFIG_NTB_INTEL)        += intel/
diff --git a/drivers/ntb/hw/amd/Kconfig b/drivers/ntb/hw/amd/Kconfig
new file mode 100644 (file)
index 0000000..cfe903c
--- /dev/null
@@ -0,0 +1,7 @@
+config NTB_AMD
+       tristate "AMD Non-Transparent Bridge support"
+       depends on X86_64
+       help
+        This driver supports AMD NTB on capable Zeppelin hardware.
+
+        If unsure, say N.
diff --git a/drivers/ntb/hw/amd/Makefile b/drivers/ntb/hw/amd/Makefile
new file mode 100644 (file)
index 0000000..ad54da9
--- /dev/null
@@ -0,0 +1 @@
+obj-$(CONFIG_NTB_AMD) += ntb_hw_amd.o
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
new file mode 100644 (file)
index 0000000..588803a
--- /dev/null
@@ -0,0 +1,1143 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copy
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of AMD Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * AMD PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Xiangliang Yu <Xiangliang.Yu@amd.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/pci.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/ntb.h>
+
+#include "ntb_hw_amd.h"
+
+/*
+ * Driver identification strings: NTB_NAME labels the PCI regions this driver
+ * claims, NTB_DESC/NTB_VER feed the module metadata and the load banner.
+ */
+#define NTB_NAME       "ntb_hw_amd"
+#define NTB_DESC       "AMD(R) PCI-E Non-Transparent Bridge Driver"
+#define NTB_VER                "1.0"
+
+MODULE_DESCRIPTION(NTB_DESC);
+MODULE_VERSION(NTB_VER);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("AMD Inc.");
+
+/* Forward declaration; the initializer lives near the end of this file. */
+static const struct file_operations amd_ntb_debugfs_info;
+/* Driver-wide debugfs directory; only assigned at module load when
+ * debugfs_initialized() reports true.
+ */
+static struct dentry *debugfs_dir;
+
+/*
+ * Translate a memory window index into the PCI BAR number that backs it.
+ *
+ * Window @idx is backed by BAR (1 << idx). Valid indices run from 0 to
+ * mw_count - 1; the previous bound check let idx == mw_count through, which
+ * produced a BAR number past the last memory window.
+ *
+ * Returns the BAR number, or -EINVAL when @idx is out of range.
+ */
+static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx)
+{
+       if (idx < 0 || idx >= ndev->mw_count)
+               return -EINVAL;
+
+       return 1 << idx;
+}
+
+/* ntb_dev_ops.mw_count: report the number of memory windows of this device. */
+static int amd_ntb_mw_count(struct ntb_dev *ntb)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       return ndev->mw_count;
+}
+
+/*
+ * ntb_dev_ops.mw_get_range: describe memory window @idx.
+ *
+ * Every output pointer is optional and is only written when non-NULL:
+ *   @base       - start of the PCI resource backing the window
+ *   @size       - length of that PCI resource
+ *   @align      - always SZ_4K
+ *   @align_size - always 1
+ *
+ * Returns 0, or the negative error from ndev_mw_to_bar() for a bad @idx.
+ */
+static int amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
+                               phys_addr_t *base,
+                               resource_size_t *size,
+                               resource_size_t *align,
+                               resource_size_t *align_size)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       int mw_bar = ndev_mw_to_bar(ndev, idx);
+
+       if (mw_bar < 0)
+               return mw_bar;
+
+       if (base)
+               *base = pci_resource_start(ndev->ntb.pdev, mw_bar);
+       if (size)
+               *size = pci_resource_len(ndev->ntb.pdev, mw_bar);
+       if (align)
+               *align = SZ_4K;
+       if (align_size)
+               *align_size = 1;
+
+       return 0;
+}
+
+/*
+ * ntb_dev_ops.mw_set_trans: program the translation for memory window @idx.
+ *
+ * @addr is written to the window's XLAT register (through peer_mmio) and
+ * base_addr + @size to its limit register (through self_mmio). Each write is
+ * read back; on a mismatch the registers touched so far are rolled back and
+ * -EIO is returned.
+ *
+ * BAR 1 has a 32-bit limit register, so @addr and @addr + @size must fit in
+ * 32 bits there. The other BARs use 64-bit XLAT/limit registers.
+ *
+ * Returns 0 on success, -EINVAL for a bad index, an oversized @size or an
+ * out-of-range BAR 1 address, -EIO when a register does not read back.
+ */
+static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
+                               dma_addr_t addr, resource_size_t size)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       unsigned long xlat_reg, limit_reg = 0;
+       resource_size_t mw_size;
+       void __iomem *mmio, *peer_mmio;
+       u64 base_addr, limit, reg_val;
+       int bar;
+
+       bar = ndev_mw_to_bar(ndev, idx);
+       if (bar < 0)
+               return bar;
+
+       mw_size = pci_resource_len(ndev->ntb.pdev, bar);
+
+       /* make sure the range fits in the usable mw size */
+       if (size > mw_size)
+               return -EINVAL;
+
+       mmio = ndev->self_mmio;
+       peer_mmio = ndev->peer_mmio;
+
+       base_addr = pci_resource_start(ndev->ntb.pdev, bar);
+
+       if (bar != 1) {
+               /* register pairs are 8 bytes apart per BAR number above 2 */
+               xlat_reg = AMD_BAR23XLAT_OFFSET + ((bar - 2) << 3);
+               limit_reg = AMD_BAR23LMT_OFFSET + ((bar - 2) << 3);
+
+               /* Set the limit if supported */
+               limit = base_addr + size;
+
+               /* set and verify setting the translation address */
+               write64(addr, peer_mmio + xlat_reg);
+               reg_val = read64(peer_mmio + xlat_reg);
+               if (reg_val != addr) {
+                       write64(0, peer_mmio + xlat_reg);
+                       return -EIO;
+               }
+
+               /* set and verify setting the limit */
+               write64(limit, mmio + limit_reg);
+               reg_val = read64(mmio + limit_reg);
+               if (reg_val != limit) {
+                       /* roll back both the limit and the translation */
+                       write64(base_addr, mmio + limit_reg);
+                       write64(0, peer_mmio + xlat_reg);
+                       return -EIO;
+               }
+       } else {
+               xlat_reg = AMD_BAR1XLAT_OFFSET;
+               limit_reg = AMD_BAR1LMT_OFFSET;
+
+               /* split bar addr range must all be 32 bit */
+               if (addr & (~0ull << 32))
+                       return -EINVAL;
+               if ((addr + size) & (~0ull << 32))
+                       return -EINVAL;
+
+               /* Set the limit if supported */
+               limit = base_addr + size;
+
+               /* set and verify setting the translation address */
+               write64(addr, peer_mmio + xlat_reg);
+               reg_val = read64(peer_mmio + xlat_reg);
+               if (reg_val != addr) {
+                       write64(0, peer_mmio + xlat_reg);
+                       return -EIO;
+               }
+
+               /* set and verify setting the limit */
+               writel(limit, mmio + limit_reg);
+               reg_val = readl(mmio + limit_reg);
+               if (reg_val != limit) {
+                       /* NOTE(review): XLAT was set with write64() above but
+                        * is cleared with a 32-bit writel() here - confirm
+                        * this is intended.
+                        */
+                       writel(base_addr, mmio + limit_reg);
+                       writel(0, peer_mmio + xlat_reg);
+                       return -EIO;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Report whether the link is currently usable.
+ *
+ * With no peer event pending, the answer comes from the cached control
+ * status. While a peer event is pending the link is reported down, and the
+ * pending state is consumed as a side effect (see below).
+ */
+static int amd_link_is_up(struct amd_ntb_dev *ndev)
+{
+       if (ndev->peer_sta) {
+               /* For a reset or D0 event the ISR has already started a
+                * timer that polls the hardware link status, so only the
+                * status bit needs clearing here. For D3 or PME_TO nothing
+                * is done: a D0/reset event will follow when the system
+                * wakes up or powers on.
+                */
+               if (ndev->peer_sta & AMD_PEER_RESET_EVENT)
+                       ndev->peer_sta &= ~AMD_PEER_RESET_EVENT;
+               else if (ndev->peer_sta & AMD_PEER_D0_EVENT)
+                       ndev->peer_sta = 0;
+
+               return 0;
+       }
+
+       return NTB_LNK_STA_ACTIVE(ndev->cntl_sta);
+}
+
+/*
+ * ntb_dev_ops.link_is_up: return 1 when the link is up, 0 otherwise.
+ *
+ * @speed and @width are optional. When the link is up they receive the
+ * values decoded from the cached link status; when it is down they receive
+ * NTB_SPEED_NONE / NTB_WIDTH_NONE.
+ */
+static int amd_ntb_link_is_up(struct ntb_dev *ntb,
+                             enum ntb_speed *speed,
+                             enum ntb_width *width)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       if (!amd_link_is_up(ndev)) {
+               if (speed)
+                       *speed = NTB_SPEED_NONE;
+               if (width)
+                       *width = NTB_WIDTH_NONE;
+
+               dev_dbg(ndev_dev(ndev), "link is down.\n");
+               return 0;
+       }
+
+       if (speed)
+               *speed = NTB_LNK_STA_SPEED(ndev->lnk_sta);
+       if (width)
+               *width = NTB_LNK_STA_WIDTH(ndev->lnk_sta);
+
+       dev_dbg(ndev_dev(ndev), "link is up.\n");
+       return 1;
+}
+
+/*
+ * ntb_dev_ops.link_enable: unmask event interrupts and turn the link on.
+ *
+ * The event interrupt is unmasked for both topologies. Only a non-secondary
+ * side goes on to set the PMM/SMM control bits; the secondary side returns
+ * -EINVAL after the unmask. @max_speed and @max_width are not used.
+ */
+static int amd_ntb_link_enable(struct ntb_dev *ntb,
+                              enum ntb_speed max_speed,
+                              enum ntb_width max_width)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *regs = ndev->self_mmio;
+       u32 cntl;
+
+       /* Enable event interrupt */
+       ndev->int_mask &= ~AMD_EVENT_INTMASK;
+       writel(ndev->int_mask, regs + AMD_INTMASK_OFFSET);
+
+       if (ndev->ntb.topo == NTB_TOPO_SEC)
+               return -EINVAL;
+
+       dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
+
+       cntl = readl(regs + AMD_CNTL_OFFSET);
+       cntl |= (PMM_REG_CTL | SMM_REG_CTL);
+       writel(cntl, regs + AMD_CNTL_OFFSET);
+
+       return 0;
+}
+
+/*
+ * ntb_dev_ops.link_disable: mask event interrupts and turn the link off.
+ *
+ * The event interrupt is masked for both topologies. Only a non-secondary
+ * side goes on to clear the PMM/SMM control bits; the secondary side returns
+ * -EINVAL after the mask.
+ */
+static int amd_ntb_link_disable(struct ntb_dev *ntb)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       u32 ntb_ctl;
+
+       /* Disable event interrupt */
+       ndev->int_mask |= AMD_EVENT_INTMASK;
+       writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+       if (ndev->ntb.topo == NTB_TOPO_SEC)
+               return -EINVAL;
+       /* the message used to read "Enabling Link.", copied from link_enable */
+       dev_dbg(ndev_dev(ndev), "Disabling Link.\n");
+
+       ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
+       ntb_ctl &= ~(PMM_REG_CTL | SMM_REG_CTL);
+       writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
+
+       return 0;
+}
+
+/* ntb_dev_ops.db_valid_mask: bitmask of the doorbell bits this device has. */
+static u64 amd_ntb_db_valid_mask(struct ntb_dev *ntb)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       return ndev->db_valid_mask;
+}
+
+/* ntb_dev_ops.db_vector_count: number of doorbell interrupt vectors in use. */
+static int amd_ntb_db_vector_count(struct ntb_dev *ntb)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       return ndev->db_count;
+}
+
+/*
+ * ntb_dev_ops.db_vector_mask: doorbell bits served by vector @db_vector.
+ *
+ * Each vector serves exactly one doorbell bit. Valid vectors run from 0 to
+ * db_count - 1; the previous bound check also accepted db_vector == db_count.
+ * The bit is built with BIT_ULL() so the shift is done in 64 bits, matching
+ * the u64 return type.
+ *
+ * Returns the mask, or 0 for an out-of-range vector.
+ */
+static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       if (db_vector < 0 || db_vector >= ndev->db_count)
+               return 0;
+
+       return ndev->db_valid_mask & BIT_ULL(db_vector);
+}
+
+/* ntb_dev_ops.db_read: read the 16-bit doorbell status register. */
+static u64 amd_ntb_db_read(struct ntb_dev *ntb)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *db_stat = ndev->self_mmio + AMD_DBSTAT_OFFSET;
+
+       return (u64)readw(db_stat);
+}
+
+/*
+ * ntb_dev_ops.db_clear: write the low 16 bits of @db_bits to the doorbell
+ * status register. Always returns 0.
+ */
+static int amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *db_stat = ndev->self_mmio + AMD_DBSTAT_OFFSET;
+
+       writew((u16)db_bits, db_stat);
+
+       return 0;
+}
+
+/*
+ * ntb_dev_ops.db_set_mask: add @db_bits to the doorbell interrupt mask.
+ *
+ * The cached mask and the hardware register are updated together under
+ * db_mask_lock. Returns -EINVAL if @db_bits contains a bit outside
+ * db_valid_mask, 0 otherwise.
+ */
+static int amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mask_reg = ndev->self_mmio + AMD_DBMASK_OFFSET;
+       unsigned long irqflags;
+
+       if (db_bits & ~ndev->db_valid_mask)
+               return -EINVAL;
+
+       spin_lock_irqsave(&ndev->db_mask_lock, irqflags);
+       ndev->db_mask |= db_bits;
+       writew((u16)ndev->db_mask, mask_reg);
+       spin_unlock_irqrestore(&ndev->db_mask_lock, irqflags);
+
+       return 0;
+}
+
+/*
+ * ntb_dev_ops.db_clear_mask: remove @db_bits from the doorbell interrupt
+ * mask.
+ *
+ * The cached mask and the hardware register are updated together under
+ * db_mask_lock. Returns -EINVAL if @db_bits contains a bit outside
+ * db_valid_mask, 0 otherwise.
+ */
+static int amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mask_reg = ndev->self_mmio + AMD_DBMASK_OFFSET;
+       unsigned long irqflags;
+
+       if (db_bits & ~ndev->db_valid_mask)
+               return -EINVAL;
+
+       spin_lock_irqsave(&ndev->db_mask_lock, irqflags);
+       ndev->db_mask &= ~db_bits;
+       writew((u16)ndev->db_mask, mask_reg);
+       spin_unlock_irqrestore(&ndev->db_mask_lock, irqflags);
+
+       return 0;
+}
+
+/*
+ * ntb_dev_ops.peer_db_addr: report where the peer doorbell register lives.
+ *
+ * @db_addr and @db_size are optional. The address is a physical address
+ * derived from the start of BAR 0; peer_mmio is BAR 0 mapped at
+ * AMD_PEER_OFFSET. The previous code cast the ioremapped kernel virtual
+ * pointer to phys_addr_t, which is not an address a DMA engine or another
+ * bus master can use.
+ *
+ * NOTE(review): amd_ntb_peer_db_set() rings the doorbell through self_mmio
+ * rather than peer_mmio - confirm which register block the peer doorbell
+ * address should refer to.
+ *
+ * Always returns 0.
+ */
+static int amd_ntb_peer_db_addr(struct ntb_dev *ntb,
+                               phys_addr_t *db_addr,
+                               resource_size_t *db_size)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       if (db_addr)
+               *db_addr = pci_resource_start(ndev->ntb.pdev, 0) +
+                          AMD_PEER_OFFSET + AMD_DBREQ_OFFSET;
+       if (db_size)
+               *db_size = sizeof(u32);
+
+       return 0;
+}
+
+/*
+ * ntb_dev_ops.peer_db_set: write the low 16 bits of @db_bits to the doorbell
+ * request register. Always returns 0.
+ */
+static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *db_req = ndev->self_mmio + AMD_DBREQ_OFFSET;
+
+       writew((u16)db_bits, db_req);
+
+       return 0;
+}
+
+/* ntb_dev_ops.spad_count: number of scratchpads available to each side. */
+static int amd_ntb_spad_count(struct ntb_dev *ntb)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       return ndev->spad_count;
+}
+
+/*
+ * ntb_dev_ops.spad_read: read local scratchpad @idx.
+ * Returns the register value, or 0 when @idx is out of range.
+ */
+static u32 amd_ntb_spad_read(struct ntb_dev *ntb, int idx)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *spad_base = ndev->self_mmio;
+       u32 spad_off;
+
+       if (idx < 0 || idx >= ndev->spad_count)
+               return 0;
+
+       /* scratchpads are 4 bytes apart, starting at this side's block */
+       spad_off = ndev->self_spad + (idx << 2);
+
+       return readl(spad_base + AMD_SPAD_OFFSET + spad_off);
+}
+
+/*
+ * ntb_dev_ops.spad_write: store @val in local scratchpad @idx.
+ * Returns 0, or -EINVAL when @idx is out of range.
+ */
+static int amd_ntb_spad_write(struct ntb_dev *ntb,
+                             int idx, u32 val)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *spad_base = ndev->self_mmio;
+       u32 spad_off;
+
+       if (idx < 0 || idx >= ndev->spad_count)
+               return -EINVAL;
+
+       /* scratchpads are 4 bytes apart, starting at this side's block */
+       spad_off = ndev->self_spad + (idx << 2);
+       writel(val, spad_base + AMD_SPAD_OFFSET + spad_off);
+
+       return 0;
+}
+
+/*
+ * ntb_dev_ops.peer_spad_addr: report where peer scratchpad @idx lives.
+ *
+ * @spad_addr is optional. The address is a physical address derived from the
+ * start of BAR 0; self_mmio is the mapping of BAR 0. The previous code cast
+ * the ioremapped kernel virtual pointer to phys_addr_t, which is not an
+ * address a DMA engine or another bus master can use.
+ *
+ * Returns 0, or -EINVAL when @idx is out of range.
+ */
+static int amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
+                                 phys_addr_t *spad_addr)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       if (idx < 0 || idx >= ndev->spad_count)
+               return -EINVAL;
+
+       if (spad_addr)
+               *spad_addr = pci_resource_start(ndev->ntb.pdev, 0) +
+                            AMD_SPAD_OFFSET + ndev->peer_spad + (idx << 2);
+       return 0;
+}
+
+/*
+ * ntb_dev_ops.peer_spad_read: read peer scratchpad @idx.
+ *
+ * Returns the register value, or 0 when @idx is out of range. The return
+ * type is u32, so the previous "-EINVAL" came back as a large positive
+ * value that callers could not tell apart from data. Returning 0 matches
+ * amd_ntb_spad_read().
+ */
+static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       u32 offset;
+
+       if (idx < 0 || idx >= ndev->spad_count)
+               return 0;
+
+       /* scratchpads are 4 bytes apart, starting at the peer's block */
+       offset = ndev->peer_spad + (idx << 2);
+       return readl(mmio + AMD_SPAD_OFFSET + offset);
+}
+
+/*
+ * ntb_dev_ops.peer_spad_write: store @val in peer scratchpad @idx.
+ * Returns 0, or -EINVAL when @idx is out of range.
+ */
+static int amd_ntb_peer_spad_write(struct ntb_dev *ntb,
+                                  int idx, u32 val)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *spad_base = ndev->self_mmio;
+       u32 spad_off;
+
+       if (idx < 0 || idx >= ndev->spad_count)
+               return -EINVAL;
+
+       /* scratchpads are 4 bytes apart, starting at the peer's block */
+       spad_off = ndev->peer_spad + (idx << 2);
+       writel(val, spad_base + AMD_SPAD_OFFSET + spad_off);
+
+       return 0;
+}
+
+/*
+ * Operations table installed into ndev->ntb.ops by ndev_init_struct().
+ * It wires the memory-window, link, doorbell and scratchpad callbacks
+ * defined above.
+ */
+static const struct ntb_dev_ops amd_ntb_ops = {
+       .mw_count               = amd_ntb_mw_count,
+       .mw_get_range           = amd_ntb_mw_get_range,
+       .mw_set_trans           = amd_ntb_mw_set_trans,
+       .link_is_up             = amd_ntb_link_is_up,
+       .link_enable            = amd_ntb_link_enable,
+       .link_disable           = amd_ntb_link_disable,
+       .db_valid_mask          = amd_ntb_db_valid_mask,
+       .db_vector_count        = amd_ntb_db_vector_count,
+       .db_vector_mask         = amd_ntb_db_vector_mask,
+       .db_read                = amd_ntb_db_read,
+       .db_clear               = amd_ntb_db_clear,
+       .db_set_mask            = amd_ntb_db_set_mask,
+       .db_clear_mask          = amd_ntb_db_clear_mask,
+       .peer_db_addr           = amd_ntb_peer_db_addr,
+       .peer_db_set            = amd_ntb_peer_db_set,
+       .spad_count             = amd_ntb_spad_count,
+       .spad_read              = amd_ntb_spad_read,
+       .spad_write             = amd_ntb_spad_write,
+       .peer_spad_addr         = amd_ntb_peer_spad_addr,
+       .peer_spad_read         = amd_ntb_peer_spad_read,
+       .peer_spad_write        = amd_ntb_peer_spad_write,
+};
+
+/*
+ * Acknowledge event @bit by setting it in the SMU ack register, and record
+ * it in ndev->peer_sta.
+ *
+ * The register image is held in a u32: readl() returns a u32 and the value
+ * is a bit field, so it should not pass through a signed int.
+ */
+static void amd_ack_smu(struct amd_ntb_dev *ndev, u32 bit)
+{
+       void __iomem *mmio = ndev->self_mmio;
+       u32 reg;
+
+       reg = readl(mmio + AMD_SMUACK_OFFSET);
+       reg |= bit;
+       writel(reg, mmio + AMD_SMUACK_OFFSET);
+
+       ndev->peer_sta |= bit;
+}
+
+/*
+ * Handle a peer event interrupt.
+ *
+ * Reads the interrupt status, returns immediately if no event bit is set,
+ * and otherwise dispatches on the masked status value. The switch matches
+ * the masked status against single event constants, so a status with
+ * several event bits set at once falls through to the default case, which
+ * only logs it.
+ *
+ * @vec is only used for the debug message.
+ */
+static void amd_handle_event(struct amd_ntb_dev *ndev, int vec)
+{
+       void __iomem *mmio = ndev->self_mmio;
+       u32 status;
+
+       status = readl(mmio + AMD_INTSTAT_OFFSET);
+       if (!(status & AMD_EVENT_INTMASK))
+               return;
+
+       dev_dbg(ndev_dev(ndev), "status = 0x%x and vec = %d\n", status, vec);
+
+       status &= AMD_EVENT_INTMASK;
+       switch (status) {
+       case AMD_PEER_FLUSH_EVENT:
+               dev_info(ndev_dev(ndev), "Flush is done.\n");
+               break;
+       case AMD_PEER_RESET_EVENT:
+               amd_ack_smu(ndev, AMD_PEER_RESET_EVENT);
+
+               /* link down first */
+               ntb_link_event(&ndev->ntb);
+               /* polling peer status */
+               schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+
+               break;
+       case AMD_PEER_D3_EVENT:
+       case AMD_PEER_PMETO_EVENT:
+               amd_ack_smu(ndev, status);
+
+               /* link down */
+               ntb_link_event(&ndev->ntb);
+
+               break;
+       case AMD_PEER_D0_EVENT:
+               /* from here on mmio and status refer to the peer PME status */
+               mmio = ndev->peer_mmio;
+               status = readl(mmio + AMD_PMESTAT_OFFSET);
+               /* check if this is WAKEUP event */
+               if (status & 0x1)
+                       dev_info(ndev_dev(ndev), "Wakeup is done.\n");
+
+               amd_ack_smu(ndev, AMD_PEER_D0_EVENT);
+
+               /* start a timer to poll link status */
+               schedule_delayed_work(&ndev->hb_timer,
+                                     AMD_LINK_HB_TIMEOUT);
+               break;
+       default:
+               dev_info(ndev_dev(ndev), "event status = 0x%x.\n", status);
+               break;
+       }
+}
+
+/*
+ * Common interrupt body shared by the per-vector and single-IRQ handlers.
+ *
+ * Vectors below AMD_DB_CNT are doorbells; the ones at or above it carry
+ * events. When only one vector is in use it serves both purposes, so the
+ * event handler also runs. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t ndev_interrupt(struct amd_ntb_dev *ndev, int vec)
+{
+       int shared_vector = (ndev->msix_vec_count == 1);
+
+       dev_dbg(ndev_dev(ndev), "vec %d\n", vec);
+
+       if (vec >= AMD_DB_CNT || shared_vector)
+               amd_handle_event(ndev, vec);
+
+       if (vec < AMD_DB_CNT)
+               ntb_db_event(&ndev->ntb, vec);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * MSI-X handler: @dev is the struct amd_ntb_vec registered for this vector
+ * and supplies both the device and the vector number.
+ */
+static irqreturn_t ndev_vec_isr(int irq, void *dev)
+{
+       struct amd_ntb_vec *entry = dev;
+       struct amd_ntb_dev *ndev = entry->ndev;
+
+       return ndev_interrupt(ndev, entry->num);
+}
+
+/*
+ * MSI / INTx handler: @dev is the device itself, and the vector number is
+ * the offset of @irq from the device's base irq.
+ */
+static irqreturn_t ndev_irq_isr(int irq, void *dev)
+{
+       struct amd_ntb_dev *ndev = dev;
+       int vec = irq - ndev_pdev(ndev)->irq;
+
+       return ndev_interrupt(ndev, vec);
+}
+
+/*
+ * Set up interrupt delivery, trying MSI-X first, then MSI, then legacy INTx.
+ *
+ * @msix_min: fewest MSI-X vectors that are acceptable
+ * @msix_max: most MSI-X vectors to ask for
+ *
+ * On MSI-X success db_count is @msix_min and msix_vec_count is the number of
+ * vectors that were actually enabled and requested. On MSI or INTx success
+ * both counts are 1 and ndev->msix / ndev->vec are NULL.
+ *
+ * Fixes relative to the previous version:
+ *  - the MSI-X unwind path passed "ndev" as dev_id to free_irq(), but the
+ *    vectors were requested with &ndev->vec[i]; free_irq() needs the same
+ *    cookie to find the handler;
+ *  - msix_vec_count was set to @msix_max even when fewer vectors had been
+ *    granted, so ndev_deinit_isr() would free vectors that were never
+ *    requested.
+ *
+ * Returns 0 on success, or the request_irq() error of the INTx attempt.
+ */
+static int ndev_init_isr(struct amd_ntb_dev *ndev,
+                        int msix_min, int msix_max)
+{
+       struct pci_dev *pdev;
+       int rc, i, msix_count, node;
+
+       pdev = ndev_pdev(ndev);
+
+       node = dev_to_node(&pdev->dev);
+
+       /* start with every doorbell masked */
+       ndev->db_mask = ndev->db_valid_mask;
+
+       /* Try to set up msix irq */
+       ndev->vec = kzalloc_node(msix_max * sizeof(*ndev->vec),
+                                GFP_KERNEL, node);
+       if (!ndev->vec)
+               goto err_msix_vec_alloc;
+
+       ndev->msix = kzalloc_node(msix_max * sizeof(*ndev->msix),
+                                 GFP_KERNEL, node);
+       if (!ndev->msix)
+               goto err_msix_alloc;
+
+       for (i = 0; i < msix_max; ++i)
+               ndev->msix[i].entry = i;
+
+       msix_count = pci_enable_msix_range(pdev, ndev->msix,
+                                          msix_min, msix_max);
+       if (msix_count < 0)
+               goto err_msix_enable;
+
+       /* NOTE: Disable MSIX if msix count is less than 16 because of
+        * hardware limitation.
+        */
+       if (msix_count < msix_min) {
+               pci_disable_msix(pdev);
+               goto err_msix_enable;
+       }
+
+       for (i = 0; i < msix_count; ++i) {
+               ndev->vec[i].ndev = ndev;
+               ndev->vec[i].num = i;
+               rc = request_irq(ndev->msix[i].vector, ndev_vec_isr, 0,
+                                "ndev_vec_isr", &ndev->vec[i]);
+               if (rc)
+                       goto err_msix_request;
+       }
+
+       dev_dbg(ndev_dev(ndev), "Using msix interrupts\n");
+       ndev->db_count = msix_min;
+       /* remember how many vectors were requested so teardown frees
+        * exactly those
+        */
+       ndev->msix_vec_count = msix_count;
+       return 0;
+
+err_msix_request:
+       /* dev_id must be the cookie that was handed to request_irq() */
+       while (i-- > 0)
+               free_irq(ndev->msix[i].vector, &ndev->vec[i]);
+       pci_disable_msix(pdev);
+err_msix_enable:
+       kfree(ndev->msix);
+err_msix_alloc:
+       kfree(ndev->vec);
+err_msix_vec_alloc:
+       ndev->msix = NULL;
+       ndev->vec = NULL;
+
+       /* Try to set up msi irq */
+       rc = pci_enable_msi(pdev);
+       if (rc)
+               goto err_msi_enable;
+
+       rc = request_irq(pdev->irq, ndev_irq_isr, 0,
+                        "ndev_irq_isr", ndev);
+       if (rc)
+               goto err_msi_request;
+
+       dev_dbg(ndev_dev(ndev), "Using msi interrupts\n");
+       ndev->db_count = 1;
+       ndev->msix_vec_count = 1;
+       return 0;
+
+err_msi_request:
+       pci_disable_msi(pdev);
+err_msi_enable:
+
+       /* Try to set up intx irq */
+       pci_intx(pdev, 1);
+
+       rc = request_irq(pdev->irq, ndev_irq_isr, IRQF_SHARED,
+                        "ndev_irq_isr", ndev);
+       if (rc)
+               goto err_intx_request;
+
+       dev_dbg(ndev_dev(ndev), "Using intx interrupts\n");
+       ndev->db_count = 1;
+       ndev->msix_vec_count = 1;
+       return 0;
+
+err_intx_request:
+       return rc;
+}
+
+/*
+ * Undo ndev_init_isr(): mask every doorbell, then release the interrupt
+ * resources of whichever delivery mode was set up (MSI-X when ndev->msix is
+ * set, otherwise MSI or INTx).
+ */
+static void ndev_deinit_isr(struct amd_ntb_dev *ndev)
+{
+       struct pci_dev *pdev = ndev_pdev(ndev);
+       void __iomem *regs = ndev->self_mmio;
+       int vec;
+
+       /* Mask all doorbell interrupts */
+       ndev->db_mask = ndev->db_valid_mask;
+       writel(ndev->db_mask, regs + AMD_DBMASK_OFFSET);
+
+       if (!ndev->msix) {
+               /* single-interrupt mode: MSI if enabled, INTx otherwise */
+               free_irq(pdev->irq, ndev);
+               if (pci_dev_msi_enabled(pdev))
+                       pci_disable_msi(pdev);
+               else
+                       pci_intx(pdev, 0);
+               return;
+       }
+
+       /* release the vectors from the highest index down */
+       vec = ndev->msix_vec_count;
+       while (vec--)
+               free_irq(ndev->msix[vec].vector, &ndev->vec[vec]);
+
+       pci_disable_msix(pdev);
+       kfree(ndev->msix);
+       kfree(ndev->vec);
+}
+
+/*
+ * debugfs "info" read handler.
+ *
+ * Formats a text report (topology, link state, resource counts, doorbell
+ * and translation/limit registers) into a temporary buffer of at most 0x800
+ * bytes and copies the requested part to user space.
+ *
+ * Note that the link check goes through amd_link_is_up(), which may clear
+ * pending bits in ndev->peer_sta.
+ *
+ * Returns the number of bytes copied, or a negative errno (-ENOMEM when the
+ * buffer cannot be allocated).
+ */
+static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
+                                size_t count, loff_t *offp)
+{
+       struct amd_ntb_dev *ndev;
+       void __iomem *mmio;
+       char *buf;
+       size_t buf_size;
+       ssize_t ret, off;
+       /* scratch for register reads of different widths; v16 is unused */
+       union { u64 v64; u32 v32; u16 v16; } u;
+
+       ndev = filp->private_data;
+       mmio = ndev->self_mmio;
+
+       buf_size = min(count, 0x800ul);
+
+       buf = kmalloc(buf_size, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       off = 0;
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "NTB Device Information:\n");
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "Connection Topology -\t%s\n",
+                        ntb_topo_string(ndev->ntb.topo));
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "LNK STA -\t\t%#06x\n", ndev->lnk_sta);
+
+       if (!amd_link_is_up(ndev)) {
+               off += scnprintf(buf + off, buf_size - off,
+                                "Link Status -\t\tDown\n");
+       } else {
+               off += scnprintf(buf + off, buf_size - off,
+                                "Link Status -\t\tUp\n");
+               off += scnprintf(buf + off, buf_size - off,
+                                "Link Speed -\t\tPCI-E Gen %u\n",
+                                NTB_LNK_STA_SPEED(ndev->lnk_sta));
+               off += scnprintf(buf + off, buf_size - off,
+                                "Link Width -\t\tx%u\n",
+                                NTB_LNK_STA_WIDTH(ndev->lnk_sta));
+       }
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "Memory Window Count -\t%u\n", ndev->mw_count);
+       off += scnprintf(buf + off, buf_size - off,
+                        "Scratchpad Count -\t%u\n", ndev->spad_count);
+       off += scnprintf(buf + off, buf_size - off,
+                        "Doorbell Count -\t%u\n", ndev->db_count);
+       off += scnprintf(buf + off, buf_size - off,
+                        "MSIX Vector Count -\t%u\n", ndev->msix_vec_count);
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "Doorbell Valid Mask -\t%#llx\n", ndev->db_valid_mask);
+
+       u.v32 = readl(ndev->self_mmio + AMD_DBMASK_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "Doorbell Mask -\t\t\t%#06x\n", u.v32);
+
+       u.v32 = readl(mmio + AMD_DBSTAT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "Doorbell Bell -\t\t\t%#06x\n", u.v32);
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "\nNTB Incoming XLAT:\n");
+
+       u.v64 = read64(mmio + AMD_BAR1XLAT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "XLAT1 -\t\t%#018llx\n", u.v64);
+
+       u.v64 = read64(ndev->self_mmio + AMD_BAR23XLAT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "XLAT23 -\t\t%#018llx\n", u.v64);
+
+       u.v64 = read64(ndev->self_mmio + AMD_BAR45XLAT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "XLAT45 -\t\t%#018llx\n", u.v64);
+
+       u.v32 = readl(mmio + AMD_BAR1LMT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "LMT1 -\t\t\t%#06x\n", u.v32);
+
+       u.v64 = read64(ndev->self_mmio + AMD_BAR23LMT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "LMT23 -\t\t\t%#018llx\n", u.v64);
+
+       u.v64 = read64(ndev->self_mmio + AMD_BAR45LMT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "LMT45 -\t\t\t%#018llx\n", u.v64);
+
+       /* hand back only the part of the report that was produced */
+       ret = simple_read_from_buffer(ubuf, count, offp, buf, off);
+       kfree(buf);
+       return ret;
+}
+
+/*
+ * Create the per-device debugfs directory and its "info" file.
+ *
+ * Both handles are left NULL when the driver-wide directory does not exist;
+ * debugfs_info is left NULL when the per-device directory cannot be created.
+ */
+static void ndev_init_debugfs(struct amd_ntb_dev *ndev)
+{
+       ndev->debugfs_dir = NULL;
+       ndev->debugfs_info = NULL;
+
+       if (!debugfs_dir)
+               return;
+
+       ndev->debugfs_dir = debugfs_create_dir(ndev_name(ndev), debugfs_dir);
+       if (!ndev->debugfs_dir)
+               return;
+
+       ndev->debugfs_info = debugfs_create_file("info", S_IRUSR,
+                                                ndev->debugfs_dir, ndev,
+                                                &amd_ntb_debugfs_info);
+}
+
+/* Remove the per-device debugfs directory together with everything in it. */
+static void ndev_deinit_debugfs(struct amd_ntb_dev *ndev)
+{
+       debugfs_remove_recursive(ndev->debugfs_dir);
+}
+
+/*
+ * Fill in the software-only fields of a freshly allocated device: the
+ * doorbell mask lock, the initial event interrupt mask, and the embedded
+ * ntb_dev (owner PCI device, unknown topology, operations table).
+ */
+static inline void ndev_init_struct(struct amd_ntb_dev *ndev,
+                                   struct pci_dev *pdev)
+{
+       struct ntb_dev *ntb = &ndev->ntb;
+
+       ntb->pdev = pdev;
+       ntb->topo = NTB_TOPO_NONE;
+       ntb->ops = &amd_ntb_ops;
+
+       ndev->int_mask = AMD_EVENT_INTMASK;
+       spin_lock_init(&ndev->db_mask_lock);
+}
+
+/*
+ * Sample the peer's side-info register and refresh the cached link state.
+ *
+ * Returns 1 when the active bit changed and the PCI link status could be
+ * re-read, 0 when nothing changed or the config read failed. In the failure
+ * case cntl_sta has already been updated.
+ */
+static int amd_poll_link(struct amd_ntb_dev *ndev)
+{
+       void __iomem *peer = ndev->peer_mmio;
+       u32 active, link;
+
+       active = readl(peer + AMD_SIDEINFO_OFFSET);
+       active &= NTB_LIN_STA_ACTIVE_BIT;
+
+       dev_dbg(ndev_dev(ndev), "%s: reg_val = 0x%x.\n", __func__, active);
+
+       if (active == ndev->cntl_sta)
+               return 0;
+
+       ndev->cntl_sta = active;
+
+       if (pci_read_config_dword(ndev->ntb.pdev,
+                                 AMD_LINK_STATUS_OFFSET, &link))
+               return 0;
+
+       ndev->lnk_sta = link;
+
+       return 1;
+}
+
+/*
+ * Heartbeat work item: poll the link, notify the NTB core when its state
+ * changed, and re-arm the timer for as long as the link is reported down.
+ */
+static void amd_link_hb(struct work_struct *work)
+{
+       struct amd_ntb_dev *ndev = hb_ndev(work);
+       int changed = amd_poll_link(ndev);
+
+       if (changed)
+               ntb_link_event(&ndev->ntb);
+
+       if (amd_link_is_up(ndev))
+               return;
+
+       schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+}
+
+/*
+ * Set up interrupts with the AMD vector counts: at least AMD_DB_CNT and at
+ * most AMD_MSIX_VECTOR_CNT MSI-X vectors.
+ */
+static int amd_init_isr(struct amd_ntb_dev *ndev)
+{
+       return ndev_init_isr(ndev, AMD_DB_CNT, AMD_MSIX_VECTOR_CNT);
+}
+
+/* Set AMD_SIDE_READY in the side-info register unless it is already set. */
+static void amd_init_side_info(struct amd_ntb_dev *ndev)
+{
+       void __iomem *side_reg = ndev->self_mmio + AMD_SIDEINFO_OFFSET;
+       unsigned int info = readl(side_reg);
+
+       if (info & AMD_SIDE_READY)
+               return;
+
+       info |= AMD_SIDE_READY;
+       writel(info, side_reg);
+}
+
+/*
+ * Clear AMD_SIDE_READY in the side-info register if it is set, then read
+ * the register back.
+ */
+static void amd_deinit_side_info(struct amd_ntb_dev *ndev)
+{
+       void __iomem *side_reg = ndev->self_mmio + AMD_SIDEINFO_OFFSET;
+       unsigned int info = readl(side_reg);
+
+       if (!(info & AMD_SIDE_READY))
+               return;
+
+       info &= ~AMD_SIDE_READY;
+       writel(info, side_reg);
+       /* read back after the write; the value itself is not used */
+       readl(side_reg);
+}
+
+/*
+ * Initialise resource counts and topology-dependent state.
+ *
+ * For the primary and secondary topologies the scratchpads are split in
+ * half between the two sides (the second half starts at byte offset 0x20),
+ * the heartbeat work is initialised and armed, the doorbell valid mask is
+ * derived from db_count and the event interrupt mask is written to hardware.
+ *
+ * Any other topology is rejected with -EINVAL; the counts have been set by
+ * then but nothing else has been touched.
+ */
+static int amd_init_ntb(struct amd_ntb_dev *ndev)
+{
+       void __iomem *regs = ndev->self_mmio;
+       int is_primary = (ndev->ntb.topo == NTB_TOPO_PRI);
+
+       ndev->mw_count = AMD_MW_CNT;
+       ndev->spad_count = AMD_SPADS_CNT;
+       ndev->db_count = AMD_DB_CNT;
+
+       if (!is_primary && ndev->ntb.topo != NTB_TOPO_SEC) {
+               dev_err(ndev_dev(ndev), "AMD NTB does not support B2B mode.\n");
+               return -EINVAL;
+       }
+
+       /* each side owns one half of the scratchpad block */
+       ndev->spad_count >>= 1;
+       if (is_primary) {
+               ndev->self_spad = 0;
+               ndev->peer_spad = 0x20;
+       } else {
+               ndev->self_spad = 0x20;
+               ndev->peer_spad = 0;
+       }
+
+       INIT_DELAYED_WORK(&ndev->hb_timer, amd_link_hb);
+       schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+
+       ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+       /* Mask event interrupts */
+       writel(ndev->int_mask, regs + AMD_INTMASK_OFFSET);
+
+       return 0;
+}
+
+/*
+ * Determine which side of the bridge this device is: secondary when any
+ * AMD_SIDE_MASK bit is set in the side-info register, primary otherwise.
+ */
+static enum ntb_topo amd_get_topo(struct amd_ntb_dev *ndev)
+{
+       u32 side = readl(ndev->self_mmio + AMD_SIDEINFO_OFFSET);
+
+       return (side & AMD_SIDE_MASK) ? NTB_TOPO_SEC : NTB_TOPO_PRI;
+}
+
+/*
+ * Device-level initialisation: detect the topology, set up counts and the
+ * heartbeat (amd_init_ntb), then set up interrupts (amd_init_isr).
+ *
+ * amd_init_ntb() arms the hb_timer delayed work. If interrupt setup fails
+ * afterwards, the work is cancelled here before returning, because the
+ * caller's error path frees ndev without going through amd_deinit_dev().
+ * Leaving the work queued would let it run on freed memory.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int amd_init_dev(struct amd_ntb_dev *ndev)
+{
+       int rc = 0;
+
+       ndev->ntb.topo = amd_get_topo(ndev);
+       dev_dbg(ndev_dev(ndev), "AMD NTB topo is %s\n",
+               ntb_topo_string(ndev->ntb.topo));
+
+       rc = amd_init_ntb(ndev);
+       if (rc)
+               return rc;
+
+       rc = amd_init_isr(ndev);
+       if (rc) {
+               dev_err(ndev_dev(ndev), "fail to init isr.\n");
+               /* stop the heartbeat armed by amd_init_ntb() */
+               cancel_delayed_work_sync(&ndev->hb_timer);
+               return rc;
+       }
+
+       /* db_count may have been changed by the interrupt setup */
+       ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+       return 0;
+}
+
+/*
+ * Undo amd_init_dev(): stop the heartbeat work (waiting for a running
+ * instance to finish) and release the interrupts.
+ */
+static void amd_deinit_dev(struct amd_ntb_dev *ndev)
+{
+       cancel_delayed_work_sync(&ndev->hb_timer);
+
+       ndev_deinit_isr(ndev);
+}
+
+/*
+ * PCI-level initialisation: enable the device, claim its regions, enable
+ * bus mastering, pick 64-bit DMA masks (falling back to 32-bit) and map
+ * BAR 0. peer_mmio is the same mapping offset by AMD_PEER_OFFSET.
+ *
+ * The error path now releases the PCI regions. Every jump to err_dma_mask
+ * happens after pci_request_regions() succeeded, but the previous unwind
+ * went straight from pci_clear_master() to pci_disable_device() and left
+ * the regions claimed.
+ *
+ * Returns 0 on success or a negative errno; on failure drvdata is reset to
+ * NULL.
+ */
+static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
+                           struct pci_dev *pdev)
+{
+       int rc;
+
+       pci_set_drvdata(pdev, ndev);
+
+       rc = pci_enable_device(pdev);
+       if (rc)
+               goto err_pci_enable;
+
+       rc = pci_request_regions(pdev, NTB_NAME);
+       if (rc)
+               goto err_pci_regions;
+
+       pci_set_master(pdev);
+
+       rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+       if (rc) {
+               rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+               if (rc)
+                       goto err_dma_mask;
+               dev_warn(ndev_dev(ndev), "Cannot DMA highmem\n");
+       }
+
+       rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+       if (rc) {
+               rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+               if (rc)
+                       goto err_dma_mask;
+               dev_warn(ndev_dev(ndev), "Cannot DMA consistent highmem\n");
+       }
+
+       ndev->self_mmio = pci_iomap(pdev, 0, 0);
+       if (!ndev->self_mmio) {
+               rc = -EIO;
+               goto err_dma_mask;
+       }
+       ndev->peer_mmio = ndev->self_mmio + AMD_PEER_OFFSET;
+
+       return 0;
+
+err_dma_mask:
+       pci_clear_master(pdev);
+       /* regions were claimed before any jump to this label */
+       pci_release_regions(pdev);
+err_pci_regions:
+       pci_disable_device(pdev);
+err_pci_enable:
+       pci_set_drvdata(pdev, NULL);
+       return rc;
+}
+
+/*
+ * Undo amd_ntb_init_pci(): unmap BAR 0, drop bus mastering, release the
+ * regions, disable the device and clear drvdata.
+ */
+static void amd_ntb_deinit_pci(struct amd_ntb_dev *ndev)
+{
+       struct pci_dev *pdev = ndev_pdev(ndev);
+
+       pci_iounmap(pdev, ndev->self_mmio);
+
+       pci_clear_master(pdev);
+       pci_release_regions(pdev);
+       pci_disable_device(pdev);
+       pci_set_drvdata(pdev, NULL);
+}
+
+/*
+ * PCI probe: allocate the device structure on the device's NUMA node, bring
+ * up the PCI and device layers, mark this side ready, take a first link
+ * sample, create the debugfs entries and register with the NTB core.
+ *
+ * Returns 0 on success or a negative errno; on failure everything set up so
+ * far is torn down in reverse order.
+ */
+static int amd_ntb_pci_probe(struct pci_dev *pdev,
+                            const struct pci_device_id *id)
+{
+       struct amd_ntb_dev *ndev;
+       int rc, node;
+
+       node = dev_to_node(&pdev->dev);
+
+       ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node);
+       if (!ndev) {
+               rc = -ENOMEM;
+               goto err_ndev;
+       }
+
+       ndev_init_struct(ndev, pdev);
+
+       rc = amd_ntb_init_pci(ndev, pdev);
+       if (rc)
+               goto err_init_pci;
+
+       rc = amd_init_dev(ndev);
+       if (rc)
+               goto err_init_dev;
+
+       /* write side info */
+       amd_init_side_info(ndev);
+
+       /* prime cntl_sta/lnk_sta; the return value is not needed here */
+       amd_poll_link(ndev);
+
+       ndev_init_debugfs(ndev);
+
+       rc = ntb_register_device(&ndev->ntb);
+       if (rc)
+               goto err_register;
+
+       dev_info(&pdev->dev, "NTB device registered.\n");
+
+       return 0;
+
+err_register:
+       /* NOTE(review): the side-ready bit set by amd_init_side_info() is
+        * not cleared on this path, unlike in amd_ntb_pci_remove() - confirm
+        * whether that is intended.
+        */
+       ndev_deinit_debugfs(ndev);
+       amd_deinit_dev(ndev);
+err_init_dev:
+       amd_ntb_deinit_pci(ndev);
+err_init_pci:
+       kfree(ndev);
+err_ndev:
+       return rc;
+}
+
+/*
+ * PCI remove: unregister from the NTB core first, then tear down debugfs,
+ * the side-ready flag, the device layer and the PCI layer, and free the
+ * device structure.
+ */
+static void amd_ntb_pci_remove(struct pci_dev *pdev)
+{
+       struct amd_ntb_dev *ndev = pci_get_drvdata(pdev);
+
+       ntb_unregister_device(&ndev->ntb);
+       ndev_deinit_debugfs(ndev);
+       amd_deinit_side_info(ndev);
+       amd_deinit_dev(ndev);
+       amd_ntb_deinit_pci(ndev);
+       kfree(ndev);
+}
+
+/* File operations of the per-device debugfs "info" file (read-only report). */
+static const struct file_operations amd_ntb_debugfs_info = {
+       .owner = THIS_MODULE,
+       .open = simple_open,
+       .read = ndev_debugfs_read,
+};
+
+/* PCI IDs this driver binds to; the all-zero entry terminates the table. */
+static const struct pci_device_id amd_ntb_pci_tbl[] = {
+       {PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NTB)},
+       {0}
+};
+MODULE_DEVICE_TABLE(pci, amd_ntb_pci_tbl);
+
+/* PCI driver glue: ties the ID table to the probe and remove callbacks. */
+static struct pci_driver amd_ntb_pci_driver = {
+       .name           = KBUILD_MODNAME,
+       .id_table       = amd_ntb_pci_tbl,
+       .probe          = amd_ntb_pci_probe,
+       .remove         = amd_ntb_pci_remove,
+};
+
+/*
+ * Module init: print the banner, create the driver-wide debugfs directory
+ * when debugfs is available, and register the PCI driver.
+ *
+ * If registration fails the module is not loaded and the exit handler never
+ * runs, so the debugfs directory is removed here instead of being left
+ * behind.
+ *
+ * Returns the result of pci_register_driver().
+ */
+static int __init amd_ntb_pci_driver_init(void)
+{
+       int rc;
+
+       pr_info("%s %s\n", NTB_DESC, NTB_VER);
+
+       if (debugfs_initialized())
+               debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+       rc = pci_register_driver(&amd_ntb_pci_driver);
+       if (rc) {
+               debugfs_remove_recursive(debugfs_dir);
+               debugfs_dir = NULL;
+       }
+
+       return rc;
+}
+module_init(amd_ntb_pci_driver_init);
+
+/*
+ * Module exit: unregister the PCI driver first, then remove the driver-wide
+ * debugfs directory.
+ */
+static void __exit amd_ntb_pci_driver_exit(void)
+{
+       pci_unregister_driver(&amd_ntb_pci_driver);
+       debugfs_remove_recursive(debugfs_dir);
+}
+module_exit(amd_ntb_pci_driver_exit);
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h b/drivers/ntb/hw/amd/ntb_hw_amd.h
new file mode 100644 (file)
index 0000000..2eac3cd
--- /dev/null
@@ -0,0 +1,217 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copy
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of AMD Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * AMD PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Xiangliang Yu <Xiangliang.Yu@amd.com>
+ */
+
+#ifndef NTB_HW_AMD_H
+#define NTB_HW_AMD_H
+
+#include <linux/ntb.h>
+#include <linux/pci.h>
+
+#define PCI_DEVICE_ID_AMD_NTB  0x145B
+#define AMD_LINK_HB_TIMEOUT    msecs_to_jiffies(1000)
+#define AMD_LINK_STATUS_OFFSET 0x68
+#define NTB_LIN_STA_ACTIVE_BIT 0x00000002
+#define NTB_LNK_STA_SPEED_MASK 0x000F0000
+#define NTB_LNK_STA_WIDTH_MASK 0x03F00000
+#define NTB_LNK_STA_ACTIVE(x)  (!!((x) & NTB_LIN_STA_ACTIVE_BIT))
+#define NTB_LNK_STA_SPEED(x)   (((x) & NTB_LNK_STA_SPEED_MASK) >> 16)
+#define NTB_LNK_STA_WIDTH(x)   (((x) & NTB_LNK_STA_WIDTH_MASK) >> 20)
+
+#ifndef read64
+#ifdef readq
+#define read64 readq
+#else
+#define read64 _read64
+/*
+ * Fallback 64-bit MMIO read built from two 32-bit reads: the low word at
+ * @mmio is read first, then the high word at @mmio + 4.
+ */
+static inline u64 _read64(void __iomem *mmio)
+{
+       u64 lo = readl(mmio);
+       u64 hi = readl(mmio + sizeof(u32));
+
+       return (hi << 32) | lo;
+}
+#endif
+#endif
+
+#ifndef write64
+#ifdef writeq
+#define write64 writeq
+#else
+#define write64 _write64
+/*
+ * Fallback 64-bit MMIO write built from two 32-bit writes: the low word
+ * goes to @mmio first, then the high word to @mmio + 4.
+ */
+static inline void _write64(u64 val, void __iomem *mmio)
+{
+       writel(val, mmio);
+       writel(val >> 32, mmio + sizeof(u32));
+}
+#endif
+#endif
+
+/*
+ * AMD NTB capability counts, register offsets and register bit definitions.
+ * *_OFFSET values are byte offsets; the BIT() values are flags within the
+ * register named by the nearest preceding comment.
+ */
+enum {
+       /* AMD NTB Capability */
+       AMD_MW_CNT              = 3,
+       AMD_DB_CNT              = 16,
+       AMD_MSIX_VECTOR_CNT     = 24,
+       AMD_SPADS_CNT           = 16,
+
+       /* AMD NTB register offsets */
+       AMD_CNTL_OFFSET         = 0x200,
+
+       /* NTB control register bits */
+       PMM_REG_CTL             = BIT(21),
+       SMM_REG_CTL             = BIT(20),
+       SMM_REG_ACC_PATH        = BIT(18),
+       PMM_REG_ACC_PATH        = BIT(17),
+       NTB_CLK_EN              = BIT(16),
+
+       AMD_STA_OFFSET          = 0x204,
+       AMD_PGSLV_OFFSET        = 0x208,
+       AMD_SPAD_MUX_OFFSET     = 0x20C,
+       AMD_SPAD_OFFSET         = 0x210,
+       AMD_RSMU_HCID           = 0x250,
+       AMD_RSMU_SIID           = 0x254,
+       AMD_PSION_OFFSET        = 0x300,
+       AMD_SSION_OFFSET        = 0x330,
+       AMD_MMINDEX_OFFSET      = 0x400,
+       AMD_MMDATA_OFFSET       = 0x404,
+       AMD_SIDEINFO_OFFSET     = 0x408,
+
+       /* side info register bits */
+       AMD_SIDE_MASK           = BIT(0),
+       AMD_SIDE_READY          = BIT(1),
+
+       /* limit registers */
+       AMD_ROMBARLMT_OFFSET    = 0x410,
+       AMD_BAR1LMT_OFFSET      = 0x414,
+       AMD_BAR23LMT_OFFSET     = 0x418,
+       AMD_BAR45LMT_OFFSET     = 0x420,
+       /* xlat addresses */
+       AMD_POMBARXLAT_OFFSET   = 0x428,
+       AMD_BAR1XLAT_OFFSET     = 0x430,
+       AMD_BAR23XLAT_OFFSET    = 0x438,
+       AMD_BAR45XLAT_OFFSET    = 0x440,
+       /* doorbell and interrupt */
+       AMD_DBFM_OFFSET         = 0x450,
+       AMD_DBREQ_OFFSET        = 0x454,
+       AMD_MIRRDBSTAT_OFFSET   = 0x458,
+       AMD_DBMASK_OFFSET       = 0x45C,
+       AMD_DBSTAT_OFFSET       = 0x460,
+       AMD_INTMASK_OFFSET      = 0x470,
+       AMD_INTSTAT_OFFSET      = 0x474,
+
+       /* event types; AMD_EVENT_INTMASK is the OR of all five */
+       AMD_PEER_FLUSH_EVENT    = BIT(0),
+       AMD_PEER_RESET_EVENT    = BIT(1),
+       AMD_PEER_D3_EVENT       = BIT(2),
+       AMD_PEER_PMETO_EVENT    = BIT(3),
+       AMD_PEER_D0_EVENT       = BIT(4),
+       AMD_EVENT_INTMASK       = (AMD_PEER_FLUSH_EVENT |
+                               AMD_PEER_RESET_EVENT | AMD_PEER_D3_EVENT |
+                               AMD_PEER_PMETO_EVENT | AMD_PEER_D0_EVENT),
+
+       AMD_PMESTAT_OFFSET      = 0x480,
+       AMD_PMSGTRIG_OFFSET     = 0x490,
+       AMD_LTRLATENCY_OFFSET   = 0x494,
+       AMD_FLUSHTRIG_OFFSET    = 0x498,
+
+       /* SMU registers */
+       AMD_SMUACK_OFFSET       = 0x4A0,
+       AMD_SINRST_OFFSET       = 0x4A4,
+       AMD_RSPNUM_OFFSET       = 0x4A8,
+       AMD_SMU_SPADMUTEX       = 0x4B0,
+       AMD_SMU_SPADOFFSET      = 0x4B4,
+
+       /* NOTE(review): presumably the offset of the peer register block - confirm */
+       AMD_PEER_OFFSET         = 0x400,
+};
+
+struct amd_ntb_dev;
+
+/* Associates an amd_ntb_dev with an integer vector number. */
+struct amd_ntb_vec {
+       struct amd_ntb_dev      *ndev;
+       int                     num;
+};
+
+/*
+ * Per-device driver state.  The generic struct ntb_dev is embedded as @ntb so
+ * that ntb_ndev() can recover this structure with container_of(); likewise
+ * hb_ndev() recovers it from the work item inside @hb_timer.
+ */
+struct amd_ntb_dev {
+       struct ntb_dev ntb;
+
+       u32 ntb_side;
+       u32 lnk_sta;
+       u32 cntl_sta;
+       u32 peer_sta;
+
+       unsigned char mw_count;
+       unsigned char spad_count;
+       unsigned char db_count;
+       unsigned char msix_vec_count;
+
+       u64 db_valid_mask;
+       u64 db_mask;
+       u32 int_mask;
+
+       struct msix_entry *msix;
+       struct amd_ntb_vec *vec;
+
+       /* synchronize rmw access of db_mask and hw reg */
+       spinlock_t db_mask_lock;
+
+       void __iomem *self_mmio;
+       void __iomem *peer_mmio;
+       unsigned int self_spad;
+       unsigned int peer_spad;
+
+       struct delayed_work hb_timer;
+
+       struct dentry *debugfs_dir;
+       struct dentry *debugfs_info;
+};
+
+#define ndev_pdev(ndev) ((ndev)->ntb.pdev)
+#define ndev_name(ndev) pci_name(ndev_pdev(ndev))
+#define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
+#define ntb_ndev(__ntb) container_of(__ntb, struct amd_ntb_dev, ntb)
+#define hb_ndev(__work) container_of(__work, struct amd_ntb_dev, hb_timer.work)
+
+#endif
index a198f82..40d04ef 100644 (file)
@@ -875,7 +875,7 @@ static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
        limit_reg = bar2_off(ndev->xlat_reg->bar2_limit, bar);
 
        if (bar < 4 || !ndev->bar4_split) {
-               base = ioread64(mmio + base_reg);
+               base = ioread64(mmio + base_reg) & NTB_BAR_MASK_64;
 
                /* Set the limit if supported, if size is not mw_size */
                if (limit_reg && size != mw_size)
@@ -906,7 +906,7 @@ static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
                if ((addr + size) & (~0ull << 32))
                        return -EINVAL;
 
-               base = ioread32(mmio + base_reg);
+               base = ioread32(mmio + base_reg) & NTB_BAR_MASK_32;
 
                /* Set the limit if supported, if size is not mw_size */
                if (limit_reg && size != mw_size)
index 2eb4add..3ec149c 100644 (file)
 #define NTB_UNSAFE_DB                  BIT_ULL(0)
 #define NTB_UNSAFE_SPAD                        BIT_ULL(1)
 
+#define NTB_BAR_MASK_64                        ~(0xfull)
+#define NTB_BAR_MASK_32                        ~(0xfu)
+
 struct intel_ntb_dev;
 
 struct intel_ntb_reg {
@@ -334,7 +337,8 @@ struct intel_ntb_dev {
 #define ndev_pdev(ndev) ((ndev)->ntb.pdev)
 #define ndev_name(ndev) pci_name(ndev_pdev(ndev))
 #define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
-#define ntb_ndev(ntb) container_of(ntb, struct intel_ntb_dev, ntb)
-#define hb_ndev(work) container_of(work, struct intel_ntb_dev, hb_timer.work)
+#define ntb_ndev(__ntb) container_of(__ntb, struct intel_ntb_dev, ntb)
+#define hb_ndev(__work) container_of(__work, struct intel_ntb_dev, \
+                                    hb_timer.work)
 
 #endif
index 60654d5..ec4775f 100644 (file)
@@ -171,12 +171,14 @@ struct ntb_transport_qp {
        u64 rx_err_ver;
        u64 rx_memcpy;
        u64 rx_async;
+       u64 dma_rx_prep_err;
        u64 tx_bytes;
        u64 tx_pkts;
        u64 tx_ring_full;
        u64 tx_err_no_buf;
        u64 tx_memcpy;
        u64 tx_async;
+       u64 dma_tx_prep_err;
 };
 
 struct ntb_transport_mw {
@@ -249,6 +251,8 @@ enum {
 #define QP_TO_MW(nt, qp)       ((qp) % nt->mw_count)
 #define NTB_QP_DEF_NUM_ENTRIES 100
 #define NTB_LINK_DOWN_TIMEOUT  10
+#define DMA_RETRIES            20
+#define DMA_OUT_RESOURCE_TO    50
 
 static void ntb_transport_rxc_db(unsigned long data);
 static const struct ntb_ctx_ops ntb_transport_ops;
@@ -501,6 +505,12 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
        out_offset += snprintf(buf + out_offset, out_count - out_offset,
                               "free tx - \t%u\n",
                               ntb_transport_tx_free_entry(qp));
+       out_offset += snprintf(buf + out_offset, out_count - out_offset,
+                              "DMA tx prep err - \t%llu\n",
+                              qp->dma_tx_prep_err);
+       out_offset += snprintf(buf + out_offset, out_count - out_offset,
+                              "DMA rx prep err - \t%llu\n",
+                              qp->dma_rx_prep_err);
 
        out_offset += snprintf(buf + out_offset, out_count - out_offset,
                               "\n");
@@ -726,6 +736,8 @@ static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
        qp->tx_err_no_buf = 0;
        qp->tx_memcpy = 0;
        qp->tx_async = 0;
+       qp->dma_tx_prep_err = 0;
+       qp->dma_rx_prep_err = 0;
 }
 
 static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
@@ -1228,6 +1240,7 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
        struct dmaengine_unmap_data *unmap;
        dma_cookie_t cookie;
        void *buf = entry->buf;
+       int retries = 0;
 
        len = entry->len;
 
@@ -1263,11 +1276,21 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
 
        unmap->from_cnt = 1;
 
-       txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
-                                            unmap->addr[0], len,
-                                            DMA_PREP_INTERRUPT);
-       if (!txd)
+       for (retries = 0; retries < DMA_RETRIES; retries++) {
+               txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+                                                    unmap->addr[0], len,
+                                                    DMA_PREP_INTERRUPT);
+               if (txd)
+                       break;
+
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(DMA_OUT_RESOURCE_TO);
+       }
+
+       if (!txd) {
+               qp->dma_rx_prep_err++;
                goto err_get_unmap;
+       }
 
        txd->callback = ntb_rx_copy_callback;
        txd->callback_param = entry;
@@ -1460,6 +1483,7 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
        void __iomem *offset;
        size_t len = entry->len;
        void *buf = entry->buf;
+       int retries = 0;
 
        offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
        hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header);
@@ -1494,10 +1518,20 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
 
        unmap->to_cnt = 1;
 
-       txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len,
-                                            DMA_PREP_INTERRUPT);
-       if (!txd)
+       for (retries = 0; retries < DMA_RETRIES; retries++) {
+               txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0],
+                                                    len, DMA_PREP_INTERRUPT);
+               if (txd)
+                       break;
+
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(DMA_OUT_RESOURCE_TO);
+       }
+
+       if (!txd) {
+               qp->dma_tx_prep_err++;
                goto err_get_unmap;
+       }
 
        txd->callback = ntb_tx_copy_callback;
        txd->callback_param = entry;
@@ -1532,7 +1566,7 @@ static int ntb_process_tx(struct ntb_transport_qp *qp,
 
        if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) {
                if (qp->tx_handler)
-                       qp->tx_handler(qp->cb_data, qp, NULL, -EIO);
+                       qp->tx_handler(qp, qp->cb_data, NULL, -EIO);
 
                ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
                             &qp->tx_free_q);
index 01852f9..a5d0eda 100644 (file)
@@ -17,3 +17,11 @@ config NTB_TOOL
         functioning at a basic level.
 
         If unsure, say N.
+
+config NTB_PERF
+       tristate "NTB RAW Perf Measuring Tool"
+       help
+        This is a tool to measure raw NTB performance by transferring data
+        to and from the window without additional software interaction.
+
+        If unsure, say N.
index 0ea32a3..9e77e0b 100644 (file)
@@ -1,2 +1,3 @@
 obj-$(CONFIG_NTB_PINGPONG) += ntb_pingpong.o
 obj-$(CONFIG_NTB_TOOL) += ntb_tool.o
+obj-$(CONFIG_NTB_PERF) += ntb_perf.o
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
new file mode 100644 (file)
index 0000000..c8a37ba
--- /dev/null
@@ -0,0 +1,748 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copy
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *   PCIe NTB Perf Linux driver
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/sizes.h>
+#include <linux/ntb.h>
+
+#define DRIVER_NAME            "ntb_perf"
+#define DRIVER_DESCRIPTION     "PCIe NTB Performance Measurement Tool"
+
+#define DRIVER_LICENSE         "Dual BSD/GPL"
+#define DRIVER_VERSION         "1.0"
+#define DRIVER_AUTHOR          "Dave Jiang <dave.jiang@intel.com>"
+
+#define PERF_LINK_DOWN_TIMEOUT 10
+#define PERF_VERSION           0xffff0001
+#define MAX_THREADS            32
+#define MAX_TEST_SIZE          SZ_1M
+#define MAX_SRCS               32
+#define DMA_OUT_RESOURCE_TO    50
+#define DMA_RETRIES            20
+#define SZ_4G                  (1ULL << 32)
+#define MAX_SEG_ORDER          20 /* no larger than 1M for kmalloc buffer */
+
+MODULE_LICENSE(DRIVER_LICENSE);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
+
+static struct dentry *perf_debugfs_dir;
+
+/* Each test buffer segment is (1 << seg_order) bytes. */
+static unsigned int seg_order = 19; /* 512K */
+module_param(seg_order, uint, 0644);
+MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing");
+
+/* Each thread transfers (1 << run_order) bytes in total. */
+static unsigned int run_order = 32; /* 4G */
+module_param(run_order, uint, 0644);
+MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer");
+
+static bool use_dma; /* default to 0 */
+module_param(use_dma, bool, 0644);
+MODULE_PARM_DESC(use_dma, "Using DMA engine to measure performance");
+
+/*
+ * State of the single memory window used by the test.
+ *
+ * phys_addr, phys_size, xlat_align and xlat_align_size are filled in by
+ * ntb_mw_get_range(); vbase is the ioremap_wc() mapping of that physical
+ * range.  virt_addr/dma_addr describe the dma_alloc_coherent() buffer of
+ * buf_size bytes; xlat_size is the requested size rounded up to
+ * xlat_align_size.
+ */
+struct perf_mw {
+       phys_addr_t     phys_addr;
+       resource_size_t phys_size;
+       resource_size_t xlat_align;
+       resource_size_t xlat_align_size;
+       void __iomem    *vbase;
+       size_t          xlat_size;
+       size_t          buf_size;
+       void            *virt_addr;
+       dma_addr_t      dma_addr;
+};
+
+struct perf_ctx;
+
+/*
+ * Per-thread test context.
+ *
+ * dma_sync counts DMA copies in flight: perf_copy() increments it after a
+ * successful submit and perf_copy_callback() decrements it.  dma_prep_err
+ * counts descriptor-preparation failures.  srcs[] holds the source buffers
+ * allocated by the thread and src_idx selects the one used for the next run.
+ */
+struct pthr_ctx {
+       struct task_struct      *thread;
+       struct perf_ctx         *perf;
+       atomic_t                dma_sync;
+       struct dma_chan         *dma_chan;
+       int                     dma_prep_err;
+       int                     src_idx;
+       void                    *srcs[MAX_SRCS];
+};
+
+/*
+ * Per-NTB-device test context.
+ *
+ * link_work performs the scratchpad handshake after a link-up event and
+ * sets link_is_up on success; link_cleanup runs on link-down.  perf_threads
+ * is the number of threads to launch, run tracks whether a test is active,
+ * and tsync counts threads that have reached the start barrier.
+ */
+struct perf_ctx {
+       struct ntb_dev          *ntb;
+       spinlock_t              db_lock;
+       struct perf_mw          mw;
+       bool                    link_is_up;
+       struct work_struct      link_cleanup;
+       struct delayed_work     link_work;
+       struct dentry           *debugfs_node_dir;
+       struct dentry           *debugfs_run;
+       struct dentry           *debugfs_threads;
+       u8                      perf_threads;
+       bool                    run;
+       struct pthr_ctx         pthr_ctx[MAX_THREADS];
+       atomic_t                tsync;
+};
+
+/*
+ * Scratchpad register indices.  VERSION carries PERF_VERSION and
+ * MW_SZ_HIGH/MW_SZ_LOW carry the upper/lower 32 bits of the memory window
+ * size; MAX_SPAD is the number of indices defined here.
+ */
+enum {
+       VERSION = 0,
+       MW_SZ_HIGH,
+       MW_SZ_LOW,
+       SPAD_MSG,
+       SPAD_ACK,
+       MAX_SPAD
+};
+
+/*
+ * NTB link event callback: on link-down queue the cleanup work, otherwise
+ * schedule the link handshake work two seconds from now.
+ */
+static void perf_link_event(void *ctx)
+{
+       struct perf_ctx *perf = ctx;
+
+       if (ntb_link_is_up(perf->ntb, NULL, NULL) != 1) {
+               schedule_work(&perf->link_cleanup);
+               return;
+       }
+
+       schedule_delayed_work(&perf->link_work, 2*HZ);
+}
+
+/*
+ * NTB doorbell event callback: reads the vector's doorbell mask and the
+ * current doorbell bits and emits them as a debug message only.
+ */
+static void perf_db_event(void *ctx, int vec)
+{
+       struct perf_ctx *perf = ctx;
+       struct ntb_dev *ntb = perf->ntb;
+       u64 db_mask = ntb_db_vector_mask(ntb, vec);
+       u64 db_bits = ntb_db_read(ntb);
+
+       dev_dbg(&ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n",
+               vec, db_mask, db_bits);
+}
+
+/* NTB context callbacks for link and doorbell events. */
+static const struct ntb_ctx_ops perf_ops = {
+       .link_event = perf_link_event,
+       .db_event = perf_db_event,
+};
+
+/*
+ * DMA completion callback: @data is the submitting thread's pthr_ctx; one
+ * outstanding copy is retired from its dma_sync counter.
+ */
+static void perf_copy_callback(void *data)
+{
+       atomic_dec(&((struct pthr_ctx *)data)->dma_sync);
+}
+
+/*
+ * Copy @size bytes from @src to @dst, where @dst lies inside the ioremapped
+ * memory window (perf->mw.vbase).
+ *
+ * Without use_dma the copy is a synchronous memcpy_toio().  With use_dma a
+ * DMA memcpy descriptor is queued on pctx->dma_chan; pctx->dma_sync is
+ * incremented here and decremented by perf_copy_callback() on completion.
+ *
+ * Returns @size once the copy has been done or queued, a negative errno
+ * when no channel is available, the buffers are not DMA-copy aligned or
+ * unmap data cannot be allocated, and 0 when mapping, descriptor
+ * preparation or submission fails.
+ */
+static ssize_t perf_copy(struct pthr_ctx *pctx, char *dst,
+                        char *src, size_t size)
+{
+       struct perf_ctx *perf = pctx->perf;
+       struct dma_async_tx_descriptor *txd;
+       struct dma_chan *chan = pctx->dma_chan;
+       struct dma_device *device;
+       struct dmaengine_unmap_data *unmap;
+       dma_cookie_t cookie;
+       size_t src_off, dst_off;
+       struct perf_mw *mw = &perf->mw;
+       u64 vbase, dst_vaddr;
+       dma_addr_t dst_phys;
+       int retries = 0;
+
+       if (!use_dma) {
+               memcpy_toio(dst, src, size);
+               return size;
+       }
+
+       if (!chan) {
+               dev_err(&perf->ntb->dev, "DMA engine does not exist\n");
+               return -EINVAL;
+       }
+
+       device = chan->device;
+       src_off = (size_t)src & ~PAGE_MASK;
+       dst_off = (size_t)dst & ~PAGE_MASK;
+
+       if (!is_dma_copy_aligned(device, src_off, dst_off, size))
+               return -ENODEV;
+
+       /* translate the window-relative virtual address into a bus address */
+       vbase = (u64)(u64 *)mw->vbase;
+       dst_vaddr = (u64)(u64 *)dst;
+       dst_phys = mw->phys_addr + (dst_vaddr - vbase);
+
+       unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
+       if (!unmap)
+               return -ENOMEM;
+
+       unmap->len = size;
+       unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
+                                     src_off, size, DMA_TO_DEVICE);
+       if (dma_mapping_error(device->dev, unmap->addr[0]))
+               goto err_get_unmap;
+
+       unmap->to_cnt = 1;
+
+       /* the engine may be out of descriptors; back off and retry */
+       do {
+               txd = device->device_prep_dma_memcpy(chan, dst_phys,
+                                                    unmap->addr[0],
+                                                    size, DMA_PREP_INTERRUPT);
+               if (!txd) {
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       schedule_timeout(DMA_OUT_RESOURCE_TO);
+               }
+       } while (!txd && (++retries < DMA_RETRIES));
+
+       if (!txd) {
+               pctx->dma_prep_err++;
+               goto err_get_unmap;
+       }
+
+       txd->callback = perf_copy_callback;
+       txd->callback_param = pctx;
+       dma_set_unmap(txd, unmap);
+
+       cookie = dmaengine_submit(txd);
+       if (dma_submit_error(cookie))
+               goto err_set_unmap;
+
+       /*
+        * dma_set_unmap() took its own reference for the descriptor; drop
+        * the one obtained from dmaengine_get_unmap_data() so the unmap
+        * data is released when the transfer completes instead of leaking.
+        */
+       dmaengine_unmap_put(unmap);
+
+       atomic_inc(&pctx->dma_sync);
+       dma_async_issue_pending(chan);
+
+       return size;
+
+err_set_unmap:
+       /* drop the reference taken by dma_set_unmap() */
+       dmaengine_unmap_put(unmap);
+err_get_unmap:
+       /* drop the reference from dmaengine_get_unmap_data() */
+       dmaengine_unmap_put(unmap);
+       return 0;
+}
+
+/*
+ * Transfer @total bytes from @src into the window at @dst in @buf_size
+ * chunks, wrapping back to @dst after every @win_size / @buf_size chunks,
+ * then log the byte count, elapsed time and throughput.
+ *
+ * In DMA mode the function waits for all queued copies to complete before
+ * returning, including on failure.
+ *
+ * Returns 0 on success or the negative errno reported by perf_copy().
+ */
+static int perf_move_data(struct pthr_ctx *pctx, char *dst, char *src,
+                         u64 buf_size, u64 win_size, u64 total)
+{
+       int chunks, total_chunks, i;
+       int copied_chunks = 0;
+       int rc = 0;
+       u64 copied = 0;
+       ssize_t result;
+       char *tmp = dst;
+       u64 perf, diff_us;
+       ktime_t kstart, kstop, kdiff;
+
+       chunks = div64_u64(win_size, buf_size);
+       total_chunks = div64_u64(total, buf_size);
+       kstart = ktime_get();
+
+       for (i = 0; i < total_chunks; i++) {
+               result = perf_copy(pctx, tmp, src, buf_size);
+               if (result < 0) {
+                       /* don't fold a negative errno into the byte count */
+                       rc = result;
+                       break;
+               }
+               copied += result;
+               copied_chunks++;
+               if (copied_chunks == chunks) {
+                       tmp = dst;
+                       copied_chunks = 0;
+               } else {
+                       tmp += buf_size;
+               }
+
+               /* Probably should schedule every 4GB to prevent soft hang. */
+               if (((copied % SZ_4G) == 0) && !use_dma) {
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       schedule_timeout(1);
+               }
+       }
+
+       if (use_dma) {
+               if (!rc)
+                       pr_info("%s: All DMA descriptors submitted\n",
+                               current->comm);
+               /* always drain in-flight copies before the caller frees src */
+               while (atomic_read(&pctx->dma_sync) != 0)
+                       msleep(20);
+       }
+
+       if (rc)
+               return rc;
+
+       kstop = ktime_get();
+       kdiff = ktime_sub(kstop, kstart);
+       diff_us = ktime_to_us(kdiff);
+
+       pr_info("%s: copied %llu bytes\n", current->comm, copied);
+
+       pr_info("%s: lasted %llu usecs\n", current->comm, diff_us);
+
+       /* bytes per microsecond == MBytes/s; avoid dividing by zero */
+       perf = diff_us ? div64_u64(copied, diff_us) : 0;
+
+       pr_info("%s: MBytes/s: %llu\n", current->comm, perf);
+
+       return 0;
+}
+
+/*
+ * dma_request_channel() filter: accept only channels whose device sits on
+ * the NUMA node encoded in @node.
+ */
+static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
+{
+       int wanted = (int)(unsigned long)node;
+
+       return dev_to_node(&chan->dev->device) == wanted;
+}
+
+/*
+ * Body of one measurement kthread; @data is the thread's struct pthr_ctx.
+ *
+ * Acquires a DMA_MEMCPY channel when use_dma is set and the context has none
+ * yet, allocates MAX_SRCS source buffers, waits until perf->tsync equals
+ * perf->perf_threads, then runs perf_move_data() once.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int ntb_perf_thread(void *data)
+{
+       struct pthr_ctx *pctx = data;
+       struct perf_ctx *perf = pctx->perf;
+       struct pci_dev *pdev = perf->ntb->pdev;
+       struct perf_mw *mw = &perf->mw;
+       char *dst;
+       u64 win_size, buf_size, total;
+       void *src;
+       int rc, node, i;
+       struct dma_chan *dma_chan = NULL;
+
+       pr_info("kthread %s starting...\n", current->comm);
+
+       node = dev_to_node(&pdev->dev);
+
+       if (use_dma && !pctx->dma_chan) {
+               dma_cap_mask_t dma_mask;
+
+               dma_cap_zero(dma_mask);
+               dma_cap_set(DMA_MEMCPY, dma_mask);
+               dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
+                                              (void *)(unsigned long)node);
+               if (!dma_chan) {
+                       pr_warn("%s: cannot acquire DMA channel, quitting\n",
+                               current->comm);
+                       return -ENODEV;
+               }
+               pctx->dma_chan = dma_chan;
+       }
+
+       for (i = 0; i < MAX_SRCS; i++) {
+               pctx->srcs[i] = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
+               if (!pctx->srcs[i]) {
+                       rc = -ENOMEM;
+                       goto err;
+               }
+       }
+
+       win_size = mw->phys_size;
+       buf_size = 1ULL << seg_order;
+       total = 1ULL << run_order;
+
+       /* source buffers are only MAX_TEST_SIZE bytes long */
+       if (buf_size > MAX_TEST_SIZE)
+               buf_size = MAX_TEST_SIZE;
+
+       dst = (char *)mw->vbase;
+
+       /*
+        * Start barrier: wait until every thread has checked in.
+        * NOTE(review): a thread that bailed out above never increments
+        * tsync, so the remaining threads would spin here - confirm.
+        */
+       atomic_inc(&perf->tsync);
+       while (atomic_read(&perf->tsync) != perf->perf_threads)
+               schedule();
+
+       /* pick this run's source buffer and advance the index, wrapping */
+       src = pctx->srcs[pctx->src_idx];
+       pctx->src_idx = (pctx->src_idx + 1) & (MAX_SRCS - 1);
+
+       rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);
+
+       atomic_dec(&perf->tsync);
+
+       if (rc < 0) {
+               pr_err("%s: failed\n", current->comm);
+               rc = -ENXIO;
+               goto err;
+       }
+
+       /*
+        * Success: free the source buffers.  The DMA channel is left in
+        * pctx->dma_chan (NOTE(review): presumably reused by a later run -
+        * confirm where it is released).
+        */
+       for (i = 0; i < MAX_SRCS; i++) {
+               kfree(pctx->srcs[i]);
+               pctx->srcs[i] = NULL;
+       }
+
+       return 0;
+
+err:
+       for (i = 0; i < MAX_SRCS; i++) {
+               kfree(pctx->srcs[i]);
+               pctx->srcs[i] = NULL;
+       }
+
+       /* only release a channel that this invocation acquired */
+       if (dma_chan) {
+               dma_release_channel(dma_chan);
+               pctx->dma_chan = NULL;
+       }
+
+       return rc;
+}
+
+/*
+ * Tear down memory window 0: clear its translation, free the coherent
+ * buffer and reset the bookkeeping.  Does nothing if no buffer is allocated.
+ */
+static void perf_free_mw(struct perf_ctx *perf)
+{
+       struct perf_mw *mw = &perf->mw;
+       struct pci_dev *pdev = perf->ntb->pdev;
+
+       if (!mw->virt_addr)
+               return;
+
+       ntb_mw_clear_trans(perf->ntb, 0);
+       dma_free_coherent(&pdev->dev, mw->buf_size,
+                         mw->virt_addr, mw->dma_addr);
+       mw->xlat_size = 0;
+       mw->buf_size = 0;
+       mw->virt_addr = NULL;
+}
+
+/*
+ * Allocate the coherent buffer backing the memory window for a peer window
+ * of @size bytes.  The buffer size is @size rounded up to mw->xlat_align and
+ * the translation size is @size rounded up to mw->xlat_align_size.  An
+ * existing buffer is freed first unless the translation size is unchanged,
+ * in which case nothing is done.
+ *
+ * Returns 0 on success, -EINVAL for a zero @size, or -ENOMEM when the
+ * buffer cannot be allocated.
+ */
+static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
+{
+       struct perf_mw *mw = &perf->mw;
+       size_t xlat_size, buf_size;
+
+       if (!size)
+               return -EINVAL;
+
+       xlat_size = round_up(size, mw->xlat_align_size);
+       buf_size = round_up(size, mw->xlat_align);
+
+       if (mw->xlat_size == xlat_size)
+               return 0;
+
+       if (mw->buf_size)
+               perf_free_mw(perf);
+
+       mw->xlat_size = xlat_size;
+       mw->buf_size = buf_size;
+
+       mw->virt_addr = dma_alloc_coherent(&perf->ntb->pdev->dev, buf_size,
+                                          &mw->dma_addr, GFP_KERNEL);
+       if (!mw->virt_addr) {
+               mw->xlat_size = 0;
+               mw->buf_size = 0;
+               /* report the failure so the link is not marked usable */
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+/*
+ * Delayed work run after a link-up event.  Publishes the local memory
+ * window size and PERF_VERSION to the peer's scratchpads, reads back what
+ * the peer published, sets up the local buffer for the peer's window size
+ * and marks the link up.  On a version mismatch or setup failure the work
+ * reschedules itself while the NTB link is still up.
+ */
+static void perf_link_work(struct work_struct *work)
+{
+       struct perf_ctx *perf =
+               container_of(work, struct perf_ctx, link_work.work);
+       struct ntb_dev *ndev = perf->ntb;
+       struct pci_dev *pdev = ndev->pdev;
+       u32 val;
+       u64 size;
+       int rc;
+
+       dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
+
+       size = perf->mw.phys_size;
+       ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size));
+       ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size));
+       ntb_peer_spad_write(ndev, VERSION, PERF_VERSION);
+
+       /* now read what peer wrote */
+       val = ntb_spad_read(ndev, VERSION);
+       if (val != PERF_VERSION) {
+               dev_dbg(&pdev->dev, "Remote version = %#x\n", val);
+               goto out;
+       }
+
+       /* reassemble the peer's 64-bit window size from two scratchpads */
+       val = ntb_spad_read(ndev, MW_SZ_HIGH);
+       size = (u64)val << 32;
+
+       val = ntb_spad_read(ndev, MW_SZ_LOW);
+       size |= val;
+
+       dev_dbg(&pdev->dev, "Remote MW size = %#llx\n", size);
+
+       rc = perf_set_mw(perf, size);
+       if (rc)
+               goto out1;
+
+       perf->link_is_up = true;
+
+       return;
+
+out1:
+       perf_free_mw(perf);
+
+out:
+       /* retry the handshake after PERF_LINK_DOWN_TIMEOUT milliseconds */
+       if (ntb_link_is_up(ndev, NULL, NULL) == 1)
+               schedule_delayed_work(&perf->link_work,
+                                     msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT));
+}
+
+/*
+ * Work run on a link-down event: if the handshake never completed
+ * (link_is_up is false), cancel any pending link_work and wait for it.
+ */
+static void perf_link_cleanup(struct work_struct *work)
+{
+       struct perf_ctx *perf = container_of(work,
+                                            struct perf_ctx,
+                                            link_cleanup);
+
+       dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
+
+       if (!perf->link_is_up)
+               cancel_delayed_work_sync(&perf->link_work);
+}
+
+/*
+ * Query the range and alignment of memory window 0 and map it
+ * write-combined into perf->mw.vbase.
+ *
+ * Returns 0 on success, the error from ntb_mw_get_range(), or -ENOMEM when
+ * the mapping fails.
+ */
+static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
+{
+       struct perf_mw *mw = &perf->mw;
+       int rc = ntb_mw_get_range(ntb, 0, &mw->phys_addr, &mw->phys_size,
+                                 &mw->xlat_align, &mw->xlat_align_size);
+
+       if (rc)
+               return rc;
+
+       mw->vbase = ioremap_wc(mw->phys_addr, mw->phys_size);
+
+       return mw->vbase ? 0 : -ENOMEM;
+}
+
+/*
+ * debugfs read handler for the "run" file: reports perf->run as a decimal
+ * number followed by a newline.
+ *
+ * Returns the number of bytes copied to @ubuf, 0 when the file has no
+ * context attached, or -ENOMEM when the scratch buffer cannot be allocated.
+ */
+static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
+                               size_t count, loff_t *offp)
+{
+       struct perf_ctx *perf = filp->private_data;
+       char *buf;
+       ssize_t ret, out_offset;
+
+       if (!perf)
+               return 0;
+
+       buf = kmalloc(64, GFP_KERNEL);
+       /* snprintf() into a failed allocation would dereference NULL */
+       if (!buf)
+               return -ENOMEM;
+
+       out_offset = snprintf(buf, 64, "%d\n", perf->run);
+       ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
+       kfree(buf);
+
+       return ret;
+}
+
+/*
+ * Write handler for the debugfs "run" file.
+ *
+ * The written data is never parsed: every write toggles the test.  If a
+ * run is in progress the worker threads are stopped; otherwise up to
+ * perf->perf_threads workers (capped at MAX_THREADS) are created on the
+ * device's NUMA node after seg_order and run_order have been clamped.
+ *
+ * Returns @count on success, 0 when the link is down or no threads are
+ * configured, and -ENXIO when a worker cannot be created or the run flag
+ * is found cleared while the workers are being started.
+ */
+static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
+                                size_t count, loff_t *offp)
+{
+       struct perf_ctx *perf = filp->private_data;
+       int node, i;
+
+       if (!perf->link_is_up)
+               return 0;
+
+       if (perf->perf_threads == 0)
+               return 0;
+
+       /*
+        * NOTE(review): tsync == 0 presumably means no worker is active any
+        * more, so a stale run flag is dropped and this write starts a new
+        * run - confirm against the worker thread.
+        */
+       if (atomic_read(&perf->tsync) == 0)
+               perf->run = false;
+
+       if (perf->run) {
+               /* let's stop the threads */
+               perf->run = false;
+               for (i = 0; i < MAX_THREADS; i++) {
+                       if (perf->pthr_ctx[i].thread) {
+                               kthread_stop(perf->pthr_ctx[i].thread);
+                               perf->pthr_ctx[i].thread = NULL;
+                       } else
+                               break;
+               }
+       } else {
+               perf->run = true;
+
+               if (perf->perf_threads > MAX_THREADS) {
+                       perf->perf_threads = MAX_THREADS;
+                       pr_info("Reset total threads to: %u\n", MAX_THREADS);
+               }
+
+               /* no greater than 1M */
+               if (seg_order > MAX_SEG_ORDER) {
+                       seg_order = MAX_SEG_ORDER;
+                       pr_info("Fix seg_order to %u\n", seg_order);
+               }
+
+               if (run_order < seg_order) {
+                       run_order = seg_order;
+                       pr_info("Fix run_order to %u\n", run_order);
+               }
+
+               node = dev_to_node(&perf->ntb->pdev->dev);
+               /* launch kernel thread */
+               for (i = 0; i < perf->perf_threads; i++) {
+                       struct pthr_ctx *pctx;
+
+                       pctx = &perf->pthr_ctx[i];
+                       atomic_set(&pctx->dma_sync, 0);
+                       pctx->perf = perf;
+                       pctx->thread =
+                               kthread_create_on_node(ntb_perf_thread,
+                                                      (void *)pctx,
+                                                      node, "ntb_perf %d", i);
+                       /*
+                        * Creation failure is reported as an ERR_PTR, never
+                        * as NULL, so it must be tested with IS_ERR() before
+                        * the pointer is handed to wake_up_process().
+                        */
+                       if (IS_ERR(pctx->thread)) {
+                               pctx->thread = NULL;
+                               perf->run = false;
+
+                               /* stop the workers started so far */
+                               while (--i >= 0) {
+                                       pctx = &perf->pthr_ctx[i];
+                                       if (pctx->thread) {
+                                               kthread_stop(pctx->thread);
+                                               pctx->thread = NULL;
+                                       }
+                               }
+
+                               return -ENXIO;
+                       }
+
+                       wake_up_process(pctx->thread);
+
+                       /* the run flag was cleared underneath us; give up */
+                       if (perf->run == false)
+                               return -ENXIO;
+               }
+
+       }
+
+       return count;
+}
+
+/*
+ * File operations of the debugfs "run" file: reads go to
+ * debugfs_run_read(), writes go to debugfs_run_write().
+ */
+static const struct file_operations ntb_perf_debugfs_run = {
+       .owner = THIS_MODULE,
+       .open = simple_open,
+       .read = debugfs_run_read,
+       .write = debugfs_run_write,
+};
+
+/*
+ * Create the debugfs entries of one device:
+ *
+ *   <KBUILD_MODNAME>/            module directory, created on first use
+ *   <KBUILD_MODNAME>/<pci name>/ per-device directory
+ *       run                      toggle/report the test (owner read/write)
+ *       threads                  u8 backed by perf->perf_threads
+ *
+ * Returns 0 on success and -ENODEV when debugfs is not initialized or any
+ * entry cannot be created.
+ */
+static int perf_debugfs_setup(struct perf_ctx *perf)
+{
+       struct pci_dev *pdev = perf->ntb->pdev;
+
+       if (!debugfs_initialized())
+               return -ENODEV;
+
+       /* the module-wide directory is created only once */
+       if (perf_debugfs_dir == NULL)
+               perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+       if (perf_debugfs_dir == NULL)
+               return -ENODEV;
+
+       perf->debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
+                                                   perf_debugfs_dir);
+       if (perf->debugfs_node_dir == NULL)
+               return -ENODEV;
+
+       perf->debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
+                                               perf->debugfs_node_dir, perf,
+                                               &ntb_perf_debugfs_run);
+       if (perf->debugfs_run == NULL)
+               return -ENODEV;
+
+       perf->debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
+                                                 perf->debugfs_node_dir,
+                                                 &perf->perf_threads);
+
+       return perf->debugfs_threads ? 0 : -ENODEV;
+}
+
+/*
+ * NTB client probe callback: allocate and initialize the per-device
+ * context, map the memory window, install the context on the NTB device,
+ * enable the link and create the debugfs entries.
+ *
+ * Returns 0 on success or a negative errno; on failure everything set up
+ * so far is undone before the context is freed.
+ */
+static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
+{
+       struct pci_dev *pdev = ntb->pdev;
+       struct perf_ctx *perf;
+       int node;
+       int rc;
+
+       node = dev_to_node(&pdev->dev);
+
+       perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
+       if (!perf)
+               return -ENOMEM;
+
+       perf->ntb = ntb;
+       perf->perf_threads = 1;
+       atomic_set(&perf->tsync, 0);
+       perf->run = false;
+       spin_lock_init(&perf->db_lock);
+       INIT_DELAYED_WORK(&perf->link_work, perf_link_work);
+       INIT_WORK(&perf->link_cleanup, perf_link_cleanup);
+
+       /* without a mapped window there is nothing to test against */
+       rc = perf_setup_mw(ntb, perf);
+       if (rc)
+               goto err_free;
+
+       rc = ntb_set_ctx(ntb, perf, &perf_ops);
+       if (rc)
+               goto err_ctx;
+
+       perf->link_is_up = false;
+       ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+       ntb_link_event(ntb);
+
+       rc = perf_debugfs_setup(perf);
+       if (rc)
+               goto err_link;
+
+       return 0;
+
+err_link:
+       /*
+        * perf is about to be freed: remove the debugfs entries that carry
+        * it as private data and detach it from the NTB device so that
+        * nothing can reach it afterwards.
+        */
+       debugfs_remove_recursive(perf->debugfs_node_dir);
+       ntb_link_disable(ntb);
+       ntb_clear_ctx(ntb);
+err_ctx:
+       cancel_delayed_work_sync(&perf->link_work);
+       cancel_work_sync(&perf->link_cleanup);
+       iounmap(perf->mw.vbase);
+err_free:
+       kfree(perf);
+       return rc;
+}
+
+/*
+ * NTB client remove callback: cancel the link work items, detach the
+ * context from the NTB device, disable the link, remove the debugfs
+ * tree, release any DMA channels, unmap the memory window and free the
+ * context.
+ */
+static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
+{
+       struct perf_ctx *perf = ntb->ctx;
+       int i;
+
+       dev_dbg(&perf->ntb->dev, "%s called\n", __func__);
+
+       cancel_delayed_work_sync(&perf->link_work);
+       cancel_work_sync(&perf->link_cleanup);
+
+       ntb_clear_ctx(ntb);
+       ntb_link_disable(ntb);
+
+       /*
+        * NOTE(review): this removes the module-wide directory, not only
+        * this device's sub-directory - confirm that is intended when more
+        * than one device is bound.
+        */
+       debugfs_remove_recursive(perf_debugfs_dir);
+       perf_debugfs_dir = NULL;
+
+       if (use_dma) {
+               for (i = 0; i < MAX_THREADS; i++) {
+                       struct pthr_ctx *pctx = &perf->pthr_ctx[i];
+
+                       if (pctx->dma_chan)
+                               dma_release_channel(pctx->dma_chan);
+               }
+       }
+
+       /* release the window mapping created by perf_setup_mw() */
+       if (perf->mw.vbase)
+               iounmap(perf->mw.vbase);
+
+       kfree(perf);
+}
+
+/*
+ * NTB client descriptor: perf_probe() and perf_remove() are the probe and
+ * remove callbacks.
+ * NOTE(review): module_ntb_client() presumably generates the module
+ * init/exit code that registers and unregisters this client - confirm
+ * against its definition.
+ */
+static struct ntb_client perf_client = {
+       .ops = {
+               .probe = perf_probe,
+               .remove = perf_remove,
+       },
+};
+module_ntb_client(perf_client);
index 8ebfcaa..9edf7eb 100644 (file)
@@ -1277,10 +1277,12 @@ static ssize_t mode_show(struct device *dev,
 
        device_lock(dev);
        claim = ndns->claim;
-       if (pmem_should_map_pages(dev) || (claim && is_nd_pfn(claim)))
-               mode = "memory";
-       else if (claim && is_nd_btt(claim))
+       if (claim && is_nd_btt(claim))
                mode = "safe";
+       else if (claim && is_nd_pfn(claim))
+               mode = "memory";
+       else if (!claim && pmem_should_map_pages(dev))
+               mode = "memory";
        else
                mode = "raw";
        rc = sprintf(buf, "%s\n", mode);
index 0cc9048..ae81a2f 100644 (file)
@@ -301,10 +301,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
 
        switch (le32_to_cpu(pfn_sb->mode)) {
        case PFN_MODE_RAM:
-               break;
        case PFN_MODE_PMEM:
-               /* TODO: allocate from PMEM support */
-               return -ENOTTY;
+               break;
        default:
                return -ENXIO;
        }
index 706e3ff..7ee21ae 100644 (file)
@@ -679,18 +679,6 @@ u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in)
        return __of_msi_map_rid(dev, &msi_np, rid_in);
 }
 
-static struct irq_domain *__of_get_msi_domain(struct device_node *np,
-                                             enum irq_domain_bus_token token)
-{
-       struct irq_domain *d;
-
-       d = irq_find_matching_host(np, token);
-       if (!d)
-               d = irq_find_host(np);
-
-       return d;
-}
-
 /**
  * of_msi_map_get_device_domain - Use msi-map to find the relevant MSI domain
  * @dev: device for which the mapping is to be done.
@@ -706,7 +694,7 @@ struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 rid)
        struct device_node *np = NULL;
 
        __of_msi_map_rid(dev, &np, rid);
-       return __of_get_msi_domain(np, DOMAIN_BUS_PCI_MSI);
+       return irq_find_matching_host(np, DOMAIN_BUS_PCI_MSI);
 }
 
 /**
@@ -730,7 +718,7 @@ struct irq_domain *of_msi_get_domain(struct device *dev,
        /* Check for a single msi-parent property */
        msi_np = of_parse_phandle(np, "msi-parent", 0);
        if (msi_np && !of_property_read_bool(msi_np, "#msi-cells")) {
-               d = __of_get_msi_domain(msi_np, token);
+               d = irq_find_matching_host(msi_np, token);
                if (!d)
                        of_node_put(msi_np);
                return d;
@@ -744,7 +732,7 @@ struct irq_domain *of_msi_get_domain(struct device *dev,
                while (!of_parse_phandle_with_args(np, "msi-parent",
                                                   "#msi-cells",
                                                   index, &args)) {
-                       d = __of_get_msi_domain(args.np, token);
+                       d = irq_find_matching_host(args.np, token);
                        if (d)
                                return d;
 
index 86829f8..5648317 100644 (file)
@@ -143,11 +143,31 @@ int of_mdio_parse_addr(struct device *dev, const struct device_node *np)
 }
 EXPORT_SYMBOL(of_mdio_parse_addr);
 
+/* The following is a list of PHY compatible strings which appear in
+ * some DTBs. The compatible string is never matched against a PHY
+ * driver, so is pointless. We only expect devices which are not PHYs
+ * to have a compatible string, so they can be matched to an MDIO
+ * driver.  Encourage users to upgrade their DT blobs to remove these.
+ */
+static const struct of_device_id whitelist_phys[] = {
+       { .compatible = "brcm,40nm-ephy" },
+       { .compatible = "marvell,88E1111", },
+       { .compatible = "marvell,88e1116", },
+       { .compatible = "marvell,88e1118", },
+       { .compatible = "marvell,88e1149r", },
+       { .compatible = "marvell,88e1310", },
+       { .compatible = "marvell,88E1510", },
+       { .compatible = "marvell,88E1514", },
+       { .compatible = "moxa,moxart-rtl8201cp", },
+       {}
+};
+
 /*
  * Return true if the child node is for a phy. It must either:
  * o Compatible string of "ethernet-phy-idX.X"
  * o Compatible string of "ethernet-phy-ieee802.3-c45"
  * o Compatible string of "ethernet-phy-ieee802.3-c22"
+ * o In the white list above (and issue a warning)
  * o No compatibility string
  *
  * A device which is not a phy is expected to have a compatible string
@@ -166,6 +186,13 @@ static bool of_mdiobus_child_is_phy(struct device_node *child)
        if (of_device_is_compatible(child, "ethernet-phy-ieee802.3-c22"))
                return true;
 
+       if (of_match_node(whitelist_phys, child)) {
+               pr_warn(FW_WARN
+                       "%s: Whitelisted compatible string. Please remove\n",
+                       child->full_name);
+               return true;
+       }
+
        if (!of_find_property(child, "compatible", NULL))
                return true;
 
@@ -256,11 +283,19 @@ static int of_phy_match(struct device *dev, void *phy_np)
 struct phy_device *of_phy_find_device(struct device_node *phy_np)
 {
        struct device *d;
+       struct mdio_device *mdiodev;
+
        if (!phy_np)
                return NULL;
 
        d = bus_find_device(&mdio_bus_type, NULL, phy_np, of_phy_match);
-       return d ? to_phy_device(d) : NULL;
+       if (d) {
+               mdiodev = to_mdio_device(d);
+               if (mdiodev->flags & MDIO_DEVICE_FLAG_PHY)
+                       return to_phy_device(d);
+       }
+
+       return NULL;
 }
 EXPORT_SYMBOL(of_phy_find_device);
 
index dd92c5e..b48ac63 100644 (file)
@@ -138,22 +138,22 @@ static int __oprofilefs_create_file(struct dentry *root, char const *name,
        struct dentry *dentry;
        struct inode *inode;
 
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
        dentry = d_alloc_name(root, name);
        if (!dentry) {
-               mutex_unlock(&d_inode(root)->i_mutex);
+               inode_unlock(d_inode(root));
                return -ENOMEM;
        }
        inode = oprofilefs_get_inode(root->d_sb, S_IFREG | perm);
        if (!inode) {
                dput(dentry);
-               mutex_unlock(&d_inode(root)->i_mutex);
+               inode_unlock(d_inode(root));
                return -ENOMEM;
        }
        inode->i_fop = fops;
        inode->i_private = priv;
        d_add(dentry, inode);
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
        return 0;
 }
 
@@ -215,22 +215,22 @@ struct dentry *oprofilefs_mkdir(struct dentry *parent, char const *name)
        struct dentry *dentry;
        struct inode *inode;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        dentry = d_alloc_name(parent, name);
        if (!dentry) {
-               mutex_unlock(&d_inode(parent)->i_mutex);
+               inode_unlock(d_inode(parent));
                return NULL;
        }
        inode = oprofilefs_get_inode(parent->d_sb, S_IFDIR | 0755);
        if (!inode) {
                dput(dentry);
-               mutex_unlock(&d_inode(parent)->i_mutex);
+               inode_unlock(d_inode(parent));
                return NULL;
        }
        inode->i_op = &simple_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
        d_add(dentry, inode);
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        return dentry;
 }
 
index 5f2fda1..fa49f91 100644 (file)
@@ -953,8 +953,10 @@ int acpiphp_enable_slot(struct acpiphp_slot *slot)
 {
        pci_lock_rescan_remove();
 
-       if (slot->flags & SLOT_IS_GOING_AWAY)
+       if (slot->flags & SLOT_IS_GOING_AWAY) {
+               pci_unlock_rescan_remove();
                return -ENODEV;
+       }
 
        /* configure all functions */
        if (!(slot->flags & SLOT_ENABLED))
index d28db0e..d78ee15 100644 (file)
@@ -899,6 +899,13 @@ static const struct dmi_system_id no_hw_rfkill_list[] = {
                        DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 3"),
                },
        },
+       {
+               .ident = "Lenovo Yoga 700",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 700"),
+               },
+       },
        {
                .ident = "Lenovo Yoga 900",
                .matches = {
index 5b31d15..f5134ac 100644 (file)
        } \
 }
 
+#ifdef CONFIG_PM_SLEEP
 static u8 suspend_prep_ok;
 static u32 suspend_shlw_ctr_temp, suspend_deep_ctr_temp;
 static u64 suspend_shlw_res_temp, suspend_deep_res_temp;
+#endif
 
 struct telemetry_susp_stats {
        u32 shlw_swake_ctr;
index f700723..d28e3ab 100644 (file)
@@ -342,6 +342,7 @@ static void quirk_amd_mmconfig_area(struct pnp_dev *dev)
 /* Device IDs of parts that have 32KB MCH space */
 static const unsigned int mch_quirk_devices[] = {
        0x0154, /* Ivy Bridge */
+       0x0a04, /* Haswell-ULT */
        0x0c00, /* Haswell */
        0x1604, /* Broadwell */
 };
index 934c139..ee4f183 100644 (file)
@@ -178,7 +178,6 @@ static int ptp_ixp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 static int ptp_ixp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
 {
        u64 ns;
-       u32 remainder;
        unsigned long flags;
        struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps);
        struct ixp46x_ts_regs *regs = ixp_clock->regs;
@@ -189,8 +188,7 @@ static int ptp_ixp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
 
        spin_unlock_irqrestore(&register_lock, flags);
 
-       ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder);
-       ts->tv_nsec = remainder;
+       *ts = ns_to_timespec64(ns);
        return 0;
 }
 
@@ -202,8 +200,7 @@ static int ptp_ixp_settime(struct ptp_clock_info *ptp,
        struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps);
        struct ixp46x_ts_regs *regs = ixp_clock->regs;
 
-       ns = ts->tv_sec * 1000000000ULL;
-       ns += ts->tv_nsec;
+       ns = timespec64_to_ns(ts);
 
        spin_lock_irqsave(&register_lock, flags);
 
index c692dfe..50597f9 100644 (file)
@@ -139,11 +139,11 @@ static ssize_t chp_measurement_chars_read(struct file *filp,
 
        device = container_of(kobj, struct device, kobj);
        chp = to_channelpath(device);
-       if (!chp->cmg_chars)
+       if (chp->cmg == -1)
                return 0;
 
-       return memory_read_from_buffer(buf, count, &off,
-                               chp->cmg_chars, sizeof(struct cmg_chars));
+       return memory_read_from_buffer(buf, count, &off, &chp->cmg_chars,
+                                      sizeof(chp->cmg_chars));
 }
 
 static struct bin_attribute chp_measurement_chars_attr = {
@@ -416,7 +416,8 @@ static void chp_release(struct device *dev)
  * chp_update_desc - update channel-path description
  * @chp - channel-path
  *
- * Update the channel-path description of the specified channel-path.
+ * Update the channel-path description of the specified channel-path
+ * including channel measurement related information.
  * Return zero on success, non-zero otherwise.
  */
 int chp_update_desc(struct channel_path *chp)
@@ -428,8 +429,10 @@ int chp_update_desc(struct channel_path *chp)
                return rc;
 
        rc = chsc_determine_fmt1_channel_path_desc(chp->chpid, &chp->desc_fmt1);
+       if (rc)
+               return rc;
 
-       return rc;
+       return chsc_get_channel_measurement_chars(chp);
 }
 
 /**
@@ -466,14 +469,6 @@ int chp_new(struct chp_id chpid)
                ret = -ENODEV;
                goto out_free;
        }
-       /* Get channel-measurement characteristics. */
-       if (css_chsc_characteristics.scmc && css_chsc_characteristics.secm) {
-               ret = chsc_get_channel_measurement_chars(chp);
-               if (ret)
-                       goto out_free;
-       } else {
-               chp->cmg = -1;
-       }
        dev_set_name(&chp->dev, "chp%x.%02x", chpid.cssid, chpid.id);
 
        /* make it known to the system */
index 4efd5b8..af02322 100644 (file)
@@ -48,7 +48,7 @@ struct channel_path {
        /* Channel-measurement related stuff: */
        int cmg;
        int shared;
-       void *cmg_chars;
+       struct cmg_chars cmg_chars;
 };
 
 /* Return channel_path struct for given chpid. */
index a831d18..c424c0c 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/device.h>
+#include <linux/mutex.h>
 #include <linux/pci.h>
 
 #include <asm/cio.h>
@@ -224,8 +225,9 @@ out_unreg:
 
 void chsc_chp_offline(struct chp_id chpid)
 {
-       char dbf_txt[15];
+       struct channel_path *chp = chpid_to_chp(chpid);
        struct chp_link link;
+       char dbf_txt[15];
 
        sprintf(dbf_txt, "chpr%x.%02x", chpid.cssid, chpid.id);
        CIO_TRACE_EVENT(2, dbf_txt);
@@ -236,6 +238,11 @@ void chsc_chp_offline(struct chp_id chpid)
        link.chpid = chpid;
        /* Wait until previous actions have settled. */
        css_wait_for_slow_path();
+
+       mutex_lock(&chp->lock);
+       chp_update_desc(chp);
+       mutex_unlock(&chp->lock);
+
        for_each_subchannel_staged(s390_subchannel_remove_chpid, NULL, &link);
 }
 
@@ -690,8 +697,9 @@ static void chsc_process_crw(struct crw *crw0, struct crw *crw1, int overflow)
 
 void chsc_chp_online(struct chp_id chpid)
 {
-       char dbf_txt[15];
+       struct channel_path *chp = chpid_to_chp(chpid);
        struct chp_link link;
+       char dbf_txt[15];
 
        sprintf(dbf_txt, "cadd%x.%02x", chpid.cssid, chpid.id);
        CIO_TRACE_EVENT(2, dbf_txt);
@@ -701,6 +709,11 @@ void chsc_chp_online(struct chp_id chpid)
                link.chpid = chpid;
                /* Wait until previous actions have settled. */
                css_wait_for_slow_path();
+
+               mutex_lock(&chp->lock);
+               chp_update_desc(chp);
+               mutex_unlock(&chp->lock);
+
                for_each_subchannel_staged(__s390_process_res_acc, NULL,
                                           &link);
                css_schedule_reprobe();
@@ -967,22 +980,19 @@ static void
 chsc_initialize_cmg_chars(struct channel_path *chp, u8 cmcv,
                          struct cmg_chars *chars)
 {
-       struct cmg_chars *cmg_chars;
        int i, mask;
 
-       cmg_chars = chp->cmg_chars;
        for (i = 0; i < NR_MEASUREMENT_CHARS; i++) {
                mask = 0x80 >> (i + 3);
                if (cmcv & mask)
-                       cmg_chars->values[i] = chars->values[i];
+                       chp->cmg_chars.values[i] = chars->values[i];
                else
-                       cmg_chars->values[i] = 0;
+                       chp->cmg_chars.values[i] = 0;
        }
 }
 
 int chsc_get_channel_measurement_chars(struct channel_path *chp)
 {
-       struct cmg_chars *cmg_chars;
        int ccode, ret;
 
        struct {
@@ -1006,10 +1016,11 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp)
                u32 data[NR_MEASUREMENT_CHARS];
        } __attribute__ ((packed)) *scmc_area;
 
-       chp->cmg_chars = NULL;
-       cmg_chars = kmalloc(sizeof(*cmg_chars), GFP_KERNEL);
-       if (!cmg_chars)
-               return -ENOMEM;
+       chp->shared = -1;
+       chp->cmg = -1;
+
+       if (!css_chsc_characteristics.scmc || !css_chsc_characteristics.secm)
+               return 0;
 
        spin_lock_irq(&chsc_page_lock);
        memset(chsc_page, 0, PAGE_SIZE);
@@ -1031,25 +1042,19 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp)
                              scmc_area->response.code);
                goto out;
        }
-       if (scmc_area->not_valid) {
-               chp->cmg = -1;
-               chp->shared = -1;
+       if (scmc_area->not_valid)
                goto out;
-       }
+
        chp->cmg = scmc_area->cmg;
        chp->shared = scmc_area->shared;
        if (chp->cmg != 2 && chp->cmg != 3) {
                /* No cmg-dependent data. */
                goto out;
        }
-       chp->cmg_chars = cmg_chars;
        chsc_initialize_cmg_chars(chp, scmc_area->cmcv,
                                  (struct cmg_chars *) &scmc_area->data);
 out:
        spin_unlock_irq(&chsc_page_lock);
-       if (!chp->cmg_chars)
-               kfree(cmg_chars);
-
        return ret;
 }
 
index 7b23f43..de1b6c1 100644 (file)
@@ -112,9 +112,10 @@ static inline int convert_error(struct zcrypt_device *zdev,
                atomic_set(&zcrypt_rescan_req, 1);
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                       zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+                       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+                       ehdr->reply_code);
                return -EAGAIN;
        case REP82_ERROR_TRANSPORT_FAIL:
        case REP82_ERROR_MACHINE_FAILURE:
@@ -123,16 +124,18 @@ static inline int convert_error(struct zcrypt_device *zdev,
                atomic_set(&zcrypt_rescan_req, 1);
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                       zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+                       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+                       ehdr->reply_code);
                return -EAGAIN;
        default:
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                       zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+                       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+                       ehdr->reply_code);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
index 74edf29..eedfaa2 100644 (file)
@@ -336,9 +336,10 @@ static int convert_type80(struct zcrypt_device *zdev,
                /* The result is too short, the CEX2A card may not do that.. */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                              zdev->ap_dev->qid, zdev->online, t80h->code);
+                              AP_QID_DEVICE(zdev->ap_dev->qid),
+                              zdev->online, t80h->code);
 
                return -EAGAIN; /* repeat the request on a different device. */
        }
@@ -368,9 +369,9 @@ static int convert_response(struct zcrypt_device *zdev,
        default: /* Unknown response type, this should NEVER EVER happen */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
index 9a2dd47..2195971 100644 (file)
@@ -572,9 +572,9 @@ static int convert_type86_ica(struct zcrypt_device *zdev,
                        return -EINVAL;
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                              zdev->ap_dev->qid, zdev->online,
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
                               msg->hdr.reply_code);
                return -EAGAIN; /* repeat the request on a different device. */
        }
@@ -715,9 +715,9 @@ static int convert_response_ica(struct zcrypt_device *zdev,
        default: /* Unknown response type, this should NEVER EVER happen */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
@@ -747,9 +747,9 @@ static int convert_response_xcrb(struct zcrypt_device *zdev,
                xcRB->status = 0x0008044DL; /* HDD_InvalidParm */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
@@ -773,9 +773,9 @@ static int convert_response_ep11_xcrb(struct zcrypt_device *zdev,
        default: /* Unknown response type, this should NEVER EVER happen */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
@@ -800,9 +800,9 @@ static int convert_response_rng(struct zcrypt_device *zdev,
        default: /* Unknown response type, this should NEVER EVER happen */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
index 2940bd7..25aba16 100644 (file)
@@ -1045,6 +1045,9 @@ static int tw_chrdev_open(struct inode *inode, struct file *file)
 static const struct file_operations tw_fops = {
        .owner          = THIS_MODULE,
        .unlocked_ioctl = tw_chrdev_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = tw_chrdev_ioctl,
+#endif
        .open           = tw_chrdev_open,
        .release        = NULL,
        .llseek         = noop_llseek,
index c1fe0d2..e2f31c9 100644 (file)
@@ -1106,6 +1106,7 @@ config SCSI_IPR
        tristate "IBM Power Linux RAID adapter support"
        depends on PCI && SCSI && ATA
        select FW_LOADER
+       select IRQ_POLL
        ---help---
          This driver supports the IBM Power Linux family RAID adapters.
          This includes IBM pSeries 5712, 5703, 5709, and 570A, as well
@@ -1620,23 +1621,6 @@ config ATARI_SCSI
          ST-DMA, replacing ACSI).  It does NOT support other schemes, like
          in the Hades (without DMA).
 
-config ATARI_SCSI_TOSHIBA_DELAY
-       bool "Long delays for Toshiba CD-ROMs"
-       depends on ATARI_SCSI
-       help
-         This option increases the delay after a SCSI arbitration to
-         accommodate some flaky Toshiba CD-ROM drives. Say Y if you intend to
-         use a Toshiba CD-ROM drive; otherwise, the option is not needed and
-         would impact performance a bit, so say N.
-
-config ATARI_SCSI_RESET_BOOT
-       bool "Reset SCSI-devices at boottime"
-       depends on ATARI_SCSI
-       help
-         Reset the devices on your Atari whenever it boots.  This makes the
-         boot process fractionally longer but may assist recovery from errors
-         that leave the devices with SCSI operations partway completed.
-
 config MAC_SCSI
        tristate "Macintosh NCR5380 SCSI"
        depends on MAC && SCSI=y
index a777e5c..d728672 100644 (file)
@@ -1,17 +1,17 @@
-/* 
+/*
  * NCR 5380 generic driver routines.  These should make it *trivial*
- *      to implement 5380 SCSI drivers under Linux with a non-trantor
- *      architecture.
+ * to implement 5380 SCSI drivers under Linux with a non-trantor
+ * architecture.
  *
- *      Note that these routines also work with NR53c400 family chips.
+ * Note that these routines also work with NR53c400 family chips.
  *
  * Copyright 1993, Drew Eckhardt
- *      Visionary Computing 
- *      (Unix and Linux consulting and custom programming)
- *      drew@colorado.edu
- *      +1 (303) 666-5836
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * drew@colorado.edu
+ * +1 (303) 666-5836
  *
- * For more information, please consult 
+ * For more information, please consult
  *
  * NCR 5380 Family
  * SCSI Protocol Controller
  */
 
 /*
- * Revision 1.10 1998/9/2      Alan Cox
- *                             (alan@lxorguk.ukuu.org.uk)
- * Fixed up the timer lockups reported so far. Things still suck. Looking 
- * forward to 2.3 and per device request queues. Then it'll be possible to
- * SMP thread this beast and improve life no end.
- * Revision 1.9  1997/7/27     Ronald van Cuijlenborg
- *                             (ronald.van.cuijlenborg@tip.nl or nutty@dds.nl)
- * (hopefully) fixed and enhanced USLEEP
- * added support for DTC3181E card (for Mustek scanner)
- *
-
- * Revision 1.8                        Ingmar Baumgart
- *                             (ingmar@gonzo.schwaben.de)
- * added support for NCR53C400a card
- *
-
- * Revision 1.7  1996/3/2       Ray Van Tassle (rayvt@comm.mot.com)
- * added proc_info
- * added support needed for DTC 3180/3280
- * fixed a couple of bugs
- *
-
- * Revision 1.5  1994/01/19  09:14:57  drew
- * Fixed udelay() hack that was being used on DATAOUT phases
- * instead of a proper wait for the final handshake.
- *
- * Revision 1.4  1994/01/19  06:44:25  drew
- * *** empty log message ***
- *
- * Revision 1.3  1994/01/19  05:24:40  drew
- * Added support for TCR LAST_BYTE_SENT bit.
- *
- * Revision 1.2  1994/01/15  06:14:11  drew
- * REAL DMA support, bug fixes.
- *
- * Revision 1.1  1994/01/15  06:00:54  drew
- * Initial revision
- *
+ * With contributions from Ray Van Tassle, Ingmar Baumgart,
+ * Ronald van Cuijlenborg, Alan Cox and others.
  */
 
 /*
- * Further development / testing that should be done : 
+ * Further development / testing that should be done :
  * 1.  Cleanup the NCR5380_transfer_dma function and DMA operation complete
- *     code so that everything does the same thing that's done at the 
- *     end of a pseudo-DMA read operation.
+ * code so that everything does the same thing that's done at the
+ * end of a pseudo-DMA read operation.
  *
  * 2.  Fix REAL_DMA (interrupt driven, polled works fine) -
- *     basically, transfer size needs to be reduced by one 
- *     and the last byte read as is done with PSEUDO_DMA.
- * 
- * 4.  Test SCSI-II tagged queueing (I have no devices which support 
- *      tagged queueing)
- *
- * 5.  Test linked command handling code after Eric is ready with 
- *      the high level code.
+ * basically, transfer size needs to be reduced by one
+ * and the last byte read as is done with PSEUDO_DMA.
+ *
+ * 4.  Test SCSI-II tagged queueing (I have no devices which support
+ * tagged queueing)
  */
-#include <scsi/scsi_dbg.h>
-#include <scsi/scsi_transport_spi.h>
-
-#if (NDEBUG & NDEBUG_LISTS)
-#define LIST(x,y) {printk("LINE:%d   Adding %p to %p\n", __LINE__, (void*)(x), (void*)(y)); if ((x)==(y)) udelay(5); }
-#define REMOVE(w,x,y,z) {printk("LINE:%d   Removing: %p->%p  %p->%p \n", __LINE__, (void*)(w), (void*)(x), (void*)(y), (void*)(z)); if ((x)==(y)) udelay(5); }
-#else
-#define LIST(x,y)
-#define REMOVE(w,x,y,z)
-#endif
 
 #ifndef notyet
-#undef LINKED
 #undef REAL_DMA
 #endif
 
-#ifdef REAL_DMA_POLL
-#undef READ_OVERRUNS
-#define READ_OVERRUNS
-#endif
-
 #ifdef BOARD_REQUIRES_NO_DELAY
 #define io_recovery_delay(x)
 #else
 /*
  * Design
  *
- * This is a generic 5380 driver.  To use it on a different platform, 
+ * This is a generic 5380 driver.  To use it on a different platform,
  * one simply writes appropriate system specific macros (ie, data
- * transfer - some PC's will use the I/O bus, 68K's must use 
+ * transfer - some PC's will use the I/O bus, 68K's must use
  * memory mapped) and drops this file in their 'C' wrapper.
  *
- * (Note from hch:  unfortunately it was not enough for the different
- * m68k folks and instead of improving this driver they copied it
- * and hacked it up for their needs.  As a consequence they lost
- * most updates to this driver.  Maybe someone will fix all these
- * drivers to use a common core one day..)
- *
- * As far as command queueing, two queues are maintained for 
+ * As far as command queueing, two queues are maintained for
  * each 5380 in the system - commands that haven't been issued yet,
- * and commands that are currently executing.  This means that an 
- * unlimited number of commands may be queued, letting 
- * more commands propagate from the higher driver levels giving higher 
- * throughput.  Note that both I_T_L and I_T_L_Q nexuses are supported, 
- * allowing multiple commands to propagate all the way to a SCSI-II device 
+ * and commands that are currently executing.  This means that an
+ * unlimited number of commands may be queued, letting
+ * more commands propagate from the higher driver levels giving higher
+ * throughput.  Note that both I_T_L and I_T_L_Q nexuses are supported,
+ * allowing multiple commands to propagate all the way to a SCSI-II device
  * while a command is already executing.
  *
  *
- * Issues specific to the NCR5380 : 
- *
- * When used in a PIO or pseudo-dma mode, the NCR5380 is a braindead 
- * piece of hardware that requires you to sit in a loop polling for 
- * the REQ signal as long as you are connected.  Some devices are 
- * brain dead (ie, many TEXEL CD ROM drives) and won't disconnect 
- * while doing long seek operations.
- * 
- * The workaround for this is to keep track of devices that have
- * disconnected.  If the device hasn't disconnected, for commands that
- * should disconnect, we do something like 
+ * Issues specific to the NCR5380 :
  *
- * while (!REQ is asserted) { sleep for N usecs; poll for M usecs }
- * 
- * Some tweaking of N and M needs to be done.  An algorithm based 
- * on "time to data" would give the best results as long as short time
- * to datas (ie, on the same track) were considered, however these 
+ * When used in a PIO or pseudo-dma mode, the NCR5380 is a braindead
+ * piece of hardware that requires you to sit in a loop polling for
+ * the REQ signal as long as you are connected.  Some devices are
+ * brain dead (ie, many TEXEL CD ROM drives) and won't disconnect
+ * while doing long seek operations. [...] These
  * broken devices are the exception rather than the rule and I'd rather
  * spend my time optimizing for the normal case.
  *
  * which is started from a workqueue for each NCR5380 host in the
  * system.  It attempts to establish I_T_L or I_T_L_Q nexuses by
  * removing the commands from the issue queue and calling
- * NCR5380_select() if a nexus is not established. 
+ * NCR5380_select() if a nexus is not established.
  *
  * Once a nexus is established, the NCR5380_information_transfer()
  * phase goes through the various phases as instructed by the target.
  * if the target goes into MSG IN and sends a DISCONNECT message,
  * the command structure is placed into the per instance disconnected
- * queue, and NCR5380_main tries to find more work.  If the target is 
+ * queue, and NCR5380_main tries to find more work.  If the target is
  * idle for too long, the system will try to sleep.
  *
  * If a command has disconnected, eventually an interrupt will trigger,
  * calling NCR5380_intr()  which will in turn call NCR5380_reselect
  * to reestablish a nexus.  This will run main if necessary.
  *
- * On command termination, the done function will be called as 
+ * On command termination, the done function will be called as
  * appropriate.
  *
- * SCSI pointers are maintained in the SCp field of SCSI command 
+ * SCSI pointers are maintained in the SCp field of SCSI command
  * structures, being initialized after the command is connected
  * in NCR5380_select, and set as appropriate in NCR5380_information_transfer.
  * Note that in violation of the standard, an implicit SAVE POINTERS operation
 /*
  * Using this file :
  * This file a skeleton Linux SCSI driver for the NCR 5380 series
- * of chips.  To use it, you write an architecture specific functions 
+ * of chips.  To use it, you write an architecture specific functions
  * and macros and include this file in your driver.
  *
- * These macros control options : 
- * AUTOPROBE_IRQ - if defined, the NCR5380_probe_irq() function will be 
- *      defined.
- * 
+ * These macros control options :
+ * AUTOPROBE_IRQ - if defined, the NCR5380_probe_irq() function will be
+ * defined.
+ *
  * AUTOSENSE - if defined, REQUEST SENSE will be performed automatically
- *      for commands that return with a CHECK CONDITION status. 
+ * for commands that return with a CHECK CONDITION status.
  *
  * DIFFERENTIAL - if defined, NCR53c81 chips will use external differential
- *      transceivers. 
+ * transceivers.
  *
  * DONT_USE_INTR - if defined, never use interrupts, even if we probe or
- *      override-configure an IRQ.
- *
- * LIMIT_TRANSFERSIZE - if defined, limit the pseudo-dma transfers to 512
- *      bytes at a time.  Since interrupts are disabled by default during
- *      these transfers, we might need this to give reasonable interrupt
- *      service time if the transfer size gets too large.
- *
- * LINKED - if defined, linked commands are supported.
+ * override-configure an IRQ.
  *
  * PSEUDO_DMA - if defined, PSEUDO DMA is used during the data transfer phases.
  *
  * REAL_DMA - if defined, REAL DMA is used during the data transfer phases.
  *
  * REAL_DMA_POLL - if defined, REAL DMA is used but the driver doesn't
- *      rely on phase mismatch and EOP interrupts to determine end 
- *      of phase.
- *
- * UNSAFE - leave interrupts enabled during pseudo-DMA transfers.  You
- *          only really want to use this if you're having a problem with
- *          dropped characters during high speed communications, and even
- *          then, you're going to be better off twiddling with transfersize
- *          in the high level code.
- *
- * Defaults for these will be provided although the user may want to adjust 
- * these to allocate CPU resources to the SCSI driver or "real" code.
- * 
- * USLEEP_SLEEP - amount of time, in jiffies, to sleep
- *
- * USLEEP_POLL - amount of time, in jiffies, to poll
+ * rely on phase mismatch and EOP interrupts to determine end
+ * of phase.
  *
  * These macros MUST be defined :
- * NCR5380_local_declare() - declare any local variables needed for your
- *      transfer routines.
  *
- * NCR5380_setup(instance) - initialize any local variables needed from a given
- *      instance of the host adapter for NCR5380_{read,write,pread,pwrite}
- * 
  * NCR5380_read(register)  - read from the specified register
  *
- * NCR5380_write(register, value) - write to the specific register 
+ * NCR5380_write(register, value) - write to the specific register
  *
- * NCR5380_implementation_fields  - additional fields needed for this 
- *      specific implementation of the NCR5380
+ * NCR5380_implementation_fields  - additional fields needed for this
+ * specific implementation of the NCR5380
  *
  * Either real DMA *or* pseudo DMA may be implemented
- * REAL functions : 
+ * REAL functions :
  * NCR5380_REAL_DMA should be defined if real DMA is to be used.
- * Note that the DMA setup functions should return the number of bytes 
- *      that they were able to program the controller for.
+ * Note that the DMA setup functions should return the number of bytes
+ * that they were able to program the controller for.
  *
- * Also note that generic i386/PC versions of these macros are 
- *      available as NCR5380_i386_dma_write_setup,
- *      NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
+ * Also note that generic i386/PC versions of these macros are
+ * available as NCR5380_i386_dma_write_setup,
+ * NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
  *
  * NCR5380_dma_write_setup(instance, src, count) - initialize
  * NCR5380_dma_read_setup(instance, dst, count) - initialize
  * NCR5380_pread(instance, dst, count);
  *
  * The generic driver is initialized by calling NCR5380_init(instance),
- * after setting the appropriate host specific fields and ID.  If the 
+ * after setting the appropriate host specific fields and ID.  If the
  * driver wishes to autoprobe for an IRQ line, the NCR5380_probe_irq(instance,
  * possible) function may be used.
  */
 
-static int do_abort(struct Scsi_Host *host);
-static void do_reset(struct Scsi_Host *host);
+static int do_abort(struct Scsi_Host *);
+static void do_reset(struct Scsi_Host *);
 
-/*
- *     initialize_SCp          -       init the scsi pointer field
- *     @cmd: command block to set up
+/**
+ * initialize_SCp - init the scsi pointer field
+ * @cmd: command block to set up
  *
- *     Set up the internal fields in the SCSI command.
+ * Set up the internal fields in the SCSI command.
  */
 
 static inline void initialize_SCp(struct scsi_cmnd *cmd)
 {
-       /* 
-        * Initialize the Scsi Pointer field so that all of the commands in the 
+       /*
+        * Initialize the Scsi Pointer field so that all of the commands in the
         * various queues are valid.
         */
 
@@ -295,120 +198,123 @@ static inline void initialize_SCp(struct scsi_cmnd *cmd)
                cmd->SCp.ptr = NULL;
                cmd->SCp.this_residual = 0;
        }
+
+       cmd->SCp.Status = 0;
+       cmd->SCp.Message = 0;
 }
 
 /**
- *     NCR5380_poll_politely   -       wait for NCR5380 status bits
- *     @instance: controller to poll
- *     @reg: 5380 register to poll
- *     @bit: Bitmask to check
- *     @val: Value required to exit
- *
- *     Polls the NCR5380 in a reasonably efficient manner waiting for
- *     an event to occur, after a short quick poll we begin giving the
- *     CPU back in non IRQ contexts
- *
- *     Returns the value of the register or a negative error code.
+ * NCR5380_poll_politely2 - wait for two chip register values
+ * @instance: controller to poll
+ * @reg1: 5380 register to poll
+ * @bit1: Bitmask to check
+ * @val1: Expected value
+ * @reg2: Second 5380 register to poll
+ * @bit2: Second bitmask to check
+ * @val2: Second expected value
+ * @wait: Time-out in jiffies
+ *
+ * Polls the chip in a reasonably efficient manner waiting for an
+ * event to occur. After a short quick poll we begin to yield the CPU
+ * (if possible). In irq contexts the time-out is arbitrarily limited.
+ * Callers may hold locks as long as they are held in irq mode.
+ *
+ * Returns 0 if either or both event(s) occurred, otherwise -ETIMEDOUT.
  */
-static int NCR5380_poll_politely(struct Scsi_Host *instance, int reg, int bit, int val, int t)
+
+static int NCR5380_poll_politely2(struct Scsi_Host *instance,
+                                  int reg1, int bit1, int val1,
+                                  int reg2, int bit2, int val2, int wait)
 {
-       NCR5380_local_declare();
-       int n = 500;            /* At about 8uS a cycle for the cpu access */
-       unsigned long end = jiffies + t;
-       int r;
-       
-       NCR5380_setup(instance);
-
-       while( n-- > 0)
-       {
-               r = NCR5380_read(reg);
-               if((r & bit) == val)
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       unsigned long deadline = jiffies + wait;
+       unsigned long n;
+
+       /* Busy-wait for up to 10 ms */
+       n = min(10000U, jiffies_to_usecs(wait));
+       n *= hostdata->accesses_per_ms;
+       n /= 2000;
+       do {
+               if ((NCR5380_read(reg1) & bit1) == val1)
+                       return 0;
+               if ((NCR5380_read(reg2) & bit2) == val2)
                        return 0;
                cpu_relax();
-       }
-       
-       /* t time yet ? */
-       while(time_before(jiffies, end))
-       {
-               r = NCR5380_read(reg);
-               if((r & bit) == val)
+       } while (n--);
+
+       if (irqs_disabled() || in_interrupt())
+               return -ETIMEDOUT;
+
+       /* Repeatedly sleep for 1 jiffy until deadline */
+       while (time_is_after_jiffies(deadline)) {
+               schedule_timeout_uninterruptible(1);
+               if ((NCR5380_read(reg1) & bit1) == val1)
+                       return 0;
+               if ((NCR5380_read(reg2) & bit2) == val2)
                        return 0;
-               if(!in_interrupt())
-                       cond_resched();
-               else
-                       cpu_relax();
        }
+
        return -ETIMEDOUT;
 }
 
-static struct {
-       unsigned char value;
-       const char *name;
-} phases[] __maybe_unused = {
-       {PHASE_DATAOUT, "DATAOUT"}, 
-       {PHASE_DATAIN, "DATAIN"}, 
-       {PHASE_CMDOUT, "CMDOUT"}, 
-       {PHASE_STATIN, "STATIN"}, 
-       {PHASE_MSGOUT, "MSGOUT"}, 
-       {PHASE_MSGIN, "MSGIN"}, 
-       {PHASE_UNKNOWN, "UNKNOWN"}
-};
+static inline int NCR5380_poll_politely(struct Scsi_Host *instance,
+                                        int reg, int bit, int val, int wait)
+{
+       return NCR5380_poll_politely2(instance, reg, bit, val,
+                                               reg, bit, val, wait);
+}
 
 #if NDEBUG
 static struct {
        unsigned char mask;
        const char *name;
-} signals[] = { 
-       {SR_DBP, "PARITY"}, 
-       {SR_RST, "RST"}, 
-       {SR_BSY, "BSY"}, 
-       {SR_REQ, "REQ"}, 
-       {SR_MSG, "MSG"}, 
-       {SR_CD, "CD"}, 
-       {SR_IO, "IO"}, 
-       {SR_SEL, "SEL"}, 
+} signals[] = {
+       {SR_DBP, "PARITY"},
+       {SR_RST, "RST"},
+       {SR_BSY, "BSY"},
+       {SR_REQ, "REQ"},
+       {SR_MSG, "MSG"},
+       {SR_CD, "CD"},
+       {SR_IO, "IO"},
+       {SR_SEL, "SEL"},
        {0, NULL}
-}, 
+},
 basrs[] = {
-       {BASR_ATN, "ATN"}, 
-       {BASR_ACK, "ACK"}, 
+       {BASR_ATN, "ATN"},
+       {BASR_ACK, "ACK"},
        {0, NULL}
-}, 
-icrs[] = { 
-       {ICR_ASSERT_RST, "ASSERT RST"}, 
-       {ICR_ASSERT_ACK, "ASSERT ACK"}, 
-       {ICR_ASSERT_BSY, "ASSERT BSY"}, 
-       {ICR_ASSERT_SEL, "ASSERT SEL"}, 
-       {ICR_ASSERT_ATN, "ASSERT ATN"}, 
-       {ICR_ASSERT_DATA, "ASSERT DATA"}, 
+},
+icrs[] = {
+       {ICR_ASSERT_RST, "ASSERT RST"},
+       {ICR_ASSERT_ACK, "ASSERT ACK"},
+       {ICR_ASSERT_BSY, "ASSERT BSY"},
+       {ICR_ASSERT_SEL, "ASSERT SEL"},
+       {ICR_ASSERT_ATN, "ASSERT ATN"},
+       {ICR_ASSERT_DATA, "ASSERT DATA"},
        {0, NULL}
-}, 
-mrs[] = { 
-       {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"}, 
-       {MR_TARGET, "MODE TARGET"}, 
-       {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"}, 
-       {MR_ENABLE_PAR_INTR, "MODE PARITY INTR"}, 
-       {MR_MONITOR_BSY, "MODE MONITOR BSY"}, 
-       {MR_DMA_MODE, "MODE DMA"}, 
-       {MR_ARBITRATE, "MODE ARBITRATION"}, 
+},
+mrs[] = {
+       {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"},
+       {MR_TARGET, "MODE TARGET"},
+       {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"},
+       {MR_ENABLE_PAR_INTR, "MODE PARITY INTR"},
+       {MR_ENABLE_EOP_INTR, "MODE EOP INTR"},
+       {MR_MONITOR_BSY, "MODE MONITOR BSY"},
+       {MR_DMA_MODE, "MODE DMA"},
+       {MR_ARBITRATE, "MODE ARBITRATION"},
        {0, NULL}
 };
 
 /**
- *     NCR5380_print   -       print scsi bus signals
- *     @instance:      adapter state to dump
- *
- *     Print the SCSI bus signals for debugging purposes
+ * NCR5380_print - print scsi bus signals
+ * @instance: adapter state to dump
  *
- *     Locks: caller holds hostdata lock (not essential)
+ * Print the SCSI bus signals for debugging purposes
  */
 
 static void NCR5380_print(struct Scsi_Host *instance)
 {
-       NCR5380_local_declare();
        unsigned char status, data, basr, mr, icr, i;
-       NCR5380_setup(instance);
 
        data = NCR5380_read(CURRENT_SCSI_DATA_REG);
        status = NCR5380_read(STATUS_REG);
@@ -435,117 +341,56 @@ static void NCR5380_print(struct Scsi_Host *instance)
        printk("\n");
 }
 
+static struct {
+       unsigned char value;
+       const char *name;
+} phases[] = {
+       {PHASE_DATAOUT, "DATAOUT"},
+       {PHASE_DATAIN, "DATAIN"},
+       {PHASE_CMDOUT, "CMDOUT"},
+       {PHASE_STATIN, "STATIN"},
+       {PHASE_MSGOUT, "MSGOUT"},
+       {PHASE_MSGIN, "MSGIN"},
+       {PHASE_UNKNOWN, "UNKNOWN"}
+};
 
-/* 
- *     NCR5380_print_phase     -       show SCSI phase
- *     @instance: adapter to dump
- *
- *     Print the current SCSI phase for debugging purposes
+/**
+ * NCR5380_print_phase - show SCSI phase
+ * @instance: adapter to dump
  *
- *     Locks: none
+ * Print the current SCSI phase for debugging purposes
  */
 
 static void NCR5380_print_phase(struct Scsi_Host *instance)
 {
-       NCR5380_local_declare();
        unsigned char status;
        int i;
-       NCR5380_setup(instance);
 
        status = NCR5380_read(STATUS_REG);
        if (!(status & SR_REQ))
-               printk("scsi%d : REQ not asserted, phase unknown.\n", instance->host_no);
+               shost_printk(KERN_DEBUG, instance, "REQ not asserted, phase unknown.\n");
        else {
-               for (i = 0; (phases[i].value != PHASE_UNKNOWN) && (phases[i].value != (status & PHASE_MASK)); ++i);
-               printk("scsi%d : phase %s\n", instance->host_no, phases[i].name);
+               for (i = 0; (phases[i].value != PHASE_UNKNOWN) &&
+                    (phases[i].value != (status & PHASE_MASK)); ++i)
+                       ;
+               shost_printk(KERN_DEBUG, instance, "phase %s\n", phases[i].name);
        }
 }
 #endif
 
-/*
- * These need tweaking, and would probably work best as per-device 
- * flags initialized differently for disk, tape, cd, etc devices.
- * People with broken devices are free to experiment as to what gives
- * the best results for them.
- *
- * USLEEP_SLEEP should be a minimum seek time.
- *
- * USLEEP_POLL should be a maximum rotational latency.
- */
-#ifndef USLEEP_SLEEP
-/* 20 ms (reasonable hard disk speed) */
-#define USLEEP_SLEEP msecs_to_jiffies(20)
-#endif
-/* 300 RPM (floppy speed) */
-#ifndef USLEEP_POLL
-#define USLEEP_POLL msecs_to_jiffies(200)
-#endif
-#ifndef USLEEP_WAITLONG
-/* RvC: (reasonable time to wait on select error) */
-#define USLEEP_WAITLONG USLEEP_SLEEP
-#endif
-
-/* 
- * Function : int should_disconnect (unsigned char cmd)
- *
- * Purpose : decide whether a command would normally disconnect or 
- *      not, since if it won't disconnect we should go to sleep.
- *
- * Input : cmd - opcode of SCSI command
- *
- * Returns : DISCONNECT_LONG if we should disconnect for a really long 
- *      time (ie always, sleep, look for REQ active, sleep), 
- *      DISCONNECT_TIME_TO_DATA if we would only disconnect for a normal
- *      time-to-data delay, DISCONNECT_NONE if this command would return
- *      immediately.
- *
- *      Future sleep algorithms based on time to data can exploit 
- *      something like this so they can differentiate between "normal" 
- *      (ie, read, write, seek) and unusual commands (ie, * format).
- *
- * Note : We don't deal with commands that handle an immediate disconnect,
- *        
- */
 
-static int should_disconnect(unsigned char cmd)
-{
-       switch (cmd) {
-       case READ_6:
-       case WRITE_6:
-       case SEEK_6:
-       case READ_10:
-       case WRITE_10:
-       case SEEK_10:
-               return DISCONNECT_TIME_TO_DATA;
-       case FORMAT_UNIT:
-       case SEARCH_HIGH:
-       case SEARCH_LOW:
-       case SEARCH_EQUAL:
-               return DISCONNECT_LONG;
-       default:
-               return DISCONNECT_NONE;
-       }
-}
-
-static void NCR5380_set_timer(struct NCR5380_hostdata *hostdata, unsigned long timeout)
-{
-       hostdata->time_expires = jiffies + timeout;
-       schedule_delayed_work(&hostdata->coroutine, timeout);
-}
-
-
-static int probe_irq __initdata = 0;
+static int probe_irq __initdata;
 
 /**
- *     probe_intr      -       helper for IRQ autoprobe
- *     @irq: interrupt number
- *     @dev_id: unused
- *     @regs: unused
+ * probe_intr  -       helper for IRQ autoprobe
+ * @irq: interrupt number
+ * @dev_id: unused
+ * @regs: unused
  *
- *     Set a flag to indicate the IRQ in question was received. This is
- *     used by the IRQ probe code.
+ * Set a flag to indicate the IRQ in question was received. This is
+ * used by the IRQ probe code.
  */
+
 static irqreturn_t __init probe_intr(int irq, void *dev_id)
 {
        probe_irq = irq;
@@ -553,24 +398,20 @@ static irqreturn_t __init probe_intr(int irq, void *dev_id)
 }
 
 /**
- *     NCR5380_probe_irq       -       find the IRQ of an NCR5380
- *     @instance: NCR5380 controller
- *     @possible: bitmask of ISA IRQ lines
- *
- *     Autoprobe for the IRQ line used by the NCR5380 by triggering an IRQ
- *     and then looking to see what interrupt actually turned up.
+ * NCR5380_probe_irq   -       find the IRQ of an NCR5380
+ * @instance: NCR5380 controller
+ * @possible: bitmask of ISA IRQ lines
  *
- *     Locks: none, irqs must be enabled on entry
+ * Autoprobe for the IRQ line used by the NCR5380 by triggering an IRQ
+ * and then looking to see what interrupt actually turned up.
  */
 
 static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
                                                int possible)
 {
-       NCR5380_local_declare();
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned long timeout;
        int trying_irqs, i, mask;
-       NCR5380_setup(instance);
 
        for (trying_irqs = 0, i = 1, mask = 2; i < 16; ++i, mask <<= 1)
                if ((mask & possible) && (request_irq(i, &probe_intr, 0, "NCR-probe", NULL) == 0))
@@ -581,7 +422,7 @@ static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
 
        /*
         * A interrupt is triggered whenever BSY = false, SEL = true
-        * and a bit set in the SELECT_ENABLE_REG is asserted on the 
+        * and a bit set in the SELECT_ENABLE_REG is asserted on the
         * SCSI bus.
         *
         * Note that the bus is only driven when the phase control signals
@@ -596,7 +437,7 @@ static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
 
        while (probe_irq == NO_IRQ && time_before(jiffies, timeout))
                schedule_timeout_uninterruptible(1);
-       
+
        NCR5380_write(SELECT_ENABLE_REG, 0);
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
@@ -608,12 +449,10 @@ static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
 }
 
 /**
- *     NCR58380_info - report driver and host information
- *     @instance: relevant scsi host instance
- *
- *     For use as the host template info() handler.
+ * NCR5380_info - report driver and host information
+ * @instance: relevant scsi host instance
  *
- *     Locks: none
+ * For use as the host template info() handler.
  */
 
 static const char *NCR5380_info(struct Scsi_Host *instance)
@@ -633,20 +472,14 @@ static void prepare_info(struct Scsi_Host *instance)
                 "can_queue %d, cmd_per_lun %d, "
                 "sg_tablesize %d, this_id %d, "
                 "flags { %s%s%s}, "
-#if defined(USLEEP_POLL) && defined(USLEEP_WAITLONG)
-                "USLEEP_POLL %lu, USLEEP_WAITLONG %lu, "
-#endif
                 "options { %s} ",
                 instance->hostt->name, instance->io_port, instance->n_io_port,
                 instance->base, instance->irq,
                 instance->can_queue, instance->cmd_per_lun,
                 instance->sg_tablesize, instance->this_id,
-                hostdata->flags & FLAG_NCR53C400     ? "NCR53C400 "     : "",
-                hostdata->flags & FLAG_DTC3181E      ? "DTC3181E "      : "",
+                hostdata->flags & FLAG_NO_DMA_FIXUP  ? "NO_DMA_FIXUP "  : "",
                 hostdata->flags & FLAG_NO_PSEUDO_DMA ? "NO_PSEUDO_DMA " : "",
-#if defined(USLEEP_POLL) && defined(USLEEP_WAITLONG)
-                USLEEP_POLL, USLEEP_WAITLONG,
-#endif
+                hostdata->flags & FLAG_TOSHIBA_DELAY ? "TOSHIBA_DELAY "  : "",
 #ifdef AUTOPROBE_IRQ
                 "AUTOPROBE_IRQ "
 #endif
@@ -664,47 +497,11 @@ static void prepare_info(struct Scsi_Host *instance)
 #endif
 #ifdef PSEUDO_DMA
                 "PSEUDO_DMA "
-#endif
-#ifdef UNSAFE
-                "UNSAFE "
-#endif
-#ifdef NCR53C400
-                "NCR53C400 "
 #endif
                 "");
 }
 
-/**
- *     NCR5380_print_status    -       dump controller info
- *     @instance: controller to dump
- *
- *     Print commands in the various queues, called from NCR5380_abort 
- *     and NCR5380_debug to aid debugging.
- *
- *     Locks: called functions disable irqs
- */
-
-static void NCR5380_print_status(struct Scsi_Host *instance)
-{
-       NCR5380_dprint(NDEBUG_ANY, instance);
-       NCR5380_dprint_phase(NDEBUG_ANY, instance);
-}
-
 #ifdef PSEUDO_DMA
-/******************************************/
-/*
- * /proc/scsi/[dtc pas16 t128 generic]/[0-ASC_NUM_BOARD_SUPPORTED]
- *
- * *buffer: I/O buffer
- * **start: if inout == FALSE pointer into buffer where user read should start
- * offset: current offset
- * length: length of buffer
- * hostno: Scsi_Host host_no
- * inout: TRUE - user is writing; FALSE - user is reading
- *
- * Return the number of bytes read from or written
- */
-
 static int __maybe_unused NCR5380_write_info(struct Scsi_Host *instance,
        char *buffer, int length)
 {
@@ -714,104 +511,41 @@ static int __maybe_unused NCR5380_write_info(struct Scsi_Host *instance,
        hostdata->spin_max_w = 0;
        return 0;
 }
-#endif
-
-static
-void lprint_Scsi_Cmnd(struct scsi_cmnd *cmd, struct seq_file *m);
-static
-void lprint_command(unsigned char *cmd, struct seq_file *m);
-static
-void lprint_opcode(int opcode, struct seq_file *m);
 
 static int __maybe_unused NCR5380_show_info(struct seq_file *m,
-       struct Scsi_Host *instance)
+                                            struct Scsi_Host *instance)
 {
-       struct NCR5380_hostdata *hostdata;
-       struct scsi_cmnd *ptr;
-
-       hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
-#ifdef PSEUDO_DMA
        seq_printf(m, "Highwater I/O busy spin counts: write %d, read %d\n",
                hostdata->spin_max_w, hostdata->spin_max_r);
-#endif
-       spin_lock_irq(instance->host_lock);
-       if (!hostdata->connected)
-               seq_printf(m, "scsi%d: no currently connected command\n", instance->host_no);
-       else
-               lprint_Scsi_Cmnd((struct scsi_cmnd *) hostdata->connected, m);
-       seq_printf(m, "scsi%d: issue_queue\n", instance->host_no);
-       for (ptr = (struct scsi_cmnd *) hostdata->issue_queue; ptr; ptr = (struct scsi_cmnd *) ptr->host_scribble)
-               lprint_Scsi_Cmnd(ptr, m);
-
-       seq_printf(m, "scsi%d: disconnected_queue\n", instance->host_no);
-       for (ptr = (struct scsi_cmnd *) hostdata->disconnected_queue; ptr; ptr = (struct scsi_cmnd *) ptr->host_scribble)
-               lprint_Scsi_Cmnd(ptr, m);
-       spin_unlock_irq(instance->host_lock);
        return 0;
 }
-
-static void lprint_Scsi_Cmnd(struct scsi_cmnd *cmd, struct seq_file *m)
-{
-       seq_printf(m, "scsi%d : destination target %d, lun %llu\n", cmd->device->host->host_no, cmd->device->id, cmd->device->lun);
-       seq_puts(m, "        command = ");
-       lprint_command(cmd->cmnd, m);
-}
-
-static void lprint_command(unsigned char *command, struct seq_file *m)
-{
-       int i, s;
-       lprint_opcode(command[0], m);
-       for (i = 1, s = COMMAND_SIZE(command[0]); i < s; ++i)
-               seq_printf(m, "%02x ", command[i]);
-       seq_putc(m, '\n');
-}
-
-static void lprint_opcode(int opcode, struct seq_file *m)
-{
-       seq_printf(m, "%2d (0x%02x)", opcode, opcode);
-}
-
+#endif
 
 /**
- *     NCR5380_init    -       initialise an NCR5380
- *     @instance: adapter to configure
- *     @flags: control flags
+ * NCR5380_init - initialise an NCR5380
+ * @instance: adapter to configure
+ * @flags: control flags
  *
- *     Initializes *instance and corresponding 5380 chip,
- *      with flags OR'd into the initial flags value.
+ * Initializes *instance and corresponding 5380 chip,
+ * with flags OR'd into the initial flags value.
  *
- *     Notes : I assume that the host, hostno, and id bits have been
- *      set correctly.  I don't care about the irq and other fields. 
+ * Notes : I assume that the host, hostno, and id bits have been
+ * set correctly. I don't care about the irq and other fields.
  *
- *     Returns 0 for success
- *
- *     Locks: interrupts must be enabled when we are called 
+ * Returns 0 for success
  */
 
 static int NCR5380_init(struct Scsi_Host *instance, int flags)
 {
-       NCR5380_local_declare();
-       int i, pass;
-       unsigned long timeout;
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-
-       if(in_interrupt())
-               printk(KERN_ERR "NCR5380_init called with interrupts off!\n");
-       /* 
-        * On NCR53C400 boards, NCR5380 registers are mapped 8 past 
-        * the base address.
-        */
-
-#ifdef NCR53C400
-       if (flags & FLAG_NCR53C400)
-               instance->NCR5380_instance_name += NCR53C400_address_adjust;
-#endif
-
-       NCR5380_setup(instance);
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       int i;
+       unsigned long deadline;
 
-       hostdata->aborted = 0;
+       hostdata->host = instance;
        hostdata->id_mask = 1 << instance->this_id;
+       hostdata->id_higher_mask = 0;
        for (i = hostdata->id_mask; i <= 0x80; i <<= 1)
                if (i > hostdata->id_mask)
                        hostdata->id_higher_mask |= i;
@@ -820,21 +554,21 @@ static int NCR5380_init(struct Scsi_Host *instance, int flags)
 #ifdef REAL_DMA
        hostdata->dmalen = 0;
 #endif
-       hostdata->targets_present = 0;
+       spin_lock_init(&hostdata->lock);
        hostdata->connected = NULL;
-       hostdata->issue_queue = NULL;
-       hostdata->disconnected_queue = NULL;
-       
-       INIT_DELAYED_WORK(&hostdata->coroutine, NCR5380_main);
-       
-       /* The CHECK code seems to break the 53C400. Will check it later maybe */
-       if (flags & FLAG_NCR53C400)
-               hostdata->flags = FLAG_HAS_LAST_BYTE_SENT | flags;
-       else
-               hostdata->flags = FLAG_CHECK_LAST_BYTE_SENT | flags;
+       hostdata->sensing = NULL;
+       INIT_LIST_HEAD(&hostdata->autosense);
+       INIT_LIST_HEAD(&hostdata->unissued);
+       INIT_LIST_HEAD(&hostdata->disconnected);
 
-       hostdata->host = instance;
-       hostdata->time_expires = 0;
+       hostdata->flags = flags;
+
+       INIT_WORK(&hostdata->main_task, NCR5380_main);
+       hostdata->work_q = alloc_workqueue("ncr5380_%d",
+                               WQ_UNBOUND | WQ_MEM_RECLAIM,
+                               1, instance->host_no);
+       if (!hostdata->work_q)
+               return -ENOMEM;
 
        prepare_info(instance);
 
@@ -843,43 +577,69 @@ static int NCR5380_init(struct Scsi_Host *instance, int flags)
        NCR5380_write(TARGET_COMMAND_REG, 0);
        NCR5380_write(SELECT_ENABLE_REG, 0);
 
-#ifdef NCR53C400
-       if (hostdata->flags & FLAG_NCR53C400) {
-               NCR5380_write(C400_CONTROL_STATUS_REG, CSR_BASE);
-       }
-#endif
+       /* Calibrate register polling loop */
+       i = 0;
+       deadline = jiffies + 1;
+       do {
+               cpu_relax();
+       } while (time_is_after_jiffies(deadline));
+       deadline += msecs_to_jiffies(256);
+       do {
+               NCR5380_read(STATUS_REG);
+               ++i;
+               cpu_relax();
+       } while (time_is_after_jiffies(deadline));
+       hostdata->accesses_per_ms = i / 256;
 
-       /* 
-        * Detect and correct bus wedge problems.
-        *
-        * If the system crashed, it may have crashed in a state 
-        * where a SCSI command was still executing, and the 
-        * SCSI bus is not in a BUS FREE STATE.
-        *
-        * If this is the case, we'll try to abort the currently
-        * established nexus which we know nothing about, and that
-        * failing, do a hard reset of the SCSI bus 
-        */
+       return 0;
+}
+
+/**
+ * NCR5380_maybe_reset_bus - Detect and correct bus wedge problems.
+ * @instance: adapter to check
+ *
+ * If the system crashed, it may have crashed with a connected target and
+ * the SCSI bus busy. Check for BUS FREE phase. If not, try to abort the
+ * currently established nexus, which we know nothing about. Failing that
+ * do a bus reset.
+ *
+ * Note that a bus reset will cause the chip to assert IRQ.
+ *
+ * Returns 0 if successful, otherwise -ENXIO.
+ */
+
+static int NCR5380_maybe_reset_bus(struct Scsi_Host *instance)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       int pass;
 
        for (pass = 1; (NCR5380_read(STATUS_REG) & SR_BSY) && pass <= 6; ++pass) {
                switch (pass) {
                case 1:
                case 3:
                case 5:
-                       printk(KERN_INFO "scsi%d: SCSI bus busy, waiting up to five seconds\n", instance->host_no);
-                       timeout = jiffies + 5 * HZ;
-                       NCR5380_poll_politely(instance, STATUS_REG, SR_BSY, 0, 5*HZ);
+                       shost_printk(KERN_ERR, instance, "SCSI bus busy, waiting up to five seconds\n");
+                       NCR5380_poll_politely(instance,
+                                             STATUS_REG, SR_BSY, 0, 5 * HZ);
                        break;
                case 2:
-                       printk(KERN_WARNING "scsi%d: bus busy, attempting abort\n", instance->host_no);
+                       shost_printk(KERN_ERR, instance, "bus busy, attempting abort\n");
                        do_abort(instance);
                        break;
                case 4:
-                       printk(KERN_WARNING "scsi%d: bus busy, attempting reset\n", instance->host_no);
+                       shost_printk(KERN_ERR, instance, "bus busy, attempting reset\n");
                        do_reset(instance);
+                       /* Wait after a reset; the SCSI standard calls for
+                        * 250ms, we wait 500ms to be on the safe side.
+                        * But some Toshiba CD-ROMs need ten times the standard delay.
+                        */
+                       if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+                               msleep(2500);
+                       else
+                               msleep(500);
                        break;
                case 6:
-                       printk(KERN_ERR "scsi%d: bus locked solid or invalid override\n", instance->host_no);
+                       shost_printk(KERN_ERR, instance, "bus locked solid\n");
                        return -ENXIO;
                }
        }
@@ -887,450 +647,513 @@ static int NCR5380_init(struct Scsi_Host *instance, int flags)
 }
 
 /**
- *     NCR5380_exit    -       remove an NCR5380
- *     @instance: adapter to remove
+ * NCR5380_exit - remove an NCR5380
+ * @instance: adapter to remove
+ *
+ * Assumes that no more work can be queued (e.g. by NCR5380_intr).
  */
 
 static void NCR5380_exit(struct Scsi_Host *instance)
 {
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
-       cancel_delayed_work_sync(&hostdata->coroutine);
+       cancel_work_sync(&hostdata->main_task);
+       destroy_workqueue(hostdata->work_q);
 }
 
 /**
- *     NCR5380_queue_command           -       queue a command
- *     @cmd: SCSI command
- *     @done: completion handler
- *
- *      cmd is added to the per instance issue_queue, with minor 
- *      twiddling done to the host specific fields of cmd.  If the 
- *      main coroutine is not running, it is restarted.
+ * complete_cmd - finish processing a command and return it to the SCSI ML
+ * @instance: the host instance
+ * @cmd: command to complete
+ */
+
+static void complete_cmd(struct Scsi_Host *instance,
+                         struct scsi_cmnd *cmd)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+
+       dsprintk(NDEBUG_QUEUES, instance, "complete_cmd: cmd %p\n", cmd);
+
+       if (hostdata->sensing == cmd) {
+               /* Autosense processing ends here */
+               if ((cmd->result & 0xff) != SAM_STAT_GOOD) {
+                       scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+                       set_host_byte(cmd, DID_ERROR);
+               } else
+                       scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+               hostdata->sensing = NULL;
+       }
+
+       hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun);
+
+       cmd->scsi_done(cmd);
+}
+
+/**
+ * NCR5380_queue_command - queue a command
+ * @instance: the relevant SCSI adapter
+ * @cmd: SCSI command
  *
- *     Locks: host lock taken by caller
+ * cmd is added to the per-instance issue queue, with minor
+ * twiddling done to the host specific fields of cmd.  If the
+ * main coroutine is not running, it is restarted.
  */
 
-static int NCR5380_queue_command_lck(struct scsi_cmnd *cmd, void (*done) (struct scsi_cmnd *))
+static int NCR5380_queue_command(struct Scsi_Host *instance,
+                                 struct scsi_cmnd *cmd)
 {
-       struct Scsi_Host *instance = cmd->device->host;
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-       struct scsi_cmnd *tmp;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+       unsigned long flags;
 
 #if (NDEBUG & NDEBUG_NO_WRITE)
        switch (cmd->cmnd[0]) {
        case WRITE_6:
        case WRITE_10:
-               printk("scsi%d : WRITE attempted with NO_WRITE debugging flag set\n", instance->host_no);
+               shost_printk(KERN_DEBUG, instance, "WRITE attempted with NDEBUG_NO_WRITE set\n");
                cmd->result = (DID_ERROR << 16);
-               done(cmd);
+               cmd->scsi_done(cmd);
                return 0;
        }
-#endif                         /* (NDEBUG & NDEBUG_NO_WRITE) */
+#endif /* (NDEBUG & NDEBUG_NO_WRITE) */
 
-       /* 
-        * We use the host_scribble field as a pointer to the next command  
-        * in a queue 
-        */
-
-       cmd->host_scribble = NULL;
-       cmd->scsi_done = done;
        cmd->result = 0;
 
-       /* 
-        * Insert the cmd into the issue queue. Note that REQUEST SENSE 
+       spin_lock_irqsave(&hostdata->lock, flags);
+
+       /*
+        * Insert the cmd into the issue queue. Note that REQUEST SENSE
         * commands are added to the head of the queue since any command will
-        * clear the contingent allegiance condition that exists and the 
+        * clear the contingent allegiance condition that exists and the
         * sense data is only guaranteed to be valid while the condition exists.
         */
 
-       if (!(hostdata->issue_queue) || (cmd->cmnd[0] == REQUEST_SENSE)) {
-               LIST(cmd, hostdata->issue_queue);
-               cmd->host_scribble = (unsigned char *) hostdata->issue_queue;
-               hostdata->issue_queue = cmd;
-       } else {
-               for (tmp = (struct scsi_cmnd *) hostdata->issue_queue; tmp->host_scribble; tmp = (struct scsi_cmnd *) tmp->host_scribble);
-               LIST(cmd, tmp);
-               tmp->host_scribble = (unsigned char *) cmd;
-       }
-       dprintk(NDEBUG_QUEUES, "scsi%d : command added to %s of queue\n", instance->host_no, (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
+       if (cmd->cmnd[0] == REQUEST_SENSE)
+               list_add(&ncmd->list, &hostdata->unissued);
+       else
+               list_add_tail(&ncmd->list, &hostdata->unissued);
+
+       spin_unlock_irqrestore(&hostdata->lock, flags);
+
+       dsprintk(NDEBUG_QUEUES, instance, "command %p added to %s of queue\n",
+                cmd, (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
 
-       /* Run the coroutine if it isn't already running. */
        /* Kick off command processing */
-       schedule_delayed_work(&hostdata->coroutine, 0);
+       queue_work(hostdata->work_q, &hostdata->main_task);
        return 0;
 }
 
-static DEF_SCSI_QCMD(NCR5380_queue_command)
+/**
+ * dequeue_next_cmd - dequeue a command for processing
+ * @instance: the scsi host instance
+ *
+ * Priority is given to commands on the autosense queue. These commands
+ * need autosense because of a CHECK CONDITION result.
+ *
+ * Returns a command pointer if a command is found for a target that is
+ * not already busy. Otherwise returns NULL.
+ */
+
+static struct scsi_cmnd *dequeue_next_cmd(struct Scsi_Host *instance)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       struct NCR5380_cmd *ncmd;
+       struct scsi_cmnd *cmd;
+
+       if (list_empty(&hostdata->autosense)) {
+               list_for_each_entry(ncmd, &hostdata->unissued, list) {
+                       cmd = NCR5380_to_scmd(ncmd);
+                       dsprintk(NDEBUG_QUEUES, instance, "dequeue: cmd=%p target=%d busy=0x%02x lun=%llu\n",
+                                cmd, scmd_id(cmd), hostdata->busy[scmd_id(cmd)], cmd->device->lun);
+
+                       if (!(hostdata->busy[scmd_id(cmd)] & (1 << cmd->device->lun))) {
+                               list_del(&ncmd->list);
+                               dsprintk(NDEBUG_QUEUES, instance,
+                                        "dequeue: removed %p from issue queue\n", cmd);
+                               return cmd;
+                       }
+               }
+       } else {
+               /* Autosense processing begins here */
+               ncmd = list_first_entry(&hostdata->autosense,
+                                       struct NCR5380_cmd, list);
+               list_del(&ncmd->list);
+               cmd = NCR5380_to_scmd(ncmd);
+               dsprintk(NDEBUG_QUEUES, instance,
+                        "dequeue: removed %p from autosense queue\n", cmd);
+               scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
+               hostdata->sensing = cmd;
+               return cmd;
+       }
+       return NULL;
+}
+
+static void requeue_cmd(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+
+       if (hostdata->sensing) {
+               scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+               list_add(&ncmd->list, &hostdata->autosense);
+               hostdata->sensing = NULL;
+       } else
+               list_add(&ncmd->list, &hostdata->unissued);
+}
 
 /**
- *     NCR5380_main    -       NCR state machines
- *
- *     NCR5380_main is a coroutine that runs as long as more work can 
- *      be done on the NCR5380 host adapters in a system.  Both 
- *      NCR5380_queue_command() and NCR5380_intr() will try to start it 
- *      in case it is not running.
- * 
- *     Locks: called as its own thread with no locks held. Takes the
- *     host lock and called routines may take the isa dma lock.
+ * NCR5380_main - NCR state machines
+ *
+ * NCR5380_main is a coroutine that runs as long as more work can
+ * be done on the NCR5380 host adapters in a system.  Both
+ * NCR5380_queue_command() and NCR5380_intr() will try to start it
+ * in case it is not running.
  */
 
 static void NCR5380_main(struct work_struct *work)
 {
        struct NCR5380_hostdata *hostdata =
-               container_of(work, struct NCR5380_hostdata, coroutine.work);
+               container_of(work, struct NCR5380_hostdata, main_task);
        struct Scsi_Host *instance = hostdata->host;
-       struct scsi_cmnd *tmp, *prev;
+       struct scsi_cmnd *cmd;
        int done;
-       
-       spin_lock_irq(instance->host_lock);
+
        do {
-               /* Lock held here */
                done = 1;
-               if (!hostdata->connected && !hostdata->selecting) {
-                       dprintk(NDEBUG_MAIN, "scsi%d : not connected\n", instance->host_no);
-                       /*
-                        * Search through the issue_queue for a command destined
-                        * for a target that's not busy.
-                        */
-                       for (tmp = (struct scsi_cmnd *) hostdata->issue_queue, prev = NULL; tmp; prev = tmp, tmp = (struct scsi_cmnd *) tmp->host_scribble)
-                       {
-                               if (prev != tmp)
-                                   dprintk(NDEBUG_LISTS, "MAIN tmp=%p   target=%d   busy=%d lun=%llu\n", tmp, tmp->device->id, hostdata->busy[tmp->device->id], tmp->device->lun);
-                               /*  When we find one, remove it from the issue queue. */
-                               if (!(hostdata->busy[tmp->device->id] &
-                                     (1 << (u8)(tmp->device->lun & 0xff)))) {
-                                       if (prev) {
-                                               REMOVE(prev, prev->host_scribble, tmp, tmp->host_scribble);
-                                               prev->host_scribble = tmp->host_scribble;
-                                       } else {
-                                               REMOVE(-1, hostdata->issue_queue, tmp, tmp->host_scribble);
-                                               hostdata->issue_queue = (struct scsi_cmnd *) tmp->host_scribble;
-                                       }
-                                       tmp->host_scribble = NULL;
 
-                                       /* 
-                                        * Attempt to establish an I_T_L nexus here. 
-                                        * On success, instance->hostdata->connected is set.
-                                        * On failure, we must add the command back to the
-                                        *   issue queue so we can keep trying. 
-                                        */
-                                       dprintk(NDEBUG_MAIN|NDEBUG_QUEUES, "scsi%d : main() : command for target %d lun %llu removed from issue_queue\n", instance->host_no, tmp->device->id, tmp->device->lun);
-       
-                                       /*
-                                        * A successful selection is defined as one that 
-                                        * leaves us with the command connected and 
-                                        * in hostdata->connected, OR has terminated the
-                                        * command.
-                                        *
-                                        * With successful commands, we fall through
-                                        * and see if we can do an information transfer,
-                                        * with failures we will restart.
-                                        */
-                                       hostdata->selecting = NULL;
-                                       /* RvC: have to preset this to indicate a new command is being performed */
+               spin_lock_irq(&hostdata->lock);
+               while (!hostdata->connected &&
+                      (cmd = dequeue_next_cmd(instance))) {
 
-                                       /*
-                                        * REQUEST SENSE commands are issued without tagged
-                                        * queueing, even on SCSI-II devices because the
-                                        * contingent allegiance condition exists for the
-                                        * entire unit.
-                                        */
+                       dsprintk(NDEBUG_MAIN, instance, "main: dequeued %p\n", cmd);
 
-                                       if (!NCR5380_select(instance, tmp)) {
-                                               break;
-                                       } else {
-                                               LIST(tmp, hostdata->issue_queue);
-                                               tmp->host_scribble = (unsigned char *) hostdata->issue_queue;
-                                               hostdata->issue_queue = tmp;
-                                               done = 0;
-                                               dprintk(NDEBUG_MAIN|NDEBUG_QUEUES, "scsi%d : main(): select() failed, returned to issue_queue\n", instance->host_no);
-                                       }
-                                       /* lock held here still */
-                               }       /* if target/lun is not busy */
-                       }       /* for */
-                       /* exited locked */
-               }       /* if (!hostdata->connected) */
-               if (hostdata->selecting) {
-                       tmp = (struct scsi_cmnd *) hostdata->selecting;
-                       /* Selection will drop and retake the lock */
-                       if (!NCR5380_select(instance, tmp)) {
-                               /* Ok ?? */
+                       /*
+                        * Attempt to establish an I_T_L nexus here.
+                        * On success, instance->hostdata->connected is set.
+                        * On failure, we must add the command back to the
+                        * issue queue so we can keep trying.
+                        */
+                       /*
+                        * REQUEST SENSE commands are issued without tagged
+                        * queueing, even on SCSI-II devices because the
+                        * contingent allegiance condition exists for the
+                        * entire unit.
+                        */
+
+                       cmd = NCR5380_select(instance, cmd);
+                       if (!cmd) {
+                               dsprintk(NDEBUG_MAIN, instance, "main: select complete\n");
                        } else {
-                               /* RvC: device failed, so we wait a long time
-                                  this is needed for Mustek scanners, that
-                                  do not respond to commands immediately
-                                  after a scan */
-                               printk(KERN_DEBUG "scsi%d: device %d did not respond in time\n", instance->host_no, tmp->device->id);
-                               LIST(tmp, hostdata->issue_queue);
-                               tmp->host_scribble = (unsigned char *) hostdata->issue_queue;
-                               hostdata->issue_queue = tmp;
-                               NCR5380_set_timer(hostdata, USLEEP_WAITLONG);
+                               dsprintk(NDEBUG_MAIN | NDEBUG_QUEUES, instance,
+                                        "main: select failed, returning %p to queue\n", cmd);
+                               requeue_cmd(instance, cmd);
                        }
-               }       /* if hostdata->selecting */
+               }
                if (hostdata->connected
 #ifdef REAL_DMA
                    && !hostdata->dmalen
 #endif
-                   && (!hostdata->time_expires || time_before_eq(hostdata->time_expires, jiffies))
                    ) {
-                       dprintk(NDEBUG_MAIN, "scsi%d : main() : performing information transfer\n", instance->host_no);
+                       dsprintk(NDEBUG_MAIN, instance, "main: performing information transfer\n");
                        NCR5380_information_transfer(instance);
-                       dprintk(NDEBUG_MAIN, "scsi%d : main() : done set false\n", instance->host_no);
                        done = 0;
-               } else
-                       break;
+               }
+               spin_unlock_irq(&hostdata->lock);
+               if (!done)
+                       cond_resched();
        } while (!done);
-       
-       spin_unlock_irq(instance->host_lock);
 }
 
 #ifndef DONT_USE_INTR
 
 /**
- *     NCR5380_intr    -       generic NCR5380 irq handler
- *     @irq: interrupt number
- *     @dev_id: device info
- *
- *     Handle interrupts, reestablishing I_T_L or I_T_L_Q nexuses
- *      from the disconnected queue, and restarting NCR5380_main() 
- *      as required.
- *
- *     Locks: takes the needed instance locks
+ * NCR5380_intr - generic NCR5380 irq handler
+ * @irq: interrupt number
+ * @dev_id: device info
+ *
+ * Handle interrupts, reestablishing I_T_L or I_T_L_Q nexuses
+ * from the disconnected queue, and restarting NCR5380_main()
+ * as required.
+ *
+ * The chip can assert IRQ in any of six different conditions. The IRQ flag
+ * is then cleared by reading the Reset Parity/Interrupt Register (RPIR).
+ * Three of these six conditions are latched in the Bus and Status Register:
+ * - End of DMA (cleared by ending DMA Mode)
+ * - Parity error (cleared by reading RPIR)
+ * - Loss of BSY (cleared by reading RPIR)
+ * Two conditions have flag bits that are not latched:
+ * - Bus phase mismatch (non-maskable in DMA Mode, cleared by ending DMA Mode)
+ * - Bus reset (non-maskable)
+ * The remaining condition has no flag bit at all:
+ * - Selection/reselection
+ *
+ * Hence, establishing the cause(s) of any interrupt is partly guesswork.
+ * In "The DP8490 and DP5380 Comparison Guide", National Semiconductor
+ * claimed that "the design of the [DP8490] interrupt logic ensures
+ * interrupts will not be lost (they can be on the DP5380)."
+ * The L5380/53C80 datasheet from LOGIC Devices has more details.
+ *
+ * Checking for bus reset by reading RST is futile because of interrupt
+ * latency, but a bus reset will reset chip logic. Checking for parity error
+ * is unnecessary because that interrupt is never enabled. A Loss of BSY
+ * condition will clear DMA Mode. We can tell when this occurs because the
+ * Busy Monitor interrupt is enabled together with DMA Mode.
  */
 
-static irqreturn_t NCR5380_intr(int dummy, void *dev_id)
+static irqreturn_t NCR5380_intr(int irq, void *dev_id)
 {
-       NCR5380_local_declare();
        struct Scsi_Host *instance = dev_id;
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-       int done;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       int handled = 0;
        unsigned char basr;
        unsigned long flags;
 
-       dprintk(NDEBUG_INTR, "scsi : NCR5380 irq %d triggered\n",
-               instance->irq);
+       spin_lock_irqsave(&hostdata->lock, flags);
+
+       basr = NCR5380_read(BUS_AND_STATUS_REG);
+       if (basr & BASR_IRQ) {
+               unsigned char mr = NCR5380_read(MODE_REG);
+               unsigned char sr = NCR5380_read(STATUS_REG);
+
+               dsprintk(NDEBUG_INTR, instance, "IRQ %d, BASR 0x%02x, SR 0x%02x, MR 0x%02x\n",
+                        irq, basr, sr, mr);
 
-       do {
-               done = 1;
-               spin_lock_irqsave(instance->host_lock, flags);
-               /* Look for pending interrupts */
-               NCR5380_setup(instance);
-               basr = NCR5380_read(BUS_AND_STATUS_REG);
-               /* XXX dispatch to appropriate routine if found and done=0 */
-               if (basr & BASR_IRQ) {
-                       NCR5380_dprint(NDEBUG_INTR, instance);
-                       if ((NCR5380_read(STATUS_REG) & (SR_SEL | SR_IO)) == (SR_SEL | SR_IO)) {
-                               done = 0;
-                               dprintk(NDEBUG_INTR, "scsi%d : SEL interrupt\n", instance->host_no);
-                               NCR5380_reselect(instance);
-                               (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-                       } else if (basr & BASR_PARITY_ERROR) {
-                               dprintk(NDEBUG_INTR, "scsi%d : PARITY interrupt\n", instance->host_no);
-                               (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-                       } else if ((NCR5380_read(STATUS_REG) & SR_RST) == SR_RST) {
-                               dprintk(NDEBUG_INTR, "scsi%d : RESET interrupt\n", instance->host_no);
-                               (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-                       } else {
 #if defined(REAL_DMA)
-                               /*
-                                * We should only get PHASE MISMATCH and EOP interrupts
-                                * if we have DMA enabled, so do a sanity check based on
-                                * the current setting of the MODE register.
-                                */
+               if ((mr & MR_DMA_MODE) || (mr & MR_MONITOR_BSY)) {
+                       /* Probably End of DMA, Phase Mismatch or Loss of BSY.
+                        * We ack IRQ after clearing Mode Register. Workarounds
+                        * for End of DMA errata need to happen in DMA Mode.
+                        */
 
-                               if ((NCR5380_read(MODE_REG) & MR_DMA) && ((basr & BASR_END_DMA_TRANSFER) || !(basr & BASR_PHASE_MATCH))) {
-                                       int transferred;
+                       dsprintk(NDEBUG_INTR, instance, "interrupt in DMA mode\n");
 
-                                       if (!hostdata->connected)
-                                               panic("scsi%d : received end of DMA interrupt with no connected cmd\n", instance->hostno);
+                       int transferred;
 
-                                       transferred = (hostdata->dmalen - NCR5380_dma_residual(instance));
-                                       hostdata->connected->SCp.this_residual -= transferred;
-                                       hostdata->connected->SCp.ptr += transferred;
-                                       hostdata->dmalen = 0;
+                       if (!hostdata->connected)
+                               panic("scsi%d : DMA interrupt with no connected cmd\n",
+                                     instance->hostno);
 
-                                       (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-                                                       
-                                       /* FIXME: we need to poll briefly then defer a workqueue task ! */
-                                       NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG, BASR_ACK, 0, 2*HZ);
+                       transferred = hostdata->dmalen - NCR5380_dma_residual(instance);
+                       hostdata->connected->SCp.this_residual -= transferred;
+                       hostdata->connected->SCp.ptr += transferred;
+                       hostdata->dmalen = 0;
 
-                                       NCR5380_write(MODE_REG, MR_BASE);
-                                       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                               }
-#else
-                               dprintk(NDEBUG_INTR, "scsi : unknown interrupt, BASR 0x%X, MR 0x%X, SR 0x%x\n", basr, NCR5380_read(MODE_REG), NCR5380_read(STATUS_REG));
-                               (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-#endif
+                       /* FIXME: we need to poll briefly then defer a workqueue task ! */
+                       NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG, BASR_ACK, 0, 2 * HZ);
+
+                       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+                       NCR5380_write(MODE_REG, MR_BASE);
+                       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+               } else
+#endif /* REAL_DMA */
+               if ((NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_mask) &&
+                   (sr & (SR_SEL | SR_IO | SR_BSY | SR_RST)) == (SR_SEL | SR_IO)) {
+                       /* Probably reselected */
+                       NCR5380_write(SELECT_ENABLE_REG, 0);
+                       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+                       dsprintk(NDEBUG_INTR, instance, "interrupt with SEL and IO\n");
+
+                       if (!hostdata->connected) {
+                               NCR5380_reselect(instance);
+                               queue_work(hostdata->work_q, &hostdata->main_task);
                        }
-               }       /* if BASR_IRQ */
-               spin_unlock_irqrestore(instance->host_lock, flags);
-               if(!done)
-                       schedule_delayed_work(&hostdata->coroutine, 0);
-       } while (!done);
-       return IRQ_HANDLED;
+                       if (!hostdata->connected)
+                               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+               } else {
+                       /* Probably Bus Reset */
+                       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+                       dsprintk(NDEBUG_INTR, instance, "unknown interrupt\n");
+               }
+               handled = 1;
+       } else {
+               shost_printk(KERN_NOTICE, instance, "interrupt without IRQ bit\n");
+       }
+
+       spin_unlock_irqrestore(&hostdata->lock, flags);
+
+       return IRQ_RETVAL(handled);
 }
 
-#endif 
+#endif
 
-/* 
+/*
  * Function : int NCR5380_select(struct Scsi_Host *instance,
- *                               struct scsi_cmnd *cmd)
+ * struct scsi_cmnd *cmd)
  *
  * Purpose : establishes I_T_L or I_T_L_Q nexus for new or existing command,
- *      including ARBITRATION, SELECTION, and initial message out for 
- *      IDENTIFY and queue messages. 
- *
- * Inputs : instance - instantiation of the 5380 driver on which this 
- *      target lives, cmd - SCSI command to execute.
- * 
- * Returns : -1 if selection could not execute for some reason,
- *      0 if selection succeeded or failed because the target 
- *      did not respond.
- *
- * Side effects : 
- *      If bus busy, arbitration failed, etc, NCR5380_select() will exit 
- *              with registers as they should have been on entry - ie
- *              SELECT_ENABLE will be set appropriately, the NCR5380
- *              will cease to drive any SCSI bus signals.
- *
- *      If successful : I_T_L or I_T_L_Q nexus will be established, 
- *              instance->connected will be set to cmd.  
- *              SELECT interrupt will be disabled.
- *
- *      If failed (no target) : cmd->scsi_done() will be called, and the 
- *              cmd->result host byte set to DID_BAD_TARGET.
- *
- *     Locks: caller holds hostdata lock in IRQ mode
+ * including ARBITRATION, SELECTION, and initial message out for
+ * IDENTIFY and queue messages.
+ *
+ * Inputs : instance - instantiation of the 5380 driver on which this
+ * target lives, cmd - SCSI command to execute.
+ *
+ * Returns cmd if selection failed but should be retried,
+ * NULL if selection failed and should not be retried, or
+ * NULL if selection succeeded (hostdata->connected == cmd).
+ *
+ * Side effects :
+ * If bus busy, arbitration failed, etc, NCR5380_select() will exit
+ * with registers as they should have been on entry - ie
+ * SELECT_ENABLE will be set appropriately, the NCR5380
+ * will cease to drive any SCSI bus signals.
+ *
+ * If successful : I_T_L or I_T_L_Q nexus will be established,
+ * hostdata->connected will be set to cmd.
+ * SELECT interrupt will be disabled.
+ *
+ * If failed (no target) : cmd->scsi_done() will be called, and the
+ * cmd->result host byte set to DID_BAD_TARGET.
  */
-static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+
+static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance,
+                                        struct scsi_cmnd *cmd)
 {
-       NCR5380_local_declare();
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned char tmp[3], phase;
        unsigned char *data;
        int len;
-       unsigned long timeout;
-       unsigned char value;
        int err;
-       NCR5380_setup(instance);
-
-       if (hostdata->selecting)
-               goto part2;
-
-       hostdata->restart_select = 0;
 
        NCR5380_dprint(NDEBUG_ARBITRATION, instance);
-       dprintk(NDEBUG_ARBITRATION, "scsi%d : starting arbitration, id = %d\n", instance->host_no, instance->this_id);
+       dsprintk(NDEBUG_ARBITRATION, instance, "starting arbitration, id = %d\n",
+                instance->this_id);
+
+       /*
+        * Arbitration and selection phases are slow and involve dropping the
+        * lock, so we have to watch out for EH. An exception handler may
+        * change 'selecting' to NULL. This function will then return NULL
+        * so that the caller will forget about 'cmd'. (During information
+        * transfer phases, EH may change 'connected' to NULL.)
+        */
+       hostdata->selecting = cmd;
 
-       /* 
-        * Set the phase bits to 0, otherwise the NCR5380 won't drive the 
+       /*
+        * Set the phase bits to 0, otherwise the NCR5380 won't drive the
         * data bus during SELECTION.
         */
 
        NCR5380_write(TARGET_COMMAND_REG, 0);
 
-       /* 
+       /*
         * Start arbitration.
         */
 
        NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask);
        NCR5380_write(MODE_REG, MR_ARBITRATE);
 
+       /* The chip now waits for BUS FREE phase. Then after the 800 ns
+        * Bus Free Delay, arbitration will begin.
+        */
 
-       /* We can be relaxed here, interrupts are on, we are
-          in workqueue context, the birds are singing in the trees */
-       spin_unlock_irq(instance->host_lock);
-       err = NCR5380_poll_politely(instance, INITIATOR_COMMAND_REG, ICR_ARBITRATION_PROGRESS, ICR_ARBITRATION_PROGRESS, 5*HZ);
-       spin_lock_irq(instance->host_lock);
+       spin_unlock_irq(&hostdata->lock);
+       err = NCR5380_poll_politely2(instance, MODE_REG, MR_ARBITRATE, 0,
+                       INITIATOR_COMMAND_REG, ICR_ARBITRATION_PROGRESS,
+                                              ICR_ARBITRATION_PROGRESS, HZ);
+       spin_lock_irq(&hostdata->lock);
+       if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE)) {
+               /* Reselection interrupt */
+               goto out;
+       }
        if (err < 0) {
-               printk(KERN_DEBUG "scsi: arbitration timeout at %d\n", __LINE__);
                NCR5380_write(MODE_REG, MR_BASE);
-               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               goto failed;
+               shost_printk(KERN_ERR, instance,
+                            "select: arbitration timeout\n");
+               goto out;
        }
+       spin_unlock_irq(&hostdata->lock);
 
-       dprintk(NDEBUG_ARBITRATION, "scsi%d : arbitration complete\n", instance->host_no);
-
-       /* 
-        * The arbitration delay is 2.2us, but this is a minimum and there is 
-        * no maximum so we can safely sleep for ceil(2.2) usecs to accommodate
-        * the integral nature of udelay().
-        *
-        */
-
+       /* The SCSI-2 arbitration delay is 2.4 us */
        udelay(3);
 
        /* Check for lost arbitration */
-       if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) || (NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_higher_mask) || (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST)) {
-               NCR5380_write(MODE_REG, MR_BASE);
-               dprintk(NDEBUG_ARBITRATION, "scsi%d : lost arbitration, deasserting MR_ARBITRATE\n", instance->host_no);
-               goto failed;
-       }
-       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_SEL);
-
-       if (!(hostdata->flags & FLAG_DTC3181E) &&
-           /* RvC: DTC3181E has some trouble with this
-            *      so we simply removed it. Seems to work with
-            *      only Mustek scanner attached
-            */
+       if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
+           (NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_higher_mask) ||
            (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST)) {
                NCR5380_write(MODE_REG, MR_BASE);
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               dprintk(NDEBUG_ARBITRATION, "scsi%d : lost arbitration, deasserting ICR_ASSERT_SEL\n", instance->host_no);
-               goto failed;
+               dsprintk(NDEBUG_ARBITRATION, instance, "lost arbitration, deasserting MR_ARBITRATE\n");
+               spin_lock_irq(&hostdata->lock);
+               goto out;
        }
-       /* 
-        * Again, bus clear + bus settle time is 1.2us, however, this is 
+
+       /* After/during arbitration, BSY should be asserted.
+        * IBM DPES-31080 Version S31Q works now
+        * Tnx to Thomas_Roesch@m2.maus.de for finding this! (Roman)
+        */
+       NCR5380_write(INITIATOR_COMMAND_REG,
+                     ICR_BASE | ICR_ASSERT_SEL | ICR_ASSERT_BSY);
+
+       /*
+        * Again, bus clear + bus settle time is 1.2us, however, this is
         * a minimum so we'll udelay ceil(1.2)
         */
 
-       udelay(2);
+       if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+               udelay(15);
+       else
+               udelay(2);
+
+       spin_lock_irq(&hostdata->lock);
+
+       /* NCR5380_reselect() clears MODE_REG after a reselection interrupt */
+       if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE))
+               goto out;
 
-       dprintk(NDEBUG_ARBITRATION, "scsi%d : won arbitration\n", instance->host_no);
+       if (!hostdata->selecting) {
+               NCR5380_write(MODE_REG, MR_BASE);
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+               goto out;
+       }
 
-       /* 
-        * Now that we have won arbitration, start Selection process, asserting 
+       dsprintk(NDEBUG_ARBITRATION, instance, "won arbitration\n");
+
+       /*
+        * Now that we have won arbitration, start Selection process, asserting
         * the host and target ID's on the SCSI bus.
         */
 
-       NCR5380_write(OUTPUT_DATA_REG, (hostdata->id_mask | (1 << scmd_id(cmd))));
+       NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask | (1 << scmd_id(cmd)));
 
-       /* 
+       /*
         * Raise ATN while SEL is true before BSY goes false from arbitration,
         * since this is the only way to guarantee that we'll get a MESSAGE OUT
         * phase immediately after selection.
         */
 
-       NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_BSY | ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL));
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY |
+                     ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL);
        NCR5380_write(MODE_REG, MR_BASE);
 
-       /* 
+       /*
         * Reselect interrupts must be turned off prior to the dropping of BSY,
         * otherwise we will trigger an interrupt.
         */
        NCR5380_write(SELECT_ENABLE_REG, 0);
 
+       spin_unlock_irq(&hostdata->lock);
+
        /*
-        * The initiator shall then wait at least two deskew delays and release 
+        * The initiator shall then wait at least two deskew delays and release
         * the BSY signal.
         */
-       udelay(1);              /* wingel -- wait two bus deskew delay >2*45ns */
+       udelay(1);        /* wingel -- wait two bus deskew delay >2*45ns */
 
        /* Reset BSY */
-       NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL));
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA |
+                     ICR_ASSERT_ATN | ICR_ASSERT_SEL);
 
-       /* 
+       /*
         * Something weird happens when we cease to drive BSY - looks
-        * like the board/chip is letting us do another read before the 
+        * like the board/chip is letting us do another read before the
         * appropriate propagation delay has expired, and we're confusing
         * a BSY signal from ourselves as the target's response to SELECTION.
         *
         * A small delay (the 'C++' frontend breaks the pipeline with an
         * unnecessary jump, making it work on my 386-33/Trantor T128, the
-        * tighter 'C' code breaks and requires this) solves the problem - 
-        * the 1 us delay is arbitrary, and only used because this delay will 
-        * be the same on other platforms and since it works here, it should 
+        * tighter 'C' code breaks and requires this) solves the problem -
+        * the 1 us delay is arbitrary, and only used because this delay will
+        * be the same on other platforms and since it works here, it should
         * work there.
         *
         * wingel suggests that this could be due to failing to wait
@@ -1339,50 +1162,43 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
 
        udelay(1);
 
-       dprintk(NDEBUG_SELECTION, "scsi%d : selecting target %d\n", instance->host_no, scmd_id(cmd));
+       dsprintk(NDEBUG_SELECTION, instance, "selecting target %d\n", scmd_id(cmd));
 
-       /* 
-        * The SCSI specification calls for a 250 ms timeout for the actual 
+       /*
+        * The SCSI specification calls for a 250 ms timeout for the actual
         * selection.
         */
 
-       timeout = jiffies + msecs_to_jiffies(250);
+       err = NCR5380_poll_politely(instance, STATUS_REG, SR_BSY, SR_BSY,
+                                   msecs_to_jiffies(250));
 
-       /* 
-        * XXX very interesting - we're seeing a bounce where the BSY we 
-        * asserted is being reflected / still asserted (propagation delay?)
-        * and it's detecting as true.  Sigh.
-        */
-
-       hostdata->select_time = 0;      /* we count the clock ticks at which we polled */
-       hostdata->selecting = cmd;
-
-part2:
-       /* RvC: here we enter after a sleeping period, or immediately after
-          execution of part 1
-          we poll only once ech clock tick */
-       value = NCR5380_read(STATUS_REG) & (SR_BSY | SR_IO);
-
-       if (!value && (hostdata->select_time < HZ/4)) {
-               /* RvC: we still must wait for a device response */
-               hostdata->select_time++;        /* after 25 ticks the device has failed */
-               NCR5380_set_timer(hostdata, 1);
-               return 0;       /* RvC: we return here with hostdata->selecting set,
-                                  to go to sleep */
-       }
-
-       hostdata->selecting = NULL;/* clear this pointer, because we passed the
-                                          waiting period */
        if ((NCR5380_read(STATUS_REG) & (SR_SEL | SR_IO)) == (SR_SEL | SR_IO)) {
+               spin_lock_irq(&hostdata->lock);
                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                NCR5380_reselect(instance);
-               printk("scsi%d : reselection after won arbitration?\n", instance->host_no);
+               if (!hostdata->connected)
+                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+               shost_printk(KERN_ERR, instance, "reselection after won arbitration?\n");
+               goto out;
+       }
+
+       if (err < 0) {
+               spin_lock_irq(&hostdata->lock);
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               return -1;
+               /* Can't touch cmd if it has been reclaimed by the scsi ML */
+               if (hostdata->selecting) {
+                       cmd->result = DID_BAD_TARGET << 16;
+                       complete_cmd(instance, cmd);
+                       dsprintk(NDEBUG_SELECTION, instance, "target did not respond within 250ms\n");
+                       cmd = NULL;
+               }
+               goto out;
        }
-       /* 
-        * No less than two deskew delays after the initiator detects the 
-        * BSY signal is true, it shall release the SEL signal and may 
+
+       /*
+        * No less than two deskew delays after the initiator detects the
+        * BSY signal is true, it shall release the SEL signal and may
         * change the DATA BUS.                                     -wingel
         */
 
@@ -1390,53 +1206,38 @@ part2:
 
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
 
-       if (!(NCR5380_read(STATUS_REG) & SR_BSY)) {
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               if (hostdata->targets_present & (1 << scmd_id(cmd))) {
-                       printk(KERN_DEBUG "scsi%d : weirdness\n", instance->host_no);
-                       if (hostdata->restart_select)
-                               printk(KERN_DEBUG "\trestart select\n");
-                       NCR5380_dprint(NDEBUG_SELECTION, instance);
-                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                       return -1;
-               }
-               cmd->result = DID_BAD_TARGET << 16;
-               cmd->scsi_done(cmd);
-               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               dprintk(NDEBUG_SELECTION, "scsi%d : target did not respond within 250ms\n", instance->host_no);
-               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               return 0;
-       }
-       hostdata->targets_present |= (1 << scmd_id(cmd));
-
        /*
-        * Since we followed the SCSI spec, and raised ATN while SEL 
+        * Since we followed the SCSI spec, and raised ATN while SEL
         * was true but before BSY was false during selection, the information
         * transfer phase should be a MESSAGE OUT phase so that we can send the
         * IDENTIFY message.
-        * 
+        *
         * If SCSI-II tagged queuing is enabled, we also send a SIMPLE_QUEUE_TAG
         * message (2 bytes) with a tag ID that we increment with every command
         * until it wraps back to 0.
         *
         * XXX - it turns out that there are some broken SCSI-II devices,
-        *       which claim to support tagged queuing but fail when more than
-        *       some number of commands are issued at once.
+        * which claim to support tagged queuing but fail when more than
+        * some number of commands are issued at once.
         */
 
        /* Wait for start of REQ/ACK handshake */
 
-       spin_unlock_irq(instance->host_lock);
        err = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
-       spin_lock_irq(instance->host_lock);
-       
-       if(err) {
-               printk(KERN_ERR "scsi%d: timeout at NCR5380.c:%d\n", instance->host_no, __LINE__);
+       spin_lock_irq(&hostdata->lock);
+       if (err < 0) {
+               shost_printk(KERN_ERR, instance, "select: REQ timeout\n");
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               goto failed;
+               goto out;
+       }
+       if (!hostdata->selecting) {
+               do_abort(instance);
+               goto out;
        }
 
-       dprintk(NDEBUG_SELECTION, "scsi%d : target %d selected, going into MESSAGE OUT phase.\n", instance->host_no, cmd->device->id);
+       dsprintk(NDEBUG_SELECTION, instance, "target %d selected, going into MESSAGE OUT phase.\n",
+                scmd_id(cmd));
        tmp[0] = IDENTIFY(((instance->irq == NO_IRQ) ? 0 : 1), cmd->device->lun);
 
        len = 1;
@@ -1446,104 +1247,82 @@ part2:
        data = tmp;
        phase = PHASE_MSGOUT;
        NCR5380_transfer_pio(instance, &phase, &len, &data);
-       dprintk(NDEBUG_SELECTION, "scsi%d : nexus established.\n", instance->host_no);
+       dsprintk(NDEBUG_SELECTION, instance, "nexus established.\n");
        /* XXX need to handle errors here */
+
        hostdata->connected = cmd;
-       hostdata->busy[cmd->device->id] |= (1 << (cmd->device->lun & 0xFF));
+       hostdata->busy[cmd->device->id] |= 1 << cmd->device->lun;
 
        initialize_SCp(cmd);
 
-       return 0;
-
-       /* Selection failed */
-failed:
-       return -1;
+       cmd = NULL;
 
+out:
+       if (!hostdata->selecting)
+               return NULL;
+       hostdata->selecting = NULL;
+       return cmd;
 }
 
-/* 
- * Function : int NCR5380_transfer_pio (struct Scsi_Host *instance, 
- *      unsigned char *phase, int *count, unsigned char **data)
+/*
+ * Function : int NCR5380_transfer_pio (struct Scsi_Host *instance,
+ * unsigned char *phase, int *count, unsigned char **data)
  *
  * Purpose : transfers data in given phase using polled I/O
  *
- * Inputs : instance - instance of driver, *phase - pointer to 
- *      what phase is expected, *count - pointer to number of 
- *      bytes to transfer, **data - pointer to data pointer.
- * 
+ * Inputs : instance - instance of driver, *phase - pointer to
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
+ *
  * Returns : -1 when different phase is entered without transferring
- *      maximum number of bytes, 0 if all bytes or transferred or exit
- *      is in same phase.
+ * maximum number of bytes, 0 if all bytes are transferred or exit
+ * is in same phase.
  *
- *      Also, *phase, *count, *data are modified in place.
+ * Also, *phase, *count, *data are modified in place.
  *
  * XXX Note : handling for bus free may be useful.
  */
 
 /*
- * Note : this code is not as quick as it could be, however it 
+ * Note : this code is not as quick as it could be, however it
  * IS 100% reliable, and for the actual data transfer where speed
  * counts, we will always do a pseudo DMA or DMA transfer.
  */
 
-static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data) {
-       NCR5380_local_declare();
+static int NCR5380_transfer_pio(struct Scsi_Host *instance,
+                               unsigned char *phase, int *count,
+                               unsigned char **data)
+{
        unsigned char p = *phase, tmp;
        int c = *count;
        unsigned char *d = *data;
-       /*
-        *      RvC: some administrative data to process polling time
-        */
-       int break_allowed = 0;
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-       NCR5380_setup(instance);
-
-       if (!(p & SR_IO))
-               dprintk(NDEBUG_PIO, "scsi%d : pio write %d bytes\n", instance->host_no, c);
-       else
-               dprintk(NDEBUG_PIO, "scsi%d : pio read %d bytes\n", instance->host_no, c);
 
-       /* 
-        * The NCR5380 chip will only drive the SCSI bus when the 
+       /*
+        * The NCR5380 chip will only drive the SCSI bus when the
         * phase specified in the appropriate bits of the TARGET COMMAND
         * REGISTER match the STATUS REGISTER
         */
 
-        NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
-
-       /* RvC: don't know if this is necessary, but other SCSI I/O is short
-        *      so breaks are not necessary there
-        */
-       if ((p == PHASE_DATAIN) || (p == PHASE_DATAOUT)) {
-               break_allowed = 1;
-       }
-       do {
-               /* 
-                * Wait for assertion of REQ, after which the phase bits will be 
-                * valid 
-                */
+       NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
 
-               /* RvC: we simply poll once, after that we stop temporarily
-                *      and let the device buffer fill up
-                *      if breaking is not allowed, we keep polling as long as needed
+       do {
+               /*
+                * Wait for assertion of REQ, after which the phase bits will be
+                * valid
                 */
 
-               /* FIXME */
-               while (!((tmp = NCR5380_read(STATUS_REG)) & SR_REQ) && !break_allowed);
-               if (!(tmp & SR_REQ)) {
-                       /* timeout condition */
-                       NCR5380_set_timer(hostdata, USLEEP_SLEEP);
+               if (NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ) < 0)
                        break;
-               }
 
-               dprintk(NDEBUG_HANDSHAKE, "scsi%d : REQ detected\n", instance->host_no);
+               dsprintk(NDEBUG_HANDSHAKE, instance, "REQ asserted\n");
 
                /* Check for phase mismatch */
-               if ((tmp & PHASE_MASK) != p) {
-                       dprintk(NDEBUG_HANDSHAKE, "scsi%d : phase mismatch\n", instance->host_no);
-                       NCR5380_dprint_phase(NDEBUG_HANDSHAKE, instance);
+               if ((NCR5380_read(STATUS_REG) & PHASE_MASK) != p) {
+                       dsprintk(NDEBUG_PIO, instance, "phase mismatch\n");
+                       NCR5380_dprint_phase(NDEBUG_PIO, instance);
                        break;
                }
+
                /* Do actual transfer from SCSI bus to / from memory */
                if (!(p & SR_IO))
                        NCR5380_write(OUTPUT_DATA_REG, *d);
@@ -1552,7 +1331,7 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase
 
                ++d;
 
-               /* 
+               /*
                 * The SCSI standard suggests that in MSGOUT phase, the initiator
                 * should drop ATN on the last byte of the message phase
                 * after REQ has been asserted for the handshake but before
@@ -1563,29 +1342,34 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase
                        if (!((p & SR_MSG) && c > 1)) {
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA);
                                NCR5380_dprint(NDEBUG_PIO, instance);
-                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ACK);
+                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+                                             ICR_ASSERT_DATA | ICR_ASSERT_ACK);
                        } else {
-                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ATN);
+                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+                                             ICR_ASSERT_DATA | ICR_ASSERT_ATN);
                                NCR5380_dprint(NDEBUG_PIO, instance);
-                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+                                             ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
                        }
                } else {
                        NCR5380_dprint(NDEBUG_PIO, instance);
                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ACK);
                }
 
-               /* FIXME - if this fails bus reset ?? */
-               NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, 0, 5*HZ);
-               dprintk(NDEBUG_HANDSHAKE, "scsi%d : req false, handshake complete\n", instance->host_no);
+               if (NCR5380_poll_politely(instance,
+                                         STATUS_REG, SR_REQ, 0, 5 * HZ) < 0)
+                       break;
+
+               dsprintk(NDEBUG_HANDSHAKE, instance, "REQ negated, handshake complete\n");
 
 /*
- * We have several special cases to consider during REQ/ACK handshaking : 
- * 1.  We were in MSGOUT phase, and we are on the last byte of the 
- *      message.  ATN must be dropped as ACK is dropped.
+ * We have several special cases to consider during REQ/ACK handshaking :
+ * 1.  We were in MSGOUT phase, and we are on the last byte of the
+ * message.  ATN must be dropped as ACK is dropped.
  *
- * 2.  We are in a MSGIN phase, and we are on the last byte of the  
- *      message.  We must exit with ACK asserted, so that the calling
- *      code may raise ATN before dropping ACK to reject the message.
+ * 2.  We are in a MSGIN phase, and we are on the last byte of the
+ * message.  We must exit with ACK asserted, so that the calling
+ * code may raise ATN before dropping ACK to reject the message.
  *
  * 3.  ACK and ATN are clear and the target may proceed as normal.
  */
@@ -1597,12 +1381,16 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase
                }
        } while (--c);
 
-       dprintk(NDEBUG_PIO, "scsi%d : residual %d\n", instance->host_no, c);
+       dsprintk(NDEBUG_PIO, instance, "residual %d\n", c);
 
        *count = c;
        *data = d;
        tmp = NCR5380_read(STATUS_REG);
-       if (tmp & SR_REQ)
+       /* The phase read from the bus is valid if either REQ is (already)
+        * asserted or if ACK hasn't been released yet. The latter applies if
+        * we're in MSG IN, DATA IN or STATUS and all bytes have been received.
+        */
+       if ((tmp & SR_REQ) || ((tmp & SR_IO) && c == 0))
                *phase = tmp & PHASE_MASK;
        else
                *phase = PHASE_UNKNOWN;
@@ -1614,79 +1402,80 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase
 }
 
 /**
- *     do_reset        -       issue a reset command
- *     @host: adapter to reset
+ * do_reset - issue a reset command
+ * @instance: adapter to reset
  *
- *     Issue a reset sequence to the NCR5380 and try and get the bus
- *     back into sane shape.
+ * Issue a reset sequence to the NCR5380 and try and get the bus
+ * back into sane shape.
  *
- *     Locks: caller holds queue lock
+ * This clears the reset interrupt flag because there may be no handler for
+ * it. When the driver is initialized, the NCR5380_intr() handler has not yet
+ * been installed. And when in EH we may have released the ST DMA interrupt.
  */
-static void do_reset(struct Scsi_Host *host) {
-       NCR5380_local_declare();
-       NCR5380_setup(host);
 
-       NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG) & PHASE_MASK));
+static void do_reset(struct Scsi_Host *instance)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       NCR5380_write(TARGET_COMMAND_REG,
+                     PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG) & PHASE_MASK));
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
-       udelay(25);
+       udelay(50);
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+       local_irq_restore(flags);
 }
 
-/*
- * Function : do_abort (Scsi_Host *host)
- * 
- * Purpose : abort the currently established nexus.  Should only be 
- *      called from a routine which can drop into a 
- * 
- * Returns : 0 on success, -1 on failure.
- *
- * Locks: queue lock held by caller
- *     FIXME: sort this out and get new_eh running
+/**
+ * do_abort - abort the currently established nexus by going to
+ * MESSAGE OUT phase and sending an ABORT message.
+ * @instance: relevant scsi host instance
+ *
+ * Returns 0 on success, -1 on failure.
  */
 
-static int do_abort(struct Scsi_Host *host) {
-       NCR5380_local_declare();
+static int do_abort(struct Scsi_Host *instance)
+{
        unsigned char *msgptr, phase, tmp;
        int len;
        int rc;
-       NCR5380_setup(host);
-
 
        /* Request message out phase */
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
 
-       /* 
-        * Wait for the target to indicate a valid phase by asserting 
-        * REQ.  Once this happens, we'll have either a MSGOUT phase 
-        * and can immediately send the ABORT message, or we'll have some 
+       /*
+        * Wait for the target to indicate a valid phase by asserting
+        * REQ.  Once this happens, we'll have either a MSGOUT phase
+        * and can immediately send the ABORT message, or we'll have some
         * other phase and will have to source/sink data.
-        * 
+        *
         * We really don't care what value was on the bus or what value
         * the target sees, so we just handshake.
         */
 
-       rc = NCR5380_poll_politely(host, STATUS_REG, SR_REQ, SR_REQ, 60 * HZ);
-       
-       if(rc < 0)
-               return -1;
+       rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, 10 * HZ);
+       if (rc < 0)
+               goto timeout;
+
+       tmp = NCR5380_read(STATUS_REG) & PHASE_MASK;
 
-       tmp = (unsigned char)rc;
-       
        NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
 
-       if ((tmp & PHASE_MASK) != PHASE_MSGOUT) {
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
-               rc = NCR5380_poll_politely(host, STATUS_REG, SR_REQ, 0, 3*HZ);
+       if (tmp != PHASE_MSGOUT) {
+               NCR5380_write(INITIATOR_COMMAND_REG,
+                             ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+               rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, 0, 3 * HZ);
+               if (rc < 0)
+                       goto timeout;
                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
-               if(rc == -1)
-                       return -1;
        }
+
        tmp = ABORT;
        msgptr = &tmp;
        len = 1;
        phase = PHASE_MSGOUT;
-       NCR5380_transfer_pio(host, &phase, &len, &msgptr);
+       NCR5380_transfer_pio(instance, &phase, &len, &msgptr);
 
        /*
         * If we got here, and the command completed successfully,
@@ -1694,32 +1483,37 @@ static int do_abort(struct Scsi_Host *host) {
         */
 
        return len ? -1 : 0;
+
+timeout:
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+       return -1;
 }
 
 #if defined(REAL_DMA) || defined(PSEUDO_DMA) || defined (REAL_DMA_POLL)
-/* 
- * Function : int NCR5380_transfer_dma (struct Scsi_Host *instance, 
- *      unsigned char *phase, int *count, unsigned char **data)
+/*
+ * Function : int NCR5380_transfer_dma (struct Scsi_Host *instance,
+ * unsigned char *phase, int *count, unsigned char **data)
  *
  * Purpose : transfers data in given phase using either real
- *      or pseudo DMA.
+ * or pseudo DMA.
  *
- * Inputs : instance - instance of driver, *phase - pointer to 
- *      what phase is expected, *count - pointer to number of 
- *      bytes to transfer, **data - pointer to data pointer.
- * 
- * Returns : -1 when different phase is entered without transferring
- *      maximum number of bytes, 0 if all bytes or transferred or exit
- *      is in same phase.
+ * Inputs : instance - instance of driver, *phase - pointer to
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
  *
- *      Also, *phase, *count, *data are modified in place.
+ * Returns : -1 when different phase is entered without transferring
+ * maximum number of bytes, 0 if all bytes are transferred or exit
+ * is in same phase.
  *
- *     Locks: io_request lock held by caller
+ * Also, *phase, *count, *data are modified in place.
  */
 
 
-static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data) {
-       NCR5380_local_declare();
+static int NCR5380_transfer_dma(struct Scsi_Host *instance,
+                               unsigned char *phase, int *count,
+                               unsigned char **data)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        register int c = *count;
        register unsigned char p = *phase;
        register unsigned char *d = *data;
@@ -1730,54 +1524,47 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
        unsigned char saved_data = 0, overrun = 0, residue;
 #endif
 
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-
-       NCR5380_setup(instance);
-
        if ((tmp = (NCR5380_read(STATUS_REG) & PHASE_MASK)) != p) {
                *phase = tmp;
                return -1;
        }
 #if defined(REAL_DMA) || defined(REAL_DMA_POLL)
-#ifdef READ_OVERRUNS
        if (p & SR_IO) {
-               c -= 2;
+               if (!(hostdata->flags & FLAG_NO_DMA_FIXUPS))
+                       c -= 2;
        }
-#endif
-       dprintk(NDEBUG_DMA, "scsi%d : initializing DMA channel %d for %s, %d bytes %s %0x\n", instance->host_no, instance->dma_channel, (p & SR_IO) ? "reading" : "writing", c, (p & SR_IO) ? "to" : "from", (unsigned) d);
        hostdata->dma_len = (p & SR_IO) ? NCR5380_dma_read_setup(instance, d, c) : NCR5380_dma_write_setup(instance, d, c);
+
+       dsprintk(NDEBUG_DMA, instance, "initializing DMA %s: length %d, address %p\n",
+                (p & SR_IO) ? "receive" : "send", c, *data);
 #endif
 
        NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
 
 #ifdef REAL_DMA
-       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_ENABLE_EOP_INTR | MR_MONITOR_BSY);
+       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+                               MR_ENABLE_EOP_INTR);
 #elif defined(REAL_DMA_POLL)
-       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE);
+       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY);
 #else
        /*
         * Note : on my sample board, watch-dog timeouts occurred when interrupts
-        * were not disabled for the duration of a single DMA transfer, from 
+        * were not disabled for the duration of a single DMA transfer, from
         * before the setting of DMA mode to after transfer of the last byte.
         */
 
-#if defined(PSEUDO_DMA) && defined(UNSAFE)
-       spin_unlock_irq(instance->host_lock);
-#endif
-       /* KLL May need eop and parity in 53c400 */
-       if (hostdata->flags & FLAG_NCR53C400)
-               NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE |
-                               MR_ENABLE_PAR_CHECK | MR_ENABLE_PAR_INTR |
-                               MR_ENABLE_EOP_INTR | MR_MONITOR_BSY);
+       if (hostdata->flags & FLAG_NO_DMA_FIXUP)
+               NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+                                       MR_ENABLE_EOP_INTR);
        else
-               NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE);
+               NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY);
 #endif                         /* def REAL_DMA */
 
        dprintk(NDEBUG_DMA, "scsi%d : mode reg = 0x%X\n", instance->host_no, NCR5380_read(MODE_REG));
 
-       /* 
-        *      On the PAS16 at least I/O recovery delays are not needed here.
-        *      Everyone else seems to want them.
+       /*
+        * On the PAS16 at least I/O recovery delays are not needed here.
+        * Everyone else seems to want them.
         */
 
        if (p & SR_IO) {
@@ -1797,49 +1584,49 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
        } while ((tmp & BASR_PHASE_MATCH) && !(tmp & (BASR_BUSY_ERROR | BASR_END_DMA_TRANSFER)));
 
 /*
-   At this point, either we've completed DMA, or we have a phase mismatch,
-   or we've unexpectedly lost BUSY (which is a real error).
-
-   For write DMAs, we want to wait until the last byte has been
-   transferred out over the bus before we turn off DMA mode.  Alas, there
-   seems to be no terribly good way of doing this on a 5380 under all
-   conditions.  For non-scatter-gather operations, we can wait until REQ
-   and ACK both go false, or until a phase mismatch occurs.  Gather-writes
-   are nastier, since the device will be expecting more data than we
-   are prepared to send it, and REQ will remain asserted.  On a 53C8[01] we
-   could test LAST BIT SENT to assure transfer (I imagine this is precisely
-   why this signal was added to the newer chips) but on the older 538[01]
-   this signal does not exist.  The workaround for this lack is a watchdog;
-   we bail out of the wait-loop after a modest amount of wait-time if
-   the usual exit conditions are not met.  Not a terribly clean or
-   correct solution :-%
-
-   Reads are equally tricky due to a nasty characteristic of the NCR5380.
-   If the chip is in DMA mode for an READ, it will respond to a target's
-   REQ by latching the SCSI data into the INPUT DATA register and asserting
-   ACK, even if it has _already_ been notified by the DMA controller that
-   the current DMA transfer has completed!  If the NCR5380 is then taken
-   out of DMA mode, this already-acknowledged byte is lost.
-
-   This is not a problem for "one DMA transfer per command" reads, because
-   the situation will never arise... either all of the data is DMA'ed
-   properly, or the target switches to MESSAGE IN phase to signal a
-   disconnection (either operation bringing the DMA to a clean halt).
-   However, in order to handle scatter-reads, we must work around the
-   problem.  The chosen fix is to DMA N-2 bytes, then check for the
-   condition before taking the NCR5380 out of DMA mode.  One or two extra
-   bytes are transferred via PIO as necessary to fill out the original
-   request.
+ * At this point, either we've completed DMA, or we have a phase mismatch,
+ * or we've unexpectedly lost BUSY (which is a real error).
+ *
+ * For DMA sends, we want to wait until the last byte has been
+ * transferred out over the bus before we turn off DMA mode.  Alas, there
+ * seems to be no terribly good way of doing this on a 5380 under all
+ * conditions.  For non-scatter-gather operations, we can wait until REQ
+ * and ACK both go false, or until a phase mismatch occurs.  Gather-sends
+ * are nastier, since the device will be expecting more data than we
+ * are prepared to send it, and REQ will remain asserted.  On a 53C8[01] we
+ * could test Last Byte Sent to assure transfer (I imagine this is precisely
+ * why this signal was added to the newer chips) but on the older 538[01]
+ * this signal does not exist.  The workaround for this lack is a watchdog;
+ * we bail out of the wait-loop after a modest amount of wait-time if
+ * the usual exit conditions are not met.  Not a terribly clean or
+ * correct solution :-%
+ *
+ * DMA receive is equally tricky due to a nasty characteristic of the NCR5380.
+ * If the chip is in DMA receive mode, it will respond to a target's
+ * REQ by latching the SCSI data into the INPUT DATA register and asserting
+ * ACK, even if it has _already_ been notified by the DMA controller that
+ * the current DMA transfer has completed!  If the NCR5380 is then taken
+ * out of DMA mode, this already-acknowledged byte is lost. This is
+ * not a problem for "one DMA transfer per READ command", because
+ * the situation will never arise... either all of the data is DMA'ed
+ * properly, or the target switches to MESSAGE IN phase to signal a
+ * disconnection (either operation bringing the DMA to a clean halt).
+ * However, in order to handle scatter-receive, we must work around the
+ * problem.  The chosen fix is to DMA N-2 bytes, then check for the
+ * condition before taking the NCR5380 out of DMA mode.  One or two extra
+ * bytes are transferred via PIO as necessary to fill out the original
+ * request.
  */
 
        if (p & SR_IO) {
-#ifdef READ_OVERRUNS
-               udelay(10);
-               if (((NCR5380_read(BUS_AND_STATUS_REG) & (BASR_PHASE_MATCH | BASR_ACK)) == (BASR_PHASE_MATCH | BASR_ACK))) {
-                       saved_data = NCR5380_read(INPUT_DATA_REGISTER);
-                       overrun = 1;
+               if (!(hostdata->flags & FLAG_NO_DMA_FIXUPS)) {
+                       udelay(10);
+                       if ((NCR5380_read(BUS_AND_STATUS_REG) & (BASR_PHASE_MATCH | BASR_ACK)) ==
+                           (BASR_PHASE_MATCH | BASR_ACK)) {
+                               saved_data = NCR5380_read(INPUT_DATA_REGISTER);
+                               overrun = 1;
+                       }
                }
-#endif
        } else {
                int limit = 100;
                while (((tmp = NCR5380_read(BUS_AND_STATUS_REG)) & BASR_ACK) || (NCR5380_read(STATUS_REG) & SR_REQ)) {
@@ -1850,7 +1637,8 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
                }
        }
 
-       dprintk(NDEBUG_DMA, "scsi%d : polled DMA transfer complete, basr 0x%X, sr 0x%X\n", instance->host_no, tmp, NCR5380_read(STATUS_REG));
+       dsprintk(NDEBUG_DMA, "polled DMA transfer complete, basr 0x%02x, sr 0x%02x\n",
+                tmp, NCR5380_read(STATUS_REG));
 
        NCR5380_write(MODE_REG, MR_BASE);
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
@@ -1861,8 +1649,8 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
        *data += c;
        *phase = NCR5380_read(STATUS_REG) & PHASE_MASK;
 
-#ifdef READ_OVERRUNS
-       if (*phase == p && (p & SR_IO) && residue == 0) {
+       if (!(hostdata->flags & FLAG_NO_DMA_FIXUPS) &&
+           *phase == p && (p & SR_IO) && residue == 0) {
                if (overrun) {
                        dprintk(NDEBUG_DMA, "Got an input overrun, using saved byte\n");
                        **data = saved_data;
@@ -1877,7 +1665,6 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
                NCR5380_transfer_pio(instance, phase, &cnt, data);
                *count -= toPIO - cnt;
        }
-#endif
 
        dprintk(NDEBUG_DMA, "Return with data ptr = 0x%X, count %d, last 0x%X, next 0x%X\n", *data, *count, *(*data + *count - 1), *(*data + *count));
        return 0;
@@ -1886,95 +1673,64 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
        return 0;
 #else                          /* defined(REAL_DMA_POLL) */
        if (p & SR_IO) {
-#ifdef DMA_WORKS_RIGHT
-               foo = NCR5380_pread(instance, d, c);
-#else
-               int diff = 1;
-               if (hostdata->flags & FLAG_NCR53C400) {
-                       diff = 0;
-               }
-               if (!(foo = NCR5380_pread(instance, d, c - diff))) {
+               foo = NCR5380_pread(instance, d,
+                       hostdata->flags & FLAG_NO_DMA_FIXUP ? c : c - 1);
+               if (!foo && !(hostdata->flags & FLAG_NO_DMA_FIXUP)) {
                        /*
-                        * We can't disable DMA mode after successfully transferring 
+                        * We can't disable DMA mode after successfully transferring
                         * what we plan to be the last byte, since that would open up
-                        * a race condition where if the target asserted REQ before 
+                        * a race condition where if the target asserted REQ before
                         * we got the DMA mode reset, the NCR5380 would have latched
                         * an additional byte into the INPUT DATA register and we'd
                         * have dropped it.
-                        * 
-                        * The workaround was to transfer one fewer bytes than we 
-                        * intended to with the pseudo-DMA read function, wait for 
+                        *
+                        * The workaround was to transfer one byte fewer than we
+                        * intended to with the pseudo-DMA read function, wait for
                         * the chip to latch the last byte, read it, and then disable
                         * pseudo-DMA mode.
-                        * 
+                        *
                         * After REQ is asserted, the NCR5380 asserts DRQ and ACK.
                         * REQ is deasserted when ACK is asserted, and not reasserted
                         * until ACK goes false.  Since the NCR5380 won't lower ACK
                         * until DACK is asserted, which won't happen unless we twiddle
-                        * the DMA port or we take the NCR5380 out of DMA mode, we 
-                        * can guarantee that we won't handshake another extra 
+                        * the DMA port or we take the NCR5380 out of DMA mode, we
+                        * can guarantee that we won't handshake another extra
                         * byte.
                         */
 
-                       if (!(hostdata->flags & FLAG_NCR53C400)) {
-                               while (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_DRQ));
-                               /* Wait for clean handshake */
-                               while (NCR5380_read(STATUS_REG) & SR_REQ);
-                               d[c - 1] = NCR5380_read(INPUT_DATA_REG);
+                       if (NCR5380_poll_politely(instance, BUS_AND_STATUS_REG,
+                                                 BASR_DRQ, BASR_DRQ, HZ) < 0) {
+                               foo = -1;
+                               shost_printk(KERN_ERR, instance, "PDMA read: DRQ timeout\n");
                        }
+                       if (NCR5380_poll_politely(instance, STATUS_REG,
+                                                 SR_REQ, 0, HZ) < 0) {
+                               foo = -1;
+                               shost_printk(KERN_ERR, instance, "PDMA read: !REQ timeout\n");
+                       }
+                       d[c - 1] = NCR5380_read(INPUT_DATA_REG);
                }
-#endif
        } else {
-#ifdef DMA_WORKS_RIGHT
                foo = NCR5380_pwrite(instance, d, c);
-#else
-               int timeout;
-               dprintk(NDEBUG_C400_PWRITE, "About to pwrite %d bytes\n", c);
-               if (!(foo = NCR5380_pwrite(instance, d, c))) {
+               if (!foo && !(hostdata->flags & FLAG_NO_DMA_FIXUP)) {
                        /*
-                        * Wait for the last byte to be sent.  If REQ is being asserted for 
-                        * the byte we're interested, we'll ACK it and it will go false.  
+                        * Wait for the last byte to be sent.  If REQ is being asserted for
+                        * the byte we're interested in, we'll ACK it and it will go false.
                         */
-                       if (!(hostdata->flags & FLAG_HAS_LAST_BYTE_SENT)) {
-                               timeout = 20000;
-                               while (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_DRQ) && (NCR5380_read(BUS_AND_STATUS_REG) & BASR_PHASE_MATCH));
-
-                               if (!timeout)
-                                       dprintk(NDEBUG_LAST_BYTE_SENT, "scsi%d : timed out on last byte\n", instance->host_no);
-
-                               if (hostdata->flags & FLAG_CHECK_LAST_BYTE_SENT) {
-                                       hostdata->flags &= ~FLAG_CHECK_LAST_BYTE_SENT;
-                                       if (NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT) {
-                                               hostdata->flags |= FLAG_HAS_LAST_BYTE_SENT;
-                                               dprintk(NDEBUG_LAST_BYTE_SENT, "scsi%d : last byte sent works\n", instance->host_no);
-                                       }
-                               }
-                       } else {
-                               dprintk(NDEBUG_C400_PWRITE, "Waiting for LASTBYTE\n");
-                               while (!(NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT));
-                               dprintk(NDEBUG_C400_PWRITE, "Got LASTBYTE\n");
+                       if (NCR5380_poll_politely2(instance,
+                            BUS_AND_STATUS_REG, BASR_DRQ, BASR_DRQ,
+                            BUS_AND_STATUS_REG, BASR_PHASE_MATCH, 0, HZ) < 0) {
+                               foo = -1;
+                               shost_printk(KERN_ERR, instance, "PDMA write: DRQ and phase timeout\n");
                        }
                }
-#endif
        }
        NCR5380_write(MODE_REG, MR_BASE);
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-
-       if ((!(p & SR_IO)) && (hostdata->flags & FLAG_NCR53C400)) {
-               dprintk(NDEBUG_C400_PWRITE, "53C400w: Checking for IRQ\n");
-               if (NCR5380_read(BUS_AND_STATUS_REG) & BASR_IRQ) {
-                       dprintk(NDEBUG_C400_PWRITE, "53C400w:    got it, reading reset interrupt reg\n");
-                       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-               } else {
-                       printk("53C400w:    IRQ NOT THERE!\n");
-               }
-       }
+       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
        *data = d + c;
        *count = 0;
        *phase = NCR5380_read(STATUS_REG) & PHASE_MASK;
-#if defined(PSEUDO_DMA) && defined(UNSAFE)
-       spin_lock_irq(instance->host_lock);
-#endif                         /* defined(REAL_DMA_POLL) */
        return foo;
 #endif                         /* def REAL_DMA */
 }
@@ -1983,25 +1739,23 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
 /*
  * Function : NCR5380_information_transfer (struct Scsi_Host *instance)
  *
- * Purpose : run through the various SCSI phases and do as the target 
- *      directs us to.  Operates on the currently connected command, 
- *      instance->connected.
+ * Purpose : run through the various SCSI phases and do as the target
+ * directs us to.  Operates on the currently connected command,
+ * instance->connected.
  *
  * Inputs : instance, instance for which we are doing commands
  *
- * Side effects : SCSI things happen, the disconnected queue will be 
- *      modified if a command disconnects, *instance->connected will
- *      change.
+ * Side effects : SCSI things happen, the disconnected queue will be
+ * modified if a command disconnects, *instance->connected will
+ * change.
  *
- * XXX Note : we need to watch for bus free or a reset condition here 
- *      to recover from an unexpected bus free condition.
- *
- * Locks: io_request_lock held by caller in IRQ mode
+ * XXX Note : we need to watch for bus free or a reset condition here
+ * to recover from an unexpected bus free condition.
  */
 
-static void NCR5380_information_transfer(struct Scsi_Host *instance) {
-       NCR5380_local_declare();
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *)instance->hostdata;
+static void NCR5380_information_transfer(struct Scsi_Host *instance)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned char msgout = NOP;
        int sink = 0;
        int len;
@@ -2010,13 +1764,11 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
 #endif
        unsigned char *data;
        unsigned char phase, tmp, extended_msg[10], old_phase = 0xff;
-       struct scsi_cmnd *cmd = (struct scsi_cmnd *) hostdata->connected;
-       /* RvC: we need to set the end of the polling time */
-       unsigned long poll_time = jiffies + USLEEP_POLL;
+       struct scsi_cmnd *cmd;
 
-       NCR5380_setup(instance);
+       while ((cmd = hostdata->connected)) {
+               struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
 
-       while (1) {
                tmp = NCR5380_read(STATUS_REG);
                /* We only have a valid SCSI phase when REQ is asserted */
                if (tmp & SR_REQ) {
@@ -2028,24 +1780,28 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                        if (sink && (phase != PHASE_MSGOUT)) {
                                NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
 
-                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
-                               while (NCR5380_read(STATUS_REG) & SR_REQ);
-                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
+                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN |
+                                             ICR_ASSERT_ACK);
+                               while (NCR5380_read(STATUS_REG) & SR_REQ)
+                                       ;
+                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+                                             ICR_ASSERT_ATN);
                                sink = 0;
                                continue;
                        }
+
                        switch (phase) {
-                       case PHASE_DATAIN:
                        case PHASE_DATAOUT:
 #if (NDEBUG & NDEBUG_NO_DATAOUT)
-                               printk("scsi%d : NDEBUG_NO_DATAOUT set, attempted DATAOUT aborted\n", instance->host_no);
+                               shost_printk(KERN_DEBUG, instance, "NDEBUG_NO_DATAOUT set, attempted DATAOUT aborted\n");
                                sink = 1;
                                do_abort(instance);
                                cmd->result = DID_ERROR << 16;
-                               cmd->scsi_done(cmd);
+                               complete_cmd(instance, cmd);
                                return;
 #endif
-                               /* 
+                       case PHASE_DATAIN:
+                               /*
                                 * If there is no room left in the current buffer in the
                                 * scatter-gather list, move onto the next one.
                                 */
@@ -2055,10 +1811,13 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                        --cmd->SCp.buffers_residual;
                                        cmd->SCp.this_residual = cmd->SCp.buffer->length;
                                        cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
-                                       dprintk(NDEBUG_INFORMATION, "scsi%d : %d bytes and %d buffers left\n", instance->host_no, cmd->SCp.this_residual, cmd->SCp.buffers_residual);
+                                       dsprintk(NDEBUG_INFORMATION, instance, "%d bytes and %d buffers left\n",
+                                                cmd->SCp.this_residual,
+                                                cmd->SCp.buffers_residual);
                                }
+
                                /*
-                                * The preferred transfer method is going to be 
+                                * The preferred transfer method is going to be
                                 * PSEUDO-DMA for systems that are strictly PIO,
                                 * since we can let the hardware do the handshaking.
                                 *
@@ -2068,50 +1827,39 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                 */
 
 #if defined(PSEUDO_DMA) || defined(REAL_DMA_POLL)
-                               /* KLL
-                                * PSEUDO_DMA is defined here. If this is the g_NCR5380
-                                * driver then it will always be defined, so the
-                                * FLAG_NO_PSEUDO_DMA is used to inhibit PDMA in the base
-                                * NCR5380 case.  I think this is a fairly clean solution.
-                                * We supplement these 2 if's with the flag.
-                                */
-#ifdef NCR5380_dma_xfer_len
-                               if (!cmd->device->borken && !(hostdata->flags & FLAG_NO_PSEUDO_DMA) && (transfersize = NCR5380_dma_xfer_len(instance, cmd)) != 0) {
-#else
-                               transfersize = cmd->transfersize;
-
-#ifdef LIMIT_TRANSFERSIZE      /* If we have problems with interrupt service */
-                               if (transfersize > 512)
-                                       transfersize = 512;
-#endif                         /* LIMIT_TRANSFERSIZE */
-
-                               if (!cmd->device->borken && transfersize && !(hostdata->flags & FLAG_NO_PSEUDO_DMA) && cmd->SCp.this_residual && !(cmd->SCp.this_residual % transfersize)) {
-                                       /* Limit transfers to 32K, for xx400 & xx406
-                                        * pseudoDMA that transfers in 128 bytes blocks. */
-                                       if (transfersize > 32 * 1024)
-                                               transfersize = 32 * 1024;
-#endif
+                               transfersize = 0;
+                               if (!cmd->device->borken &&
+                                   !(hostdata->flags & FLAG_NO_PSEUDO_DMA))
+                                       transfersize = NCR5380_dma_xfer_len(instance, cmd, phase);
+
+                               if (transfersize) {
                                        len = transfersize;
-                                       if (NCR5380_transfer_dma(instance, &phase, &len, (unsigned char **) &cmd->SCp.ptr)) {
+                                       if (NCR5380_transfer_dma(instance, &phase,
+                                           &len, (unsigned char **)&cmd->SCp.ptr)) {
                                                /*
-                                                * If the watchdog timer fires, all future accesses to this
-                                                * device will use the polled-IO.
+                                                * If the watchdog timer fires, all future
+                                                * accesses to this device will use the
+                                                * polled-IO.
                                                 */
                                                scmd_printk(KERN_INFO, cmd,
-                                                           "switching to slow handshake\n");
+                                                       "switching to slow handshake\n");
                                                cmd->device->borken = 1;
-                                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
                                                sink = 1;
                                                do_abort(instance);
                                                cmd->result = DID_ERROR << 16;
-                                               cmd->scsi_done(cmd);
+                                               complete_cmd(instance, cmd);
                                                /* XXX - need to source or sink data here, as appropriate */
                                        } else
                                                cmd->SCp.this_residual -= transfersize - len;
                                } else
 #endif                         /* defined(PSEUDO_DMA) || defined(REAL_DMA_POLL) */
-                                       NCR5380_transfer_pio(instance, &phase, (int *) &cmd->SCp.this_residual, (unsigned char **)
-                                                            &cmd->SCp.ptr);
+                               {
+                                       spin_unlock_irq(&hostdata->lock);
+                                       NCR5380_transfer_pio(instance, &phase,
+                                                            (int *)&cmd->SCp.this_residual,
+                                                            (unsigned char **)&cmd->SCp.ptr);
+                                       spin_lock_irq(&hostdata->lock);
+                               }
                                break;
                        case PHASE_MSGIN:
                                len = 1;
@@ -2120,101 +1868,42 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                cmd->SCp.Message = tmp;
 
                                switch (tmp) {
-                                       /*
-                                        * Linking lets us reduce the time required to get the 
-                                        * next command out to the device, hopefully this will
-                                        * mean we don't waste another revolution due to the delays
-                                        * required by ARBITRATION and another SELECTION.
-                                        *
-                                        * In the current implementation proposal, low level drivers
-                                        * merely have to start the next command, pointed to by 
-                                        * next_link, done() is called as with unlinked commands.
-                                        */
-#ifdef LINKED
-                               case LINKED_CMD_COMPLETE:
-                               case LINKED_FLG_CMD_COMPLETE:
-                                       /* Accept message by clearing ACK */
-                                       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       dprintk(NDEBUG_LINKED, "scsi%d : target %d lun %llu linked command complete.\n", instance->host_no, cmd->device->id, cmd->device->lun);
-                                       /* 
-                                        * Sanity check : A linked command should only terminate with
-                                        * one of these messages if there are more linked commands
-                                        * available.
-                                        */
-                                       if (!cmd->next_link) {
-                                           printk("scsi%d : target %d lun %llu linked command complete, no next_link\n" instance->host_no, cmd->device->id, cmd->device->lun);
-                                               sink = 1;
-                                               do_abort(instance);
-                                               return;
-                                       }
-                                       initialize_SCp(cmd->next_link);
-                                       /* The next command is still part of this process */
-                                       cmd->next_link->tag = cmd->tag;
-                                       cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
-                                       dprintk(NDEBUG_LINKED, "scsi%d : target %d lun %llu linked request done, calling scsi_done().\n", instance->host_no, cmd->device->id, cmd->device->lun);
-                                       cmd->scsi_done(cmd);
-                                       cmd = hostdata->connected;
-                                       break;
-#endif                         /* def LINKED */
                                case ABORT:
                                case COMMAND_COMPLETE:
                                        /* Accept message by clearing ACK */
                                        sink = 1;
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       hostdata->connected = NULL;
-                                       dprintk(NDEBUG_QUEUES, "scsi%d : command for target %d, lun %llu completed\n", instance->host_no, cmd->device->id, cmd->device->lun);
-                                       hostdata->busy[cmd->device->id] &= ~(1 << (cmd->device->lun & 0xFF));
-
-                                       /* 
-                                        * I'm not sure what the correct thing to do here is : 
-                                        * 
-                                        * If the command that just executed is NOT a request 
-                                        * sense, the obvious thing to do is to set the result
-                                        * code to the values of the stored parameters.
-                                        * 
-                                        * If it was a REQUEST SENSE command, we need some way 
-                                        * to differentiate between the failure code of the original
-                                        * and the failure code of the REQUEST sense - the obvious
-                                        * case is success, where we fall through and leave the result
-                                        * code unchanged.
-                                        * 
-                                        * The non-obvious place is where the REQUEST SENSE failed 
-                                        */
-
-                                       if (cmd->cmnd[0] != REQUEST_SENSE)
-                                               cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
-                                       else if (status_byte(cmd->SCp.Status) != GOOD)
-                                               cmd->result = (cmd->result & 0x00ffff) | (DID_ERROR << 16);
-
-                                       if ((cmd->cmnd[0] == REQUEST_SENSE) &&
-                                               hostdata->ses.cmd_len) {
-                                               scsi_eh_restore_cmnd(cmd, &hostdata->ses);
-                                               hostdata->ses.cmd_len = 0 ;
-                                       }
+                                       dsprintk(NDEBUG_QUEUES, instance,
+                                                "COMMAND COMPLETE %p target %d lun %llu\n",
+                                                cmd, scmd_id(cmd), cmd->device->lun);
 
-                                       if ((cmd->cmnd[0] != REQUEST_SENSE) && (status_byte(cmd->SCp.Status) == CHECK_CONDITION)) {
-                                               scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
-
-                                               dprintk(NDEBUG_AUTOSENSE, "scsi%d : performing request sense\n", instance->host_no);
+                                       hostdata->connected = NULL;
 
-                                               LIST(cmd, hostdata->issue_queue);
-                                               cmd->host_scribble = (unsigned char *)
-                                                   hostdata->issue_queue;
-                                               hostdata->issue_queue = (struct scsi_cmnd *) cmd;
-                                               dprintk(NDEBUG_QUEUES, "scsi%d : REQUEST SENSE added to head of issue queue\n", instance->host_no);
-                                       } else {
-                                               cmd->scsi_done(cmd);
+                                       cmd->result &= ~0xffff;
+                                       cmd->result |= cmd->SCp.Status;
+                                       cmd->result |= cmd->SCp.Message << 8;
+
+                                       if (cmd->cmnd[0] == REQUEST_SENSE)
+                                               complete_cmd(instance, cmd);
+                                       else {
+                                               if (cmd->SCp.Status == SAM_STAT_CHECK_CONDITION ||
+                                                   cmd->SCp.Status == SAM_STAT_COMMAND_TERMINATED) {
+                                                       dsprintk(NDEBUG_QUEUES, instance, "autosense: adding cmd %p to tail of autosense queue\n",
+                                                                cmd);
+                                                       list_add_tail(&ncmd->list,
+                                                                     &hostdata->autosense);
+                                               } else
+                                                       complete_cmd(instance, cmd);
                                        }
 
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                                       /* 
-                                        * Restore phase bits to 0 so an interrupted selection, 
+                                       /*
+                                        * Restore phase bits to 0 so an interrupted selection,
                                         * arbitration can resume.
                                         */
                                        NCR5380_write(TARGET_COMMAND_REG, 0);
 
-                                       while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
-                                               barrier();
+                                       /* Enable reselect interrupts */
+                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
                                        return;
                                case MESSAGE_REJECT:
                                        /* Accept message by clearing ACK */
@@ -2229,38 +1918,33 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                        default:
                                                break;
                                        }
-                               case DISCONNECT:{
-                                               /* Accept message by clearing ACK */
-                                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                               cmd->device->disconnect = 1;
-                                               LIST(cmd, hostdata->disconnected_queue);
-                                               cmd->host_scribble = (unsigned char *)
-                                                   hostdata->disconnected_queue;
-                                               hostdata->connected = NULL;
-                                               hostdata->disconnected_queue = cmd;
-                                               dprintk(NDEBUG_QUEUES, "scsi%d : command for target %d lun %llu was moved from connected to" "  the disconnected_queue\n", instance->host_no, cmd->device->id, cmd->device->lun);
-                                               /* 
-                                                * Restore phase bits to 0 so an interrupted selection, 
-                                                * arbitration can resume.
-                                                */
-                                               NCR5380_write(TARGET_COMMAND_REG, 0);
-
-                                               /* Enable reselect interrupts */
-                                               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                                               /* Wait for bus free to avoid nasty timeouts - FIXME timeout !*/
-                                               /* NCR538_poll_politely(instance, STATUS_REG, SR_BSY, 0, 30 * HZ); */
-                                               while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
-                                                       barrier();
-                                               return;
-                                       }
-                                       /* 
+                                       break;
+                               case DISCONNECT:
+                                       /* Accept message by clearing ACK */
+                                       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+                                       hostdata->connected = NULL;
+                                       list_add(&ncmd->list, &hostdata->disconnected);
+                                       dsprintk(NDEBUG_INFORMATION | NDEBUG_QUEUES,
+                                                instance, "connected command %p for target %d lun %llu moved to disconnected queue\n",
+                                                cmd, scmd_id(cmd), cmd->device->lun);
+
+                                       /*
+                                        * Restore phase bits to 0 so an interrupted selection,
+                                        * arbitration can resume.
+                                        */
+                                       NCR5380_write(TARGET_COMMAND_REG, 0);
+
+                                       /* Enable reselect interrupts */
+                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+                                       return;
+                                       /*
                                         * The SCSI data pointer is *IMPLICITLY* saved on a disconnect
-                                        * operation, in violation of the SCSI spec so we can safely 
+                                        * operation, in violation of the SCSI spec so we can safely
                                         * ignore SAVE/RESTORE pointers calls.
                                         *
-                                        * Unfortunately, some disks violate the SCSI spec and 
+                                        * Unfortunately, some disks violate the SCSI spec and
                                         * don't issue the required SAVE_POINTERS message before
-                                        * disconnecting, and we have to break spec to remain 
+                                        * disconnecting, and we have to break spec to remain
                                         * compatible.
                                         */
                                case SAVE_POINTERS:
@@ -2269,31 +1953,28 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                                        break;
                                case EXTENDED_MESSAGE:
-/* 
- * Extended messages are sent in the following format :
- * Byte         
- * 0            EXTENDED_MESSAGE == 1
- * 1            length (includes one byte for code, doesn't 
- *              include first two bytes)
- * 2            code
- * 3..length+1  arguments
- *
- * Start the extended message buffer with the EXTENDED_MESSAGE
- * byte, since spi_print_msg() wants the whole thing.  
- */
+                                       /*
+                                        * Start the message buffer with the EXTENDED_MESSAGE
+                                        * byte, since spi_print_msg() wants the whole thing.
+                                        */
                                        extended_msg[0] = EXTENDED_MESSAGE;
                                        /* Accept first byte by clearing ACK */
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       dprintk(NDEBUG_EXTENDED, "scsi%d : receiving extended message\n", instance->host_no);
+
+                                       spin_unlock_irq(&hostdata->lock);
+
+                                       dsprintk(NDEBUG_EXTENDED, instance, "receiving extended message\n");
 
                                        len = 2;
                                        data = extended_msg + 1;
                                        phase = PHASE_MSGIN;
                                        NCR5380_transfer_pio(instance, &phase, &len, &data);
+                                       dsprintk(NDEBUG_EXTENDED, instance, "length %d, code 0x%02x\n",
+                                                (int)extended_msg[1],
+                                                (int)extended_msg[2]);
 
-                                       dprintk(NDEBUG_EXTENDED, "scsi%d : length=%d, code=0x%02x\n", instance->host_no, (int) extended_msg[1], (int) extended_msg[2]);
-
-                                       if (!len && extended_msg[1] <= (sizeof(extended_msg) - 1)) {
+                                       if (!len && extended_msg[1] > 0 &&
+                                           extended_msg[1] <= sizeof(extended_msg) - 2) {
                                                /* Accept third byte by clearing ACK */
                                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                                                len = extended_msg[1] - 1;
@@ -2301,7 +1982,8 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                                phase = PHASE_MSGIN;
 
                                                NCR5380_transfer_pio(instance, &phase, &len, &data);
-                                               dprintk(NDEBUG_EXTENDED, "scsi%d : message received, residual %d\n", instance->host_no, len);
+                                               dsprintk(NDEBUG_EXTENDED, instance, "message received, residual %d\n",
+                                                        len);
 
                                                switch (extended_msg[2]) {
                                                case EXTENDED_SDTR:
@@ -2311,34 +1993,42 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                                        tmp = 0;
                                                }
                                        } else if (len) {
-                                               printk("scsi%d: error receiving extended message\n", instance->host_no);
+                                               shost_printk(KERN_ERR, instance, "error receiving extended message\n");
                                                tmp = 0;
                                        } else {
-                                               printk("scsi%d: extended message code %02x length %d is too long\n", instance->host_no, extended_msg[2], extended_msg[1]);
+                                               shost_printk(KERN_NOTICE, instance, "extended message code %02x length %d is too long\n",
+                                                            extended_msg[2], extended_msg[1]);
                                                tmp = 0;
                                        }
+
+                                       spin_lock_irq(&hostdata->lock);
+                                       if (!hostdata->connected)
+                                               return;
+
                                        /* Fall through to reject message */
 
-                                       /* 
-                                        * If we get something weird that we aren't expecting, 
+                                       /*
+                                        * If we get something weird that we aren't expecting,
                                         * reject it.
                                         */
                                default:
                                        if (!tmp) {
-                                               printk("scsi%d: rejecting message ", instance->host_no);
+                                               shost_printk(KERN_ERR, instance, "rejecting message ");
                                                spi_print_msg(extended_msg);
                                                printk("\n");
                                        } else if (tmp != EXTENDED_MESSAGE)
                                                scmd_printk(KERN_INFO, cmd,
-                                                       "rejecting unknown message %02x\n",tmp);
+                                                           "rejecting unknown message %02x\n",
+                                                           tmp);
                                        else
                                                scmd_printk(KERN_INFO, cmd,
-                                                       "rejecting unknown extended message code %02x, length %d\n", extended_msg[1], extended_msg[0]);
+                                                           "rejecting unknown extended message code %02x, length %d\n",
+                                                           extended_msg[1], extended_msg[0]);
 
                                        msgout = MESSAGE_REJECT;
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
                                        break;
-                               }       /* switch (tmp) */
+                               } /* switch (tmp) */
                                break;
                        case PHASE_MSGOUT:
                                len = 1;
@@ -2346,10 +2036,9 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                hostdata->last_message = msgout;
                                NCR5380_transfer_pio(instance, &phase, &len, &data);
                                if (msgout == ABORT) {
-                                       hostdata->busy[cmd->device->id] &= ~(1 << (cmd->device->lun & 0xFF));
                                        hostdata->connected = NULL;
                                        cmd->result = DID_ERROR << 16;
-                                       cmd->scsi_done(cmd);
+                                       complete_cmd(instance, cmd);
                                        NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
                                        return;
                                }
@@ -2358,17 +2047,12 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                        case PHASE_CMDOUT:
                                len = cmd->cmd_len;
                                data = cmd->cmnd;
-                               /* 
-                                * XXX for performance reasons, on machines with a 
-                                * PSEUDO-DMA architecture we should probably 
-                                * use the dma transfer function.  
+                               /*
+                                * XXX for performance reasons, on machines with a
+                                * PSEUDO-DMA architecture we should probably
+                                * use the dma transfer function.
                                 */
                                NCR5380_transfer_pio(instance, &phase, &len, &data);
-                               if (!cmd->device->disconnect && should_disconnect(cmd->cmnd[0])) {
-                                       NCR5380_set_timer(hostdata, USLEEP_SLEEP);
-                                       dprintk(NDEBUG_USLEEP, "scsi%d : issued command, sleeping until %lu\n", instance->host_no, hostdata->time_expires);
-                                       return;
-                               }
                                break;
                        case PHASE_STATIN:
                                len = 1;
@@ -2377,46 +2061,37 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                cmd->SCp.Status = tmp;
                                break;
                        default:
-                               printk("scsi%d : unknown phase\n", instance->host_no);
+                               shost_printk(KERN_ERR, instance, "unknown phase\n");
                                NCR5380_dprint(NDEBUG_ANY, instance);
-                       }       /* switch(phase) */
-               }               /* if (tmp * SR_REQ) */
-               else {
-                       /* RvC: go to sleep if polling time expired
-                        */
-                       if (!cmd->device->disconnect && time_after_eq(jiffies, poll_time)) {
-                               NCR5380_set_timer(hostdata, USLEEP_SLEEP);
-                               dprintk(NDEBUG_USLEEP, "scsi%d : poll timed out, sleeping until %lu\n", instance->host_no, hostdata->time_expires);
-                               return;
-                       }
+                       } /* switch(phase) */
+               } else {
+                       spin_unlock_irq(&hostdata->lock);
+                       NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
+                       spin_lock_irq(&hostdata->lock);
                }
-       }                       /* while (1) */
+       }
 }
 
 /*
  * Function : void NCR5380_reselect (struct Scsi_Host *instance)
  *
- * Purpose : does reselection, initializing the instance->connected 
- *      field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
- *      nexus has been reestablished,
- *      
- * Inputs : instance - this instance of the NCR5380.
+ * Purpose : does reselection, initializing the instance->connected
+ * field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
+ * nexus has been reestablished,
  *
- * Locks: io_request_lock held by caller if IRQ driven
+ * Inputs : instance - this instance of the NCR5380.
  */
 
-static void NCR5380_reselect(struct Scsi_Host *instance) {
-       NCR5380_local_declare();
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *)
-        instance->hostdata;
+static void NCR5380_reselect(struct Scsi_Host *instance)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned char target_mask;
        unsigned char lun, phase;
        int len;
        unsigned char msg[3];
        unsigned char *data;
-       struct scsi_cmnd *tmp = NULL, *prev;
-       int abort = 0;
-       NCR5380_setup(instance);
+       struct NCR5380_cmd *ncmd;
+       struct scsi_cmnd *tmp;
 
        /*
         * Disable arbitration, etc. since the host adapter obviously
@@ -2424,12 +2099,12 @@ static void NCR5380_reselect(struct Scsi_Host *instance) {
         */
 
        NCR5380_write(MODE_REG, MR_BASE);
-       hostdata->restart_select = 1;
 
        target_mask = NCR5380_read(CURRENT_SCSI_DATA_REG) & ~(hostdata->id_mask);
-       dprintk(NDEBUG_SELECTION, "scsi%d : reselect\n", instance->host_no);
 
-       /* 
+       dsprintk(NDEBUG_RESELECTION, instance, "reselect\n");
+
+       /*
         * At this point, we have detected that our SCSI ID is on the bus,
         * SEL is true and BSY was false for at least one bus settle delay
         * (400 ns).
@@ -2439,103 +2114,110 @@ static void NCR5380_reselect(struct Scsi_Host *instance) {
         */
 
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY);
-
-       /* FIXME: timeout too long, must fail to workqueue */   
-       if(NCR5380_poll_politely(instance, STATUS_REG, SR_SEL, 0, 2*HZ)<0)
-               abort = 1;
-               
+       if (NCR5380_poll_politely(instance,
+                                 STATUS_REG, SR_SEL, 0, 2 * HZ) < 0) {
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+               return;
+       }
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
        /*
         * Wait for target to go into MSGIN.
-        * FIXME: timeout needed and fail to work queeu
         */
 
-       if(NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, 2*HZ))
-               abort = 1;
+       if (NCR5380_poll_politely(instance,
+                                 STATUS_REG, SR_REQ, SR_REQ, 2 * HZ) < 0) {
+               do_abort(instance);
+               return;
+       }
 
        len = 1;
        data = msg;
        phase = PHASE_MSGIN;
        NCR5380_transfer_pio(instance, &phase, &len, &data);
 
+       if (len) {
+               do_abort(instance);
+               return;
+       }
+
        if (!(msg[0] & 0x80)) {
-               printk(KERN_ERR "scsi%d : expecting IDENTIFY message, got ", instance->host_no);
+               shost_printk(KERN_ERR, instance, "expecting IDENTIFY message, got ");
                spi_print_msg(msg);
-               abort = 1;
-       } else {
-               /* Accept message by clearing ACK */
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               lun = (msg[0] & 0x07);
+               printk("\n");
+               do_abort(instance);
+               return;
+       }
+       lun = msg[0] & 0x07;
 
-               /* 
-                * We need to add code for SCSI-II to track which devices have
-                * I_T_L_Q nexuses established, and which have simple I_T_L
-                * nexuses so we can chose to do additional data transfer.
-                */
+       /*
+        * We need to add code for SCSI-II to track which devices have
+        * I_T_L_Q nexuses established, and which have simple I_T_L
+        * nexuses so we can chose to do additional data transfer.
+        */
 
-               /* 
-                * Find the command corresponding to the I_T_L or I_T_L_Q  nexus we 
-                * just reestablished, and remove it from the disconnected queue.
-                */
+       /*
+        * Find the command corresponding to the I_T_L or I_T_L_Q  nexus we
+        * just reestablished, and remove it from the disconnected queue.
+        */
 
+       tmp = NULL;
+       list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+               struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
 
-               for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue, prev = NULL; tmp; prev = tmp, tmp = (struct scsi_cmnd *) tmp->host_scribble)
-                       if ((target_mask == (1 << tmp->device->id)) && (lun == (u8)tmp->device->lun)
-                           ) {
-                               if (prev) {
-                                       REMOVE(prev, prev->host_scribble, tmp, tmp->host_scribble);
-                                       prev->host_scribble = tmp->host_scribble;
-                               } else {
-                                       REMOVE(-1, hostdata->disconnected_queue, tmp, tmp->host_scribble);
-                                       hostdata->disconnected_queue = (struct scsi_cmnd *) tmp->host_scribble;
-                               }
-                               tmp->host_scribble = NULL;
-                               break;
-                       }
-               if (!tmp) {
-                       printk(KERN_ERR "scsi%d : warning : target bitmask %02x lun %d not in disconnect_queue.\n", instance->host_no, target_mask, lun);
-                       /* 
-                        * Since we have an established nexus that we can't do anything with,
-                        * we must abort it.  
-                        */
-                       abort = 1;
+               if (target_mask == (1 << scmd_id(cmd)) &&
+                   lun == (u8)cmd->device->lun) {
+                       list_del(&ncmd->list);
+                       tmp = cmd;
+                       break;
                }
        }
 
-       if (abort) {
-               do_abort(instance);
+       if (tmp) {
+               dsprintk(NDEBUG_RESELECTION | NDEBUG_QUEUES, instance,
+                        "reselect: removed %p from disconnected queue\n", tmp);
        } else {
-               hostdata->connected = tmp;
-               dprintk(NDEBUG_RESELECTION, "scsi%d : nexus established, target = %d, lun = %llu, tag = %d\n", instance->host_no, tmp->device->id, tmp->device->lun, tmp->tag);
+               shost_printk(KERN_ERR, instance, "target bitmask 0x%02x lun %d not in disconnected queue.\n",
+                            target_mask, lun);
+               /*
+                * Since we have an established nexus that we can't do anything
+                * with, we must abort it.
+                */
+               do_abort(instance);
+               return;
        }
+
+       /* Accept message by clearing ACK */
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+
+       hostdata->connected = tmp;
+       dsprintk(NDEBUG_RESELECTION, instance, "nexus established, target %d, lun %llu, tag %d\n",
+                scmd_id(tmp), tmp->device->lun, tmp->tag);
 }
 
 /*
  * Function : void NCR5380_dma_complete (struct Scsi_Host *instance)
  *
  * Purpose : called by interrupt handler when DMA finishes or a phase
- *      mismatch occurs (which would finish the DMA transfer).  
+ * mismatch occurs (which would finish the DMA transfer).
  *
  * Inputs : instance - this instance of the NCR5380.
  *
  * Returns : pointer to the scsi_cmnd structure for which the I_T_L
- *      nexus has been reestablished, on failure NULL is returned.
+ * nexus has been reestablished, on failure NULL is returned.
  */
 
 #ifdef REAL_DMA
 static void NCR5380_dma_complete(NCR5380_instance * instance) {
-       NCR5380_local_declare();
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int transferred;
-       NCR5380_setup(instance);
 
        /*
         * XXX this might not be right.
         *
         * Wait for final byte to transfer, ie wait for ACK to go false.
         *
-        * We should use the Last Byte Sent bit, unfortunately this is 
+        * We should use the Last Byte Sent bit, unfortunately this is
         * not available on the 5380/5381 (only the various CMOS chips)
         *
         * FIXME: timeout, and need to handle long timeout/irq case
@@ -2543,7 +2225,6 @@ static void NCR5380_dma_complete(NCR5380_instance * instance) {
 
        NCR5380_poll_politely(instance, BUS_AND_STATUS_REG, BASR_ACK, 0, 5*HZ);
 
-       NCR5380_write(MODE_REG, MR_BASE);
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
        /*
@@ -2560,190 +2241,251 @@ static void NCR5380_dma_complete(NCR5380_instance * instance) {
 }
 #endif                         /* def REAL_DMA */
 
-/*
- * Function : int NCR5380_abort (struct scsi_cmnd *cmd)
- *
- * Purpose : abort a command
- *
- * Inputs : cmd - the scsi_cmnd to abort, code - code to set the
- *      host byte of the result field to, if zero DID_ABORTED is
- *      used.
- *
- * Returns : SUCCESS - success, FAILED on failure.
- *
- *     XXX - there is no way to abort the command that is currently
- *     connected, you have to wait for it to complete.  If this is
- *     a problem, we could implement longjmp() / setjmp(), setjmp()
- *     called where the loop started in NCR5380_main().
- *
- * Locks: host lock taken by caller
+/**
+ * list_find_cmd - test for presence of a command in a linked list
+ * @haystack: list of commands
+ * @needle: command to search for
  */
 
-static int NCR5380_abort(struct scsi_cmnd *cmd)
+static bool list_find_cmd(struct list_head *haystack,
+                          struct scsi_cmnd *needle)
 {
-       NCR5380_local_declare();
-       struct Scsi_Host *instance = cmd->device->host;
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-       struct scsi_cmnd *tmp, **prev;
+       struct NCR5380_cmd *ncmd;
 
-       scmd_printk(KERN_WARNING, cmd, "aborting command\n");
+       list_for_each_entry(ncmd, haystack, list)
+               if (NCR5380_to_scmd(ncmd) == needle)
+                       return true;
+       return false;
+}
 
-       NCR5380_print_status(instance);
+/**
+ * list_del_cmd - remove a command from linked list
+ * @haystack: list of commands
+ * @needle: command to remove
+ */
 
-       NCR5380_setup(instance);
+static bool list_del_cmd(struct list_head *haystack,
+                         struct scsi_cmnd *needle)
+{
+       if (list_find_cmd(haystack, needle)) {
+               struct NCR5380_cmd *ncmd = scsi_cmd_priv(needle);
 
-       dprintk(NDEBUG_ABORT, "scsi%d : abort called\n", instance->host_no);
-       dprintk(NDEBUG_ABORT, "        basr 0x%X, sr 0x%X\n", NCR5380_read(BUS_AND_STATUS_REG), NCR5380_read(STATUS_REG));
+               list_del(&ncmd->list);
+               return true;
+       }
+       return false;
+}
 
-#if 0
-/*
- * Case 1 : If the command is the currently executing command, 
- * we'll set the aborted flag and return control so that 
- * information transfer routine can exit cleanly.
+/**
+ * NCR5380_abort - scsi host eh_abort_handler() method
+ * @cmd: the command to be aborted
+ *
+ * Try to abort a given command by removing it from queues and/or sending
+ * the target an abort message. This may not succeed in causing a target
+ * to abort the command. Nonetheless, the low-level driver must forget about
+ * the command because the mid-layer reclaims it and it may be re-issued.
+ *
+ * The normal path taken by a command is as follows. For EH we trace this
+ * same path to locate and abort the command.
+ *
+ * unissued -> selecting -> [unissued -> selecting ->]... connected ->
+ * [disconnected -> connected ->]...
+ * [autosense -> connected ->] done
+ *
+ * If cmd is unissued then just remove it.
+ * If cmd is disconnected, try to select the target.
+ * If cmd is connected, try to send an abort message.
+ * If cmd is waiting for autosense, give it a chance to complete but check
+ * that it isn't left connected.
+ * If cmd was not found at all then presumably it has already been completed,
+ * in which case return SUCCESS to try to avoid further EH measures.
+ * If the command has not completed yet, we must not fail to find it.
  */
 
-       if (hostdata->connected == cmd) {
-               dprintk(NDEBUG_ABORT, "scsi%d : aborting connected command\n", instance->host_no);
-               hostdata->aborted = 1;
-/*
- * We should perform BSY checking, and make sure we haven't slipped
- * into BUS FREE.
- */
+static int NCR5380_abort(struct scsi_cmnd *cmd)
+{
+       struct Scsi_Host *instance = cmd->device->host;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       unsigned long flags;
+       int result = SUCCESS;
 
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_ASSERT_ATN);
-/* 
- * Since we can't change phases until we've completed the current 
- * handshake, we have to source or sink a byte of data if the current
- * phase is not MSGOUT.
- */
+       spin_lock_irqsave(&hostdata->lock, flags);
 
-/* 
- * Return control to the executing NCR drive so we can clear the
- * aborted flag and get back into our main loop.
- */
+#if (NDEBUG & NDEBUG_ANY)
+       scmd_printk(KERN_INFO, cmd, __func__);
+#endif
+       NCR5380_dprint(NDEBUG_ANY, instance);
+       NCR5380_dprint_phase(NDEBUG_ANY, instance);
 
-               return SUCCESS;
+       if (list_del_cmd(&hostdata->unissued, cmd)) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: removed %p from issue queue\n", cmd);
+               cmd->result = DID_ABORT << 16;
+               cmd->scsi_done(cmd); /* No tag or busy flag to worry about */
        }
-#endif
 
-/* 
- * Case 2 : If the command hasn't been issued yet, we simply remove it 
- *          from the issue queue.
- */
-       dprintk(NDEBUG_ABORT, "scsi%d : abort going into loop.\n", instance->host_no);
-       for (prev = (struct scsi_cmnd **) &(hostdata->issue_queue), tmp = (struct scsi_cmnd *) hostdata->issue_queue; tmp; prev = (struct scsi_cmnd **) &(tmp->host_scribble), tmp = (struct scsi_cmnd *) tmp->host_scribble)
-               if (cmd == tmp) {
-                       REMOVE(5, *prev, tmp, tmp->host_scribble);
-                       (*prev) = (struct scsi_cmnd *) tmp->host_scribble;
-                       tmp->host_scribble = NULL;
-                       tmp->result = DID_ABORT << 16;
-                       dprintk(NDEBUG_ABORT, "scsi%d : abort removed command from issue queue.\n", instance->host_no);
-                       tmp->scsi_done(tmp);
-                       return SUCCESS;
+       if (hostdata->selecting == cmd) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: cmd %p == selecting\n", cmd);
+               hostdata->selecting = NULL;
+               cmd->result = DID_ABORT << 16;
+               complete_cmd(instance, cmd);
+               goto out;
+       }
+
+       if (list_del_cmd(&hostdata->disconnected, cmd)) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: removed %p from disconnected list\n", cmd);
+               cmd->result = DID_ERROR << 16;
+               if (!hostdata->connected)
+                       NCR5380_select(instance, cmd);
+               if (hostdata->connected != cmd) {
+                       complete_cmd(instance, cmd);
+                       result = FAILED;
+                       goto out;
+               }
+       }
+
+       if (hostdata->connected == cmd) {
+               dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+               hostdata->connected = NULL;
+               if (do_abort(instance)) {
+                       set_host_byte(cmd, DID_ERROR);
+                       complete_cmd(instance, cmd);
+                       result = FAILED;
+                       goto out;
                }
-#if (NDEBUG  & NDEBUG_ABORT)
-       /* KLL */
-               else if (prev == tmp)
-                       printk(KERN_ERR "scsi%d : LOOP\n", instance->host_no);
+               set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+               hostdata->dma_len = 0;
 #endif
+               if (cmd->cmnd[0] == REQUEST_SENSE)
+                       complete_cmd(instance, cmd);
+               else {
+                       struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
 
-/* 
- * Case 3 : If any commands are connected, we're going to fail the abort
- *          and let the high level SCSI driver retry at a later time or 
- *          issue a reset.
- *
- *          Timeouts, and therefore aborted commands, will be highly unlikely
- *          and handling them cleanly in this situation would make the common
- *          case of noresets less efficient, and would pollute our code.  So,
- *          we fail.
- */
+                       /* Perform autosense for this command */
+                       list_add(&ncmd->list, &hostdata->autosense);
+               }
+       }
 
-       if (hostdata->connected) {
-               dprintk(NDEBUG_ABORT, "scsi%d : abort failed, command connected.\n", instance->host_no);
-               return FAILED;
+       if (list_find_cmd(&hostdata->autosense, cmd)) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: found %p on sense queue\n", cmd);
+               spin_unlock_irqrestore(&hostdata->lock, flags);
+               queue_work(hostdata->work_q, &hostdata->main_task);
+               msleep(1000);
+               spin_lock_irqsave(&hostdata->lock, flags);
+               if (list_del_cmd(&hostdata->autosense, cmd)) {
+                       dsprintk(NDEBUG_ABORT, instance,
+                                "abort: removed %p from sense queue\n", cmd);
+                       set_host_byte(cmd, DID_ABORT);
+                       complete_cmd(instance, cmd);
+                       goto out;
+               }
        }
-/*
- * Case 4: If the command is currently disconnected from the bus, and 
- *      there are no connected commands, we reconnect the I_T_L or 
- *      I_T_L_Q nexus associated with it, go into message out, and send 
- *      an abort message.
- *
- * This case is especially ugly. In order to reestablish the nexus, we
- * need to call NCR5380_select().  The easiest way to implement this 
- * function was to abort if the bus was busy, and let the interrupt
- * handler triggered on the SEL for reselect take care of lost arbitrations
- * where necessary, meaning interrupts need to be enabled.
- *
- * When interrupts are enabled, the queues may change - so we 
- * can't remove it from the disconnected queue before selecting it
- * because that could cause a failure in hashing the nexus if that 
- * device reselected.
- * 
- * Since the queues may change, we can't use the pointers from when we
- * first locate it.
- *
- * So, we must first locate the command, and if NCR5380_select()
- * succeeds, then issue the abort, relocate the command and remove
- * it from the disconnected queue.
- */
 
-       for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue; tmp; tmp = (struct scsi_cmnd *) tmp->host_scribble)
-               if (cmd == tmp) {
-                       dprintk(NDEBUG_ABORT, "scsi%d : aborting disconnected command.\n", instance->host_no);
+       if (hostdata->connected == cmd) {
+               dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+               hostdata->connected = NULL;
+               if (do_abort(instance)) {
+                       set_host_byte(cmd, DID_ERROR);
+                       complete_cmd(instance, cmd);
+                       result = FAILED;
+                       goto out;
+               }
+               set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+               hostdata->dma_len = 0;
+#endif
+               complete_cmd(instance, cmd);
+       }
 
-                       if (NCR5380_select(instance, cmd))
-                               return FAILED;
-                       dprintk(NDEBUG_ABORT, "scsi%d : nexus reestablished.\n", instance->host_no);
+out:
+       if (result == FAILED)
+               dsprintk(NDEBUG_ABORT, instance, "abort: failed to abort %p\n", cmd);
+       else
+               dsprintk(NDEBUG_ABORT, instance, "abort: successfully aborted %p\n", cmd);
 
-                       do_abort(instance);
+       queue_work(hostdata->work_q, &hostdata->main_task);
+       spin_unlock_irqrestore(&hostdata->lock, flags);
 
-                       for (prev = (struct scsi_cmnd **) &(hostdata->disconnected_queue), tmp = (struct scsi_cmnd *) hostdata->disconnected_queue; tmp; prev = (struct scsi_cmnd **) &(tmp->host_scribble), tmp = (struct scsi_cmnd *) tmp->host_scribble)
-                               if (cmd == tmp) {
-                                       REMOVE(5, *prev, tmp, tmp->host_scribble);
-                                       *prev = (struct scsi_cmnd *) tmp->host_scribble;
-                                       tmp->host_scribble = NULL;
-                                       tmp->result = DID_ABORT << 16;
-                                       tmp->scsi_done(tmp);
-                                       return SUCCESS;
-                               }
-               }
-/*
- * Case 5 : If we reached this point, the command was not found in any of 
- *          the queues.
- *
- * We probably reached this point because of an unlikely race condition
- * between the command completing successfully and the abortion code,
- * so we won't panic, but we will notify the user in case something really
- * broke.
- */
-       printk(KERN_WARNING "scsi%d : warning : SCSI command probably completed successfully\n"
-                       "         before abortion\n", instance->host_no);
-       return FAILED;
+       return result;
 }
 
 
-/* 
- * Function : int NCR5380_bus_reset (struct scsi_cmnd *cmd)
- * 
- * Purpose : reset the SCSI bus.
- *
- * Returns : SUCCESS
+/**
+ * NCR5380_bus_reset - reset the SCSI bus
+ * @cmd: SCSI command undergoing EH
  *
- * Locks: host lock taken by caller
+ * Returns SUCCESS
  */
 
 static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
 {
        struct Scsi_Host *instance = cmd->device->host;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       int i;
+       unsigned long flags;
+       struct NCR5380_cmd *ncmd;
 
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
-       NCR5380_print_status(instance);
+       spin_lock_irqsave(&hostdata->lock, flags);
+
+#if (NDEBUG & NDEBUG_ANY)
+       scmd_printk(KERN_INFO, cmd, __func__);
+#endif
+       NCR5380_dprint(NDEBUG_ANY, instance);
+       NCR5380_dprint_phase(NDEBUG_ANY, instance);
 
-       spin_lock_irq(instance->host_lock);
        do_reset(instance);
-       spin_unlock_irq(instance->host_lock);
+
+       /* reset NCR registers */
+       NCR5380_write(MODE_REG, MR_BASE);
+       NCR5380_write(TARGET_COMMAND_REG, 0);
+       NCR5380_write(SELECT_ENABLE_REG, 0);
+
+       /* After the reset, there are no more connected or disconnected commands
+        * and no busy units; so clear the low-level status here to avoid
+        * conflicts when the mid-level code tries to wake up the affected
+        * commands!
+        */
+
+       hostdata->selecting = NULL;
+
+       list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+               struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+               set_host_byte(cmd, DID_RESET);
+               cmd->scsi_done(cmd);
+       }
+
+       list_for_each_entry(ncmd, &hostdata->autosense, list) {
+               struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+               set_host_byte(cmd, DID_RESET);
+               cmd->scsi_done(cmd);
+       }
+
+       if (hostdata->connected) {
+               set_host_byte(hostdata->connected, DID_RESET);
+               complete_cmd(instance, hostdata->connected);
+               hostdata->connected = NULL;
+       }
+
+       if (hostdata->sensing) {
+               set_host_byte(hostdata->connected, DID_RESET);
+               complete_cmd(instance, hostdata->sensing);
+               hostdata->sensing = NULL;
+       }
+
+       for (i = 0; i < 8; ++i)
+               hostdata->busy[i] = 0;
+#ifdef REAL_DMA
+       hostdata->dma_len = 0;
+#endif
+
+       queue_work(hostdata->work_q, &hostdata->main_task);
+       spin_unlock_irqrestore(&hostdata->lock, flags);
 
        return SUCCESS;
 }
index 162112d..a792886 100644 (file)
 #ifndef NCR5380_H
 #define NCR5380_H
 
+#include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <scsi/scsi_dbg.h>
 #include <scsi/scsi_eh.h>
+#include <scsi/scsi_transport_spi.h>
 
 #define NDEBUG_ARBITRATION     0x1
 #define NDEBUG_AUTOSENSE       0x2
 /* Write any value to this register to start an ini mode DMA receive */
 #define START_DMA_INITIATOR_RECEIVE_REG 7      /* wo */
 
-#define C400_CONTROL_STATUS_REG NCR53C400_register_offset-8    /* rw */
-
+/* NCR 53C400(A) Control Status Register bits: */
 #define CSR_RESET              0x80    /* wo  Resets 53c400 */
 #define CSR_53C80_REG          0x80    /* ro  5380 registers busy */
 #define CSR_TRANS_DIR          0x40    /* rw  Data transfer direction */
 #define CSR_BASE CSR_53C80_INTR
 #endif
 
-/* Number of 128-byte blocks to be transferred */
-#define C400_BLOCK_COUNTER_REG   NCR53C400_register_offset-7   /* rw */
-
-/* Resume transfer after disconnect */
-#define C400_RESUME_TRANSFER_REG NCR53C400_register_offset-6   /* wo */
-
-/* Access to host buffer stack */
-#define C400_HOST_BUFFER         NCR53C400_register_offset-4   /* rw */
-
-
 /* Note : PHASE_* macros are based on the values of the STATUS register */
 #define PHASE_MASK     (SR_MSG | SR_CD | SR_IO)
 
 
 #define PHASE_SR_TO_TCR(phase) ((phase) >> 2)
 
-/*
- * The internal should_disconnect() function returns these based on the 
- * expected length of a disconnect if a device supports disconnect/
- * reconnect.
- */
-
-#define DISCONNECT_NONE                0
-#define DISCONNECT_TIME_TO_DATA        1
-#define DISCONNECT_LONG                2
-
 /* 
  * "Special" value for the (unsigned char) command tag, to indicate
  * I_T_L nexus instead of I_T_L_Q.
 #define NO_IRQ         0
 #endif
 
-#define FLAG_HAS_LAST_BYTE_SENT                1       /* NCR53c81 or better */
-#define FLAG_CHECK_LAST_BYTE_SENT      2       /* Only test once */
-#define FLAG_NCR53C400                 4       /* NCR53c400 */
+#define FLAG_NO_DMA_FIXUP              1       /* No DMA errata workarounds */
 #define FLAG_NO_PSEUDO_DMA             8       /* Inhibit DMA */
-#define FLAG_DTC3181E                  16      /* DTC3181E */
 #define FLAG_LATE_DMA_SETUP            32      /* Setup NCR before DMA H/W */
 #define FLAG_TAGGED_QUEUING            64      /* as X3T9.2 spelled it */
-
-#ifndef ASM
+#define FLAG_TOSHIBA_DELAY             128     /* Allow for borken CD-ROMs */
 
 #ifdef SUPPORT_TAGS
 struct tag_alloc {
@@ -258,33 +238,24 @@ struct NCR5380_hostdata {
        NCR5380_implementation_fields;          /* implementation specific */
        struct Scsi_Host *host;                 /* Host backpointer */
        unsigned char id_mask, id_higher_mask;  /* 1 << id, all bits greater */
-       unsigned char targets_present;          /* targets we have connected
-                                                  to, so we can call a select
-                                                  failure a retryable condition */
-       volatile unsigned char busy[8];         /* index = target, bit = lun */
+       unsigned char busy[8];                  /* index = target, bit = lun */
 #if defined(REAL_DMA) || defined(REAL_DMA_POLL)
-       volatile int dma_len;                   /* requested length of DMA */
+       int dma_len;                            /* requested length of DMA */
 #endif
-       volatile unsigned char last_message;    /* last message OUT */
-       volatile struct scsi_cmnd *connected;   /* currently connected command */
-       volatile struct scsi_cmnd *issue_queue; /* waiting to be issued */
-       volatile struct scsi_cmnd *disconnected_queue;  /* waiting for reconnect */
-       volatile int restart_select;            /* we have disconnected,
-                                                  used to restart 
-                                                  NCR5380_select() */
-       volatile unsigned aborted:1;            /* flag, says aborted */
+       unsigned char last_message;             /* last message OUT */
+       struct scsi_cmnd *connected;            /* currently connected cmnd */
+       struct scsi_cmnd *selecting;            /* cmnd to be connected */
+       struct list_head unissued;              /* waiting to be issued */
+       struct list_head autosense;             /* priority issue queue */
+       struct list_head disconnected;          /* waiting for reconnect */
+       spinlock_t lock;                        /* protects this struct */
        int flags;
-       unsigned long time_expires;             /* in jiffies, set prior to sleeping */
-       int select_time;                        /* timer in select for target response */
-       volatile struct scsi_cmnd *selecting;
-       struct delayed_work coroutine;          /* our co-routine */
        struct scsi_eh_save ses;
+       struct scsi_cmnd *sensing;
        char info[256];
        int read_overruns;                /* number of bytes to cut from a
                                           * transfer to handle chip overruns */
-       int retain_dma_intr;
        struct work_struct main_task;
-       volatile int main_running;
 #ifdef SUPPORT_TAGS
        struct tag_alloc TagAlloc[8][8];        /* 8 targets and 8 LUNs */
 #endif
@@ -292,10 +263,23 @@ struct NCR5380_hostdata {
        unsigned spin_max_r;
        unsigned spin_max_w;
 #endif
+       struct workqueue_struct *work_q;
+       unsigned long accesses_per_ms;  /* chip register accesses per ms */
 };
 
 #ifdef __KERNEL__
 
+struct NCR5380_cmd {
+       struct list_head list;
+};
+
+#define NCR5380_CMD_SIZE               (sizeof(struct NCR5380_cmd))
+
+static inline struct scsi_cmnd *NCR5380_to_scmd(struct NCR5380_cmd *ncmd_ptr)
+{
+       return ((struct scsi_cmnd *)ncmd_ptr) - 1;
+}
+
 #ifndef NDEBUG
 #define NDEBUG (0)
 #endif
@@ -304,6 +288,11 @@ struct NCR5380_hostdata {
        do { if ((NDEBUG) & (flg)) \
                printk(KERN_DEBUG fmt, ## __VA_ARGS__); } while (0)
 
+#define dsprintk(flg, host, fmt, ...) \
+       do { if ((NDEBUG) & (flg)) \
+               shost_printk(KERN_DEBUG, host, fmt, ## __VA_ARGS__); \
+       } while (0)
+
 #if NDEBUG
 #define NCR5380_dprint(flg, arg) \
        do { if ((NDEBUG) & (flg)) NCR5380_print(arg); } while (0)
@@ -320,6 +309,7 @@ static void NCR5380_print(struct Scsi_Host *instance);
 static int NCR5380_probe_irq(struct Scsi_Host *instance, int possible);
 #endif
 static int NCR5380_init(struct Scsi_Host *instance, int flags);
+static int NCR5380_maybe_reset_bus(struct Scsi_Host *);
 static void NCR5380_exit(struct Scsi_Host *instance);
 static void NCR5380_information_transfer(struct Scsi_Host *instance);
 #ifndef DONT_USE_INTR
@@ -328,7 +318,7 @@ static irqreturn_t NCR5380_intr(int irq, void *dev_id);
 static void NCR5380_main(struct work_struct *work);
 static const char *NCR5380_info(struct Scsi_Host *instance);
 static void NCR5380_reselect(struct Scsi_Host *instance);
-static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd);
+static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *, struct scsi_cmnd *);
 #if defined(PSEUDO_DMA) || defined(REAL_DMA) || defined(REAL_DMA_POLL)
 static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data);
 #endif
@@ -443,5 +433,4 @@ static __inline__ int NCR5380_pc_dma_residual(struct Scsi_Host *instance)
 #endif                         /* defined(i386) || defined(__alpha__) */
 #endif                         /* defined(REAL_DMA)  */
 #endif                         /* __KERNEL__ */
-#endif                         /* ndef ASM */
 #endif                         /* NCR5380_H */
index d28d6c0..221f18c 100644 (file)
@@ -4,9 +4,7 @@
  * Copyright 1995-2002, Russell King
  */
 #include <linux/module.h>
-#include <linux/signal.h>
 #include <linux/ioport.h>
-#include <linux/delay.h>
 #include <linux/blkdev.h>
 #include <linux/init.h>
 
 
 #include <scsi/scsi_host.h>
 
-#include <scsi/scsicam.h>
-
 #define PSEUDO_DMA
 
 #define priv(host)                     ((struct NCR5380_hostdata *)(host)->hostdata)
-#define NCR5380_local_declare()                struct Scsi_Host *_instance
-#define NCR5380_setup(instance)                _instance = instance
-#define NCR5380_read(reg)              cumanascsi_read(_instance, reg)
-#define NCR5380_write(reg, value)      cumanascsi_write(_instance, reg, value)
+#define NCR5380_read(reg)              cumanascsi_read(instance, reg)
+#define NCR5380_write(reg, value)      cumanascsi_write(instance, reg, value)
+
+#define NCR5380_dma_xfer_len(instance, cmd, phase)     (cmd->transfersize)
+
 #define NCR5380_intr                   cumanascsi_intr
 #define NCR5380_queue_command          cumanascsi_queue_command
 #define NCR5380_info                   cumanascsi_info
@@ -211,6 +208,8 @@ static struct scsi_host_template cumanascsi_template = {
        .cmd_per_lun            = 2,
        .use_clustering         = DISABLE_CLUSTERING,
        .proc_name              = "CumanaSCSI-1",
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 
 static int cumanascsi1_probe(struct expansion_card *ec,
@@ -240,23 +239,21 @@ static int cumanascsi1_probe(struct expansion_card *ec,
 
        host->irq = ec->irq;
 
-       NCR5380_init(host, 0);
+       ret = NCR5380_init(host, 0);
+       if (ret)
+               goto out_unmap;
+
+       NCR5380_maybe_reset_bus(host);
 
         priv(host)->ctrl = 0;
         writeb(0, priv(host)->base + CTRL);
 
-       host->n_io_port = 255;
-       if (!(request_region(host->io_port, host->n_io_port, "CumanaSCSI-1"))) {
-               ret = -EBUSY;
-               goto out_unmap;
-       }
-
        ret = request_irq(host->irq, cumanascsi_intr, 0,
                          "CumanaSCSI-1", host);
        if (ret) {
                printk("scsi%d: IRQ%d not free: %d\n",
                    host->host_no, host->irq, ret);
-               goto out_unmap;
+               goto out_exit;
        }
 
        ret = scsi_add_host(host, &ec->dev);
@@ -268,6 +265,8 @@ static int cumanascsi1_probe(struct expansion_card *ec,
 
  out_free_irq:
        free_irq(host->irq, host);
+ out_exit:
+       NCR5380_exit(host);
  out_unmap:
        iounmap(priv(host)->base);
        iounmap(priv(host)->dma);
index 7c6fa14..1fab1d1 100644 (file)
@@ -5,9 +5,7 @@
  */
 
 #include <linux/module.h>
-#include <linux/signal.h>
 #include <linux/ioport.h>
-#include <linux/delay.h>
 #include <linux/blkdev.h>
 #include <linux/init.h>
 
 #define DONT_USE_INTR
 
 #define priv(host)                     ((struct NCR5380_hostdata *)(host)->hostdata)
-#define NCR5380_local_declare()                void __iomem *_base
-#define NCR5380_setup(host)            _base = priv(host)->base
 
-#define NCR5380_read(reg)              readb(_base + ((reg) << 2))
-#define NCR5380_write(reg, value)      writeb(value, _base + ((reg) << 2))
+#define NCR5380_read(reg) \
+       readb(priv(instance)->base + ((reg) << 2))
+#define NCR5380_write(reg, value) \
+       writeb(value, priv(instance)->base + ((reg) << 2))
+
+#define NCR5380_dma_xfer_len(instance, cmd, phase)     (cmd->transfersize)
+
 #define NCR5380_queue_command          oakscsi_queue_command
 #define NCR5380_info                   oakscsi_info
-#define NCR5380_show_info              oakscsi_show_info
 
 #define NCR5380_implementation_fields  \
        void __iomem *base
@@ -103,7 +103,6 @@ printk("reading %p len %d\n", addr, len);
 
 static struct scsi_host_template oakscsi_template = {
        .module                 = THIS_MODULE,
-       .show_info              = oakscsi_show_info,
        .name                   = "Oak 16-bit SCSI",
        .info                   = oakscsi_info,
        .queuecommand           = oakscsi_queue_command,
@@ -115,6 +114,8 @@ static struct scsi_host_template oakscsi_template = {
        .cmd_per_lun            = 2,
        .use_clustering         = DISABLE_CLUSTERING,
        .proc_name              = "oakscsi",
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 
 static int oakscsi_probe(struct expansion_card *ec, const struct ecard_id *id)
@@ -142,15 +143,21 @@ static int oakscsi_probe(struct expansion_card *ec, const struct ecard_id *id)
        host->irq = NO_IRQ;
        host->n_io_port = 255;
 
-       NCR5380_init(host, 0);
+       ret = NCR5380_init(host, 0);
+       if (ret)
+               goto out_unmap;
+
+       NCR5380_maybe_reset_bus(host);
 
        ret = scsi_add_host(host, &ec->dev);
        if (ret)
-               goto out_unmap;
+               goto out_exit;
 
        scsi_scan_host(host);
        goto out;
 
+ out_exit:
+       NCR5380_exit(host);
  out_unmap:
        iounmap(priv(host)->base);
  unreg:
index db87ece..e654786 100644 (file)
@@ -1,15 +1,15 @@
 /*
  * NCR 5380 generic driver routines.  These should make it *trivial*
- *     to implement 5380 SCSI drivers under Linux with a non-trantor
- *     architecture.
+ * to implement 5380 SCSI drivers under Linux with a non-trantor
+ * architecture.
  *
- *     Note that these routines also work with NR53c400 family chips.
+ * Note that these routines also work with NCR53c400 family chips.
  *
  * Copyright 1993, Drew Eckhardt
- *     Visionary Computing
- *     (Unix and Linux consulting and custom programming)
- *     drew@colorado.edu
- *     +1 (303) 666-5836
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * drew@colorado.edu
+ * +1 (303) 666-5836
  *
  * For more information, please consult
  *
  * 1+ (800) 334-5454
  */
 
-/*
- * ++roman: To port the 5380 driver to the Atari, I had to do some changes in
- * this file, too:
- *
- *  - Some of the debug statements were incorrect (undefined variables and the
- *    like). I fixed that.
- *
- *  - In information_transfer(), I think a #ifdef was wrong. Looking at the
- *    possible DMA transfer size should also happen for REAL_DMA. I added this
- *    in the #if statement.
- *
- *  - When using real DMA, information_transfer() should return in a DATAOUT
- *    phase after starting the DMA. It has nothing more to do.
- *
- *  - The interrupt service routine should run main after end of DMA, too (not
- *    only after RESELECTION interrupts). Additionally, it should _not_ test
- *    for more interrupts after running main, since a DMA process may have
- *    been started and interrupts are turned on now. The new int could happen
- *    inside the execution of NCR5380_intr(), leading to recursive
- *    calls.
- *
- *  - I've added a function merge_contiguous_buffers() that tries to
- *    merge scatter-gather buffers that are located at contiguous
- *    physical addresses and can be processed with the same DMA setup.
- *    Since most scatter-gather operations work on a page (4K) of
- *    4 buffers (1K), in more than 90% of all cases three interrupts and
- *    DMA setup actions are saved.
- *
- * - I've deleted all the stuff for AUTOPROBE_IRQ, REAL_DMA_POLL, PSEUDO_DMA
- *    and USLEEP, because these were messing up readability and will never be
- *    needed for Atari SCSI.
- *
- * - I've revised the NCR5380_main() calling scheme (relax the 'main_running'
- *   stuff), and 'main' is executed in a bottom half if awoken by an
- *   interrupt.
- *
- * - The code was quite cluttered up by "#if (NDEBUG & NDEBUG_*) printk..."
- *   constructs. In my eyes, this made the source rather unreadable, so I
- *   finally replaced that by the *_PRINTK() macros.
- *
- */
-
-/*
- * Further development / testing that should be done :
- * 1.  Test linked command handling code after Eric is ready with
- *     the high level code.
- */
+/* Ported to Atari by Roman Hodek and others. */
 
 /* Adapted for the sun3 by Sam Creasey. */
 
-#include <scsi/scsi_dbg.h>
-#include <scsi/scsi_transport_spi.h>
-
-#if (NDEBUG & NDEBUG_LISTS)
-#define LIST(x, y)                                             \
-       do {                                                    \
-               printk("LINE:%d   Adding %p to %p\n",           \
-                      __LINE__, (void*)(x), (void*)(y));       \
-               if ((x) == (y))                                 \
-                       udelay(5);                              \
-       } while (0)
-#define REMOVE(w, x, y, z)                                     \
-       do {                                                    \
-               printk("LINE:%d   Removing: %p->%p  %p->%p \n", \
-                      __LINE__, (void*)(w), (void*)(x),        \
-                      (void*)(y), (void*)(z));                 \
-               if ((x) == (y))                                 \
-                       udelay(5);                              \
-       } while (0)
-#else
-#define LIST(x,y)
-#define REMOVE(w,x,y,z)
-#endif
-
-#ifndef notyet
-#undef LINKED
-#endif
-
 /*
  * Design
  *
  * piece of hardware that requires you to sit in a loop polling for
  * the REQ signal as long as you are connected.  Some devices are
  * brain dead (ie, many TEXEL CD ROM drives) and won't disconnect
- * while doing long seek operations.
- *
- * The workaround for this is to keep track of devices that have
- * disconnected.  If the device hasn't disconnected, for commands that
- * should disconnect, we do something like
- *
- * while (!REQ is asserted) { sleep for N usecs; poll for M usecs }
- *
- * Some tweaking of N and M needs to be done.  An algorithm based
- * on "time to data" would give the best results as long as short time
- * to datas (ie, on the same track) were considered, however these
+ * while doing long seek operations. [...] These
  * broken devices are the exception rather than the rule and I'd rather
  * spend my time optimizing for the normal case.
  *
  *
  * These macros control options :
  * AUTOSENSE - if defined, REQUEST SENSE will be performed automatically
- *     for commands that return with a CHECK CONDITION status.
+ * for commands that return with a CHECK CONDITION status.
  *
  * DIFFERENTIAL - if defined, NCR53c81 chips will use external differential
- *     transceivers.
- *
- * LINKED - if defined, linked commands are supported.
+ * transceivers.
  *
  * REAL_DMA - if defined, REAL DMA is used during the data transfer phases.
  *
  * NCR5380_write(register, value) - write to the specific register
  *
  * NCR5380_implementation_fields  - additional fields needed for this
- *      specific implementation of the NCR5380
+ * specific implementation of the NCR5380
  *
  * Either real DMA *or* pseudo DMA may be implemented
  * REAL functions :
  * NCR5380_REAL_DMA should be defined if real DMA is to be used.
  * Note that the DMA setup functions should return the number of bytes
- *     that they were able to program the controller for.
+ * that they were able to program the controller for.
  *
  * Also note that generic i386/PC versions of these macros are
- *     available as NCR5380_i386_dma_write_setup,
- *     NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
+ * available as NCR5380_i386_dma_write_setup,
+ * NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
  *
  * NCR5380_dma_write_setup(instance, src, count) - initialize
  * NCR5380_dma_read_setup(instance, dst, count) - initialize
  * possible) function may be used.
  */
 
-/* Macros ease life... :-) */
-#define        SETUP_HOSTDATA(in)                              \
-    struct NCR5380_hostdata *hostdata =                        \
-       (struct NCR5380_hostdata *)(in)->hostdata
-#define        HOSTDATA(in) ((struct NCR5380_hostdata *)(in)->hostdata)
-
-#define        NEXT(cmd)               ((struct scsi_cmnd *)(cmd)->host_scribble)
-#define        SET_NEXT(cmd,next)      ((cmd)->host_scribble = (void *)(next))
-#define        NEXTADDR(cmd)           ((struct scsi_cmnd **)&(cmd)->host_scribble)
-
-#define        HOSTNO          instance->host_no
-#define        H_NO(cmd)       (cmd)->device->host->host_no
+static int do_abort(struct Scsi_Host *);
+static void do_reset(struct Scsi_Host *);
 
 #ifdef SUPPORT_TAGS
 
  * cannot know it in advance :-( We just see a QUEUE_FULL status being
  * returned. So, in this case, the driver internal queue size assumption is
  * reduced to the number of active tags if QUEUE_FULL is returned by the
- * target. The command is returned to the mid-level, but with status changed
- * to BUSY, since --as I've seen-- the mid-level can't handle QUEUE_FULL
- * correctly.
+ * target.
  *
  * We're also not allowed running tagged commands as long as an untagged
  * command is active. And REQUEST SENSE commands after a contingent allegiance
@@ -304,7 +206,8 @@ static void __init init_tags(struct NCR5380_hostdata *hostdata)
 static int is_lun_busy(struct scsi_cmnd *cmd, int should_be_tagged)
 {
        u8 lun = cmd->device->lun;
-       SETUP_HOSTDATA(cmd->device->host);
+       struct Scsi_Host *instance = cmd->device->host;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
        if (hostdata->busy[cmd->device->id] & (1 << lun))
                return 1;
@@ -314,8 +217,8 @@ static int is_lun_busy(struct scsi_cmnd *cmd, int should_be_tagged)
                return 0;
        if (hostdata->TagAlloc[scmd_id(cmd)][lun].nr_allocated >=
            hostdata->TagAlloc[scmd_id(cmd)][lun].queue_size) {
-               dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d: no free tags\n",
-                          H_NO(cmd), cmd->device->id, lun);
+               dsprintk(NDEBUG_TAGS, instance, "target %d lun %d: no free tags\n",
+                        scmd_id(cmd), lun);
                return 1;
        }
        return 0;
@@ -330,7 +233,8 @@ static int is_lun_busy(struct scsi_cmnd *cmd, int should_be_tagged)
 static void cmd_get_tag(struct scsi_cmnd *cmd, int should_be_tagged)
 {
        u8 lun = cmd->device->lun;
-       SETUP_HOSTDATA(cmd->device->host);
+       struct Scsi_Host *instance = cmd->device->host;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
        /* If we or the target don't support tagged queuing, allocate the LUN for
         * an untagged command.
@@ -340,18 +244,16 @@ static void cmd_get_tag(struct scsi_cmnd *cmd, int should_be_tagged)
            !cmd->device->tagged_supported) {
                cmd->tag = TAG_NONE;
                hostdata->busy[cmd->device->id] |= (1 << lun);
-               dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d now allocated by untagged "
-                          "command\n", H_NO(cmd), cmd->device->id, lun);
+               dsprintk(NDEBUG_TAGS, instance, "target %d lun %d now allocated by untagged command\n",
+                        scmd_id(cmd), lun);
        } else {
                struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][lun];
 
                cmd->tag = find_first_zero_bit(ta->allocated, MAX_TAGS);
                set_bit(cmd->tag, ta->allocated);
                ta->nr_allocated++;
-               dprintk(NDEBUG_TAGS, "scsi%d: using tag %d for target %d lun %d "
-                          "(now %d tags in use)\n",
-                          H_NO(cmd), cmd->tag, cmd->device->id,
-                          lun, ta->nr_allocated);
+               dsprintk(NDEBUG_TAGS, instance, "using tag %d for target %d lun %d (%d tags allocated)\n",
+                        cmd->tag, scmd_id(cmd), lun, ta->nr_allocated);
        }
 }
 
@@ -363,21 +265,22 @@ static void cmd_get_tag(struct scsi_cmnd *cmd, int should_be_tagged)
 static void cmd_free_tag(struct scsi_cmnd *cmd)
 {
        u8 lun = cmd->device->lun;
-       SETUP_HOSTDATA(cmd->device->host);
+       struct Scsi_Host *instance = cmd->device->host;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
        if (cmd->tag == TAG_NONE) {
                hostdata->busy[cmd->device->id] &= ~(1 << lun);
-               dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d untagged cmd finished\n",
-                          H_NO(cmd), cmd->device->id, lun);
+               dsprintk(NDEBUG_TAGS, instance, "target %d lun %d untagged cmd freed\n",
+                        scmd_id(cmd), lun);
        } else if (cmd->tag >= MAX_TAGS) {
-               printk(KERN_NOTICE "scsi%d: trying to free bad tag %d!\n",
-                      H_NO(cmd), cmd->tag);
+               shost_printk(KERN_NOTICE, instance,
+                            "trying to free bad tag %d!\n", cmd->tag);
        } else {
                struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][lun];
                clear_bit(cmd->tag, ta->allocated);
                ta->nr_allocated--;
-               dprintk(NDEBUG_TAGS, "scsi%d: freed tag %d for target %d lun %d\n",
-                          H_NO(cmd), cmd->tag, cmd->device->id, lun);
+               dsprintk(NDEBUG_TAGS, instance, "freed tag %d for target %d lun %d\n",
+                        cmd->tag, scmd_id(cmd), lun);
        }
 }
 
@@ -401,17 +304,15 @@ static void free_all_tags(struct NCR5380_hostdata *hostdata)
 
 #endif /* SUPPORT_TAGS */
 
-
-/*
- * Function: void merge_contiguous_buffers( struct scsi_cmnd *cmd )
- *
- * Purpose: Try to merge several scatter-gather requests into one DMA
- *    transfer. This is possible if the scatter buffers lie on
- *    physical contiguous addresses.
- *
- * Parameters: struct scsi_cmnd *cmd
- *    The command to work on. The first scatter buffer's data are
- *    assumed to be already transferred into ptr/this_residual.
+/**
+ * merge_contiguous_buffers - coalesce scatter-gather list entries
+ * @cmd: command requesting IO
+ *
+ * Try to merge several scatter-gather buffers into one DMA transfer.
+ * This is possible if the scatter buffers lie on physically
+ * contiguous addresses. The first scatter-gather buffer's data are
+ * assumed to be already transferred into cmd->SCp.this_residual.
+ * Every buffer merged avoids an interrupt and a DMA setup operation.
  */
 
 static void merge_contiguous_buffers(struct scsi_cmnd *cmd)
@@ -463,9 +364,7 @@ static inline void initialize_SCp(struct scsi_cmnd *cmd)
                cmd->SCp.buffers_residual = scsi_sg_count(cmd) - 1;
                cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
                cmd->SCp.this_residual = cmd->SCp.buffer->length;
-               /* ++roman: Try to merge some scatter-buffers if they are at
-                * contiguous physical addresses.
-                */
+
                merge_contiguous_buffers(cmd);
        } else {
                cmd->SCp.buffer = NULL;
@@ -473,31 +372,110 @@ static inline void initialize_SCp(struct scsi_cmnd *cmd)
                cmd->SCp.ptr = NULL;
                cmd->SCp.this_residual = 0;
        }
+
+       cmd->SCp.Status = 0;
+       cmd->SCp.Message = 0;
+}
+
+/**
+ * NCR5380_poll_politely2 - wait for two chip register values
+ * @instance: controller to poll
+ * @reg1: 5380 register to poll
+ * @bit1: Bitmask to check
+ * @val1: Expected value
+ * @reg2: Second 5380 register to poll
+ * @bit2: Second bitmask to check
+ * @val2: Second expected value
+ * @wait: Time-out in jiffies
+ *
+ * Polls the chip in a reasonably efficient manner waiting for an
+ * event to occur. After a short quick poll we begin to yield the CPU
+ * (if possible). In irq contexts the time-out is arbitrarily limited.
+ * Callers may hold locks as long as they are held in irq mode.
+ *
+ * Returns 0 if either or both event(s) occurred otherwise -ETIMEDOUT.
+ */
+
+static int NCR5380_poll_politely2(struct Scsi_Host *instance,
+                                  int reg1, int bit1, int val1,
+                                  int reg2, int bit2, int val2, int wait)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       unsigned long deadline = jiffies + wait;
+       unsigned long n;        /* busy-wait iteration budget */
+
+       /* Busy-wait for up to 10 ms (up to two register reads per iteration) */
+       n = min(10000U, jiffies_to_usecs(wait));
+       n *= hostdata->accesses_per_ms;
+       n /= 2000;      /* 1000: usecs -> ms; 2: register reads per iteration */
+       do {
+               if ((NCR5380_read(reg1) & bit1) == val1)
+                       return 0;
+               if ((NCR5380_read(reg2) & bit2) == val2)
+                       return 0;
+               cpu_relax();
+       } while (n--);  /* body runs at least once, even when n is 0 */
+
+       if (irqs_disabled() || in_interrupt())  /* no sleeping here: give up */
+               return -ETIMEDOUT;
+
+       /* Repeatedly sleep for one jiffy (1 ms only if HZ is 1000) until deadline */
+       while (time_is_after_jiffies(deadline)) {
+               schedule_timeout_uninterruptible(1);
+               if ((NCR5380_read(reg1) & bit1) == val1)
+                       return 0;
+               if ((NCR5380_read(reg2) & bit2) == val2)
+                       return 0;
+       }
+
+       return -ETIMEDOUT;
 }
 
-#include <linux/delay.h>
+static inline int NCR5380_poll_politely(struct Scsi_Host *instance,
+                                        int reg, int bit, int val, int wait)
+{      /* single-condition form: the same register test fills both slots */
+       return NCR5380_poll_politely2(instance, reg, bit, val,
+                                               reg, bit, val, wait);
+}
 
 #if NDEBUG
 static struct {
        unsigned char mask;
        const char *name;
 } signals[] = {
-       { SR_DBP, "PARITY"}, { SR_RST, "RST" }, { SR_BSY, "BSY" },
-       { SR_REQ, "REQ" }, { SR_MSG, "MSG" }, { SR_CD,  "CD" }, { SR_IO, "IO" },
-       { SR_SEL, "SEL" }, {0, NULL}
-}, basrs[] = {
-       {BASR_ATN, "ATN"}, {BASR_ACK, "ACK"}, {0, NULL}
-}, icrs[] = {
-       {ICR_ASSERT_RST, "ASSERT RST"},{ICR_ASSERT_ACK, "ASSERT ACK"},
-       {ICR_ASSERT_BSY, "ASSERT BSY"}, {ICR_ASSERT_SEL, "ASSERT SEL"},
-       {ICR_ASSERT_ATN, "ASSERT ATN"}, {ICR_ASSERT_DATA, "ASSERT DATA"},
+       {SR_DBP, "PARITY"},
+       {SR_RST, "RST"},
+       {SR_BSY, "BSY"},
+       {SR_REQ, "REQ"},
+       {SR_MSG, "MSG"},
+       {SR_CD, "CD"},
+       {SR_IO, "IO"},
+       {SR_SEL, "SEL"},
        {0, NULL}
-}, mrs[] = {
-       {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"}, {MR_TARGET, "MODE TARGET"},
-       {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"}, {MR_ENABLE_PAR_INTR,
-       "MODE PARITY INTR"}, {MR_ENABLE_EOP_INTR,"MODE EOP INTR"},
+},
+basrs[] = {
+       {BASR_ATN, "ATN"},
+       {BASR_ACK, "ACK"},
+       {0, NULL}
+},
+icrs[] = {
+       {ICR_ASSERT_RST, "ASSERT RST"},
+       {ICR_ASSERT_ACK, "ASSERT ACK"},
+       {ICR_ASSERT_BSY, "ASSERT BSY"},
+       {ICR_ASSERT_SEL, "ASSERT SEL"},
+       {ICR_ASSERT_ATN, "ASSERT ATN"},
+       {ICR_ASSERT_DATA, "ASSERT DATA"},
+       {0, NULL}
+},
+mrs[] = {
+       {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"},
+       {MR_TARGET, "MODE TARGET"},
+       {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"},
+       {MR_ENABLE_PAR_INTR, "MODE PARITY INTR"},
+       {MR_ENABLE_EOP_INTR, "MODE EOP INTR"},
        {MR_MONITOR_BSY, "MODE MONITOR BSY"},
-       {MR_DMA_MODE, "MODE DMA"}, {MR_ARBITRATE, "MODE ARBITRATION"},
+       {MR_DMA_MODE, "MODE DMA"},
+       {MR_ARBITRATE, "MODE ARBITRATION"},
        {0, NULL}
 };
 
@@ -511,15 +489,13 @@ static struct {
 static void NCR5380_print(struct Scsi_Host *instance)
 {
        unsigned char status, data, basr, mr, icr, i;
-       unsigned long flags;
 
-       local_irq_save(flags);
        data = NCR5380_read(CURRENT_SCSI_DATA_REG);
        status = NCR5380_read(STATUS_REG);
        mr = NCR5380_read(MODE_REG);
        icr = NCR5380_read(INITIATOR_COMMAND_REG);
        basr = NCR5380_read(BUS_AND_STATUS_REG);
-       local_irq_restore(flags);
+
        printk("STATUS_REG: %02x ", status);
        for (i = 0; signals[i].mask; ++i)
                if (status & signals[i].mask)
@@ -543,8 +519,12 @@ static struct {
        unsigned char value;
        const char *name;
 } phases[] = {
-       {PHASE_DATAOUT, "DATAOUT"}, {PHASE_DATAIN, "DATAIN"}, {PHASE_CMDOUT, "CMDOUT"},
-       {PHASE_STATIN, "STATIN"}, {PHASE_MSGOUT, "MSGOUT"}, {PHASE_MSGIN, "MSGIN"},
+       {PHASE_DATAOUT, "DATAOUT"},
+       {PHASE_DATAIN, "DATAIN"},
+       {PHASE_CMDOUT, "CMDOUT"},
+       {PHASE_STATIN, "STATIN"},
+       {PHASE_MSGOUT, "MSGOUT"},
+       {PHASE_MSGIN, "MSGIN"},
        {PHASE_UNKNOWN, "UNKNOWN"}
 };
 
@@ -553,8 +533,6 @@ static struct {
  * @instance: adapter to dump
  *
  * Print the current SCSI phase for debugging purposes
- *
- * Locks: none
  */
 
 static void NCR5380_print_phase(struct Scsi_Host *instance)
@@ -564,54 +542,21 @@ static void NCR5380_print_phase(struct Scsi_Host *instance)
 
        status = NCR5380_read(STATUS_REG);
        if (!(status & SR_REQ))
-               printk(KERN_DEBUG "scsi%d: REQ not asserted, phase unknown.\n", HOSTNO);
+               shost_printk(KERN_DEBUG, instance, "REQ not asserted, phase unknown.\n");
        else {
                for (i = 0; (phases[i].value != PHASE_UNKNOWN) &&
                     (phases[i].value != (status & PHASE_MASK)); ++i)
                        ;
-               printk(KERN_DEBUG "scsi%d: phase %s\n", HOSTNO, phases[i].name);
+               shost_printk(KERN_DEBUG, instance, "phase %s\n", phases[i].name);
        }
 }
-
 #endif
 
-/*
- * ++roman: New scheme of calling NCR5380_main()
- *
- * If we're not in an interrupt, we can call our main directly, it cannot be
- * already running. Else, we queue it on a task queue, if not 'main_running'
- * tells us that a lower level is already executing it. This way,
- * 'main_running' needs not be protected in a special way.
- *
- * queue_main() is a utility function for putting our main onto the task
- * queue, if main_running is false. It should be called only from a
- * interrupt or bottom half.
- */
-
-#include <linux/gfp.h>
-#include <linux/workqueue.h>
-#include <linux/interrupt.h>
-
-static inline void queue_main(struct NCR5380_hostdata *hostdata)
-{
-       if (!hostdata->main_running) {
-               /* If in interrupt and NCR5380_main() not already running,
-                  queue it on the 'immediate' task queue, to be processed
-                  immediately after the current interrupt processing has
-                  finished. */
-               schedule_work(&hostdata->main_task);
-       }
-       /* else: nothing to do: the running NCR5380_main() will pick up
-          any newly queued command. */
-}
-
 /**
  * NCR5380_info - report driver and host information
  * @instance: relevant scsi host instance
  *
  * For use as the host template info() handler.
- *
- * Locks: none
  */
 
 static const char *NCR5380_info(struct Scsi_Host *instance)
@@ -630,13 +575,14 @@ static void prepare_info(struct Scsi_Host *instance)
                 "base 0x%lx, irq %d, "
                 "can_queue %d, cmd_per_lun %d, "
                 "sg_tablesize %d, this_id %d, "
-                "flags { %s}, "
+                "flags { %s%s}, "
                 "options { %s} ",
                 instance->hostt->name, instance->io_port, instance->n_io_port,
                 instance->base, instance->irq,
                 instance->can_queue, instance->cmd_per_lun,
                 instance->sg_tablesize, instance->this_id,
                 hostdata->flags & FLAG_TAGGED_QUEUING ? "TAGGED_QUEUING " : "",
+                hostdata->flags & FLAG_TOSHIBA_DELAY  ? "TOSHIBA_DELAY "  : "",
 #ifdef DIFFERENTIAL
                 "DIFFERENTIAL "
 #endif
@@ -652,102 +598,6 @@ static void prepare_info(struct Scsi_Host *instance)
                 "");
 }
 
-/**
- * NCR5380_print_status - dump controller info
- * @instance: controller to dump
- *
- * Print commands in the various queues, called from NCR5380_abort
- * to aid debugging.
- */
-
-static void lprint_Scsi_Cmnd(struct scsi_cmnd *cmd)
-{
-       int i, s;
-       unsigned char *command;
-       printk("scsi%d: destination target %d, lun %llu\n",
-               H_NO(cmd), cmd->device->id, cmd->device->lun);
-       printk(KERN_CONT "        command = ");
-       command = cmd->cmnd;
-       printk(KERN_CONT "%2d (0x%02x)", command[0], command[0]);
-       for (i = 1, s = COMMAND_SIZE(command[0]); i < s; ++i)
-               printk(KERN_CONT " %02x", command[i]);
-       printk("\n");
-}
-
-static void NCR5380_print_status(struct Scsi_Host *instance)
-{
-       struct NCR5380_hostdata *hostdata;
-       struct scsi_cmnd *ptr;
-       unsigned long flags;
-
-       NCR5380_dprint(NDEBUG_ANY, instance);
-       NCR5380_dprint_phase(NDEBUG_ANY, instance);
-
-       hostdata = (struct NCR5380_hostdata *)instance->hostdata;
-
-       local_irq_save(flags);
-       printk("NCR5380: coroutine is%s running.\n",
-               hostdata->main_running ? "" : "n't");
-       if (!hostdata->connected)
-               printk("scsi%d: no currently connected command\n", HOSTNO);
-       else
-               lprint_Scsi_Cmnd((struct scsi_cmnd *) hostdata->connected);
-       printk("scsi%d: issue_queue\n", HOSTNO);
-       for (ptr = (struct scsi_cmnd *)hostdata->issue_queue; ptr; ptr = NEXT(ptr))
-               lprint_Scsi_Cmnd(ptr);
-
-       printk("scsi%d: disconnected_queue\n", HOSTNO);
-       for (ptr = (struct scsi_cmnd *) hostdata->disconnected_queue; ptr;
-            ptr = NEXT(ptr))
-               lprint_Scsi_Cmnd(ptr);
-
-       local_irq_restore(flags);
-       printk("\n");
-}
-
-static void show_Scsi_Cmnd(struct scsi_cmnd *cmd, struct seq_file *m)
-{
-       int i, s;
-       unsigned char *command;
-       seq_printf(m, "scsi%d: destination target %d, lun %llu\n",
-               H_NO(cmd), cmd->device->id, cmd->device->lun);
-       seq_puts(m, "        command = ");
-       command = cmd->cmnd;
-       seq_printf(m, "%2d (0x%02x)", command[0], command[0]);
-       for (i = 1, s = COMMAND_SIZE(command[0]); i < s; ++i)
-               seq_printf(m, " %02x", command[i]);
-       seq_putc(m, '\n');
-}
-
-static int __maybe_unused NCR5380_show_info(struct seq_file *m,
-                                            struct Scsi_Host *instance)
-{
-       struct NCR5380_hostdata *hostdata;
-       struct scsi_cmnd *ptr;
-       unsigned long flags;
-
-       hostdata = (struct NCR5380_hostdata *)instance->hostdata;
-
-       local_irq_save(flags);
-       seq_printf(m, "NCR5380: coroutine is%s running.\n",
-               hostdata->main_running ? "" : "n't");
-       if (!hostdata->connected)
-               seq_printf(m, "scsi%d: no currently connected command\n", HOSTNO);
-       else
-               show_Scsi_Cmnd((struct scsi_cmnd *) hostdata->connected, m);
-       seq_printf(m, "scsi%d: issue_queue\n", HOSTNO);
-       for (ptr = (struct scsi_cmnd *)hostdata->issue_queue; ptr; ptr = NEXT(ptr))
-               show_Scsi_Cmnd(ptr, m);
-
-       seq_printf(m, "scsi%d: disconnected_queue\n", HOSTNO);
-       for (ptr = (struct scsi_cmnd *) hostdata->disconnected_queue; ptr;
-            ptr = NEXT(ptr))
-               show_Scsi_Cmnd(ptr, m);
-
-       local_irq_restore(flags);
-       return 0;
-}
-
 /**
  * NCR5380_init - initialise an NCR5380
  * @instance: adapter to configure
@@ -764,11 +614,11 @@ static int __maybe_unused NCR5380_show_info(struct seq_file *m,
 
 static int __init NCR5380_init(struct Scsi_Host *instance, int flags)
 {
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int i;
-       SETUP_HOSTDATA(instance);
+       unsigned long deadline;
 
        hostdata->host = instance;
-       hostdata->aborted = 0;
        hostdata->id_mask = 1 << instance->this_id;
        hostdata->id_higher_mask = 0;
        for (i = hostdata->id_mask; i <= 0x80; i <<= 1)
@@ -782,13 +632,21 @@ static int __init NCR5380_init(struct Scsi_Host *instance, int flags)
 #if defined (REAL_DMA)
        hostdata->dma_len = 0;
 #endif
-       hostdata->targets_present = 0;
+       spin_lock_init(&hostdata->lock);
        hostdata->connected = NULL;
-       hostdata->issue_queue = NULL;
-       hostdata->disconnected_queue = NULL;
+       hostdata->sensing = NULL;
+       INIT_LIST_HEAD(&hostdata->autosense);
+       INIT_LIST_HEAD(&hostdata->unissued);
+       INIT_LIST_HEAD(&hostdata->disconnected);
+
        hostdata->flags = flags;
 
        INIT_WORK(&hostdata->main_task, NCR5380_main);
+       hostdata->work_q = alloc_workqueue("ncr5380_%d",
+                               WQ_UNBOUND | WQ_MEM_RECLAIM,
+                               1, instance->host_no);
+       if (!hostdata->work_q)
+               return -ENOMEM;
 
        prepare_info(instance);
 
@@ -797,6 +655,72 @@ static int __init NCR5380_init(struct Scsi_Host *instance, int flags)
        NCR5380_write(TARGET_COMMAND_REG, 0);
        NCR5380_write(SELECT_ENABLE_REG, 0);
 
+       /* Calibrate register polling loop */
+       i = 0;
+       deadline = jiffies + 1;
+       do {
+               cpu_relax();
+       } while (time_is_after_jiffies(deadline));
+       deadline += msecs_to_jiffies(256);
+       do {
+               NCR5380_read(STATUS_REG);
+               ++i;
+               cpu_relax();
+       } while (time_is_after_jiffies(deadline));
+       hostdata->accesses_per_ms = i / 256;
+
+       return 0;
+}
+
+/**
+ * NCR5380_maybe_reset_bus - Detect and correct bus wedge problems.
+ * @instance: adapter to check
+ *
+ * If the system crashed, it may have crashed with a connected target and
+ * the SCSI bus busy. Check for BUS FREE phase. If not, try to abort the
+ * currently established nexus, which we know nothing about. Failing that
+ * do a bus reset.
+ *
+ * Note that a bus reset will cause the chip to assert IRQ.
+ *
+ * Returns 0 if successful, otherwise -ENXIO.
+ */
+
+static int NCR5380_maybe_reset_bus(struct Scsi_Host *instance)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       int pass;       /* escalation step; SR_BSY is re-read before each one */
+
+       for (pass = 1; (NCR5380_read(STATUS_REG) & SR_BSY) && pass <= 6; ++pass) {
+               switch (pass) {
+               case 1:
+               case 3:
+               case 5: /* odd passes: wait for BSY to clear */
+                       shost_printk(KERN_ERR, instance, "SCSI bus busy, waiting up to five seconds\n");
+                       NCR5380_poll_politely(instance,
+                                             STATUS_REG, SR_BSY, 0, 5 * HZ); /* result unused; the loop re-tests BSY */
+                       break;
+               case 2: /* first escalation: abort the unknown nexus */
+                       shost_printk(KERN_ERR, instance, "bus busy, attempting abort\n");
+                       do_abort(instance);     /* return value not checked */
+                       break;
+               case 4: /* second escalation: reset the bus */
+                       shost_printk(KERN_ERR, instance, "bus busy, attempting reset\n");
+                       do_reset(instance);
+                       /* Wait after a reset; the SCSI standard calls for
+                        * 250ms, we wait 500ms to be on the safe side. Some
+                        * Toshiba CD-ROMs need ten times the standard 250ms.
+                        */
+                       if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+                               msleep(2500);
+                       else
+                               msleep(500);
+                       break;
+               case 6: /* BSY still set after every earlier pass */
+                       shost_printk(KERN_ERR, instance, "bus locked solid\n");
+                       return -ENXIO;
+               }
+       }
        return 0;
 }
 
@@ -812,6 +736,38 @@ static void NCR5380_exit(struct Scsi_Host *instance)
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
        cancel_work_sync(&hostdata->main_task);
+       destroy_workqueue(hostdata->work_q);
+}
+
+/**
+ * complete_cmd - finish processing a command and return it to the SCSI ML
+ * @instance: the host instance
+ * @cmd: command to complete
+ */
+
+static void complete_cmd(struct Scsi_Host *instance,
+                         struct scsi_cmnd *cmd)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+
+       dsprintk(NDEBUG_QUEUES, instance, "complete_cmd: cmd %p\n", cmd);
+
+       if (hostdata->sensing == cmd) {
+               /* Autosense processing ends here; cmd is restored on both paths */
+               if ((cmd->result & 0xff) != SAM_STAT_GOOD) {
+                       scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+                       set_host_byte(cmd, DID_ERROR);  /* set after the restore */
+               } else
+                       scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+               hostdata->sensing = NULL;
+       }
+
+#ifdef SUPPORT_TAGS
+       cmd_free_tag(cmd);
+#else  /* !SUPPORT_TAGS: clear this LUN's bit in the target's busy mask */
+       hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun);
+#endif /* SUPPORT_TAGS */
+       cmd->scsi_done(cmd);    /* invoke the command's completion callback */
 }
 
 /**
@@ -819,7 +775,7 @@ static void NCR5380_exit(struct Scsi_Host *instance)
  * @instance: the relevant SCSI adapter
  * @cmd: SCSI command
  *
- * cmd is added to the per instance issue_queue, with minor
+ * cmd is added to the per-instance issue queue, with minor
  * twiddling done to the host specific fields of cmd.  If the
  * main coroutine is not running, it is restarted.
  */
@@ -828,44 +784,23 @@ static int NCR5380_queue_command(struct Scsi_Host *instance,
                                  struct scsi_cmnd *cmd)
 {
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
-       struct scsi_cmnd *tmp;
+       struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
        unsigned long flags;
 
 #if (NDEBUG & NDEBUG_NO_WRITE)
        switch (cmd->cmnd[0]) {
        case WRITE_6:
        case WRITE_10:
-               printk(KERN_NOTICE "scsi%d: WRITE attempted with NO_WRITE debugging flag set\n",
-                      H_NO(cmd));
+               shost_printk(KERN_DEBUG, instance, "WRITE attempted with NDEBUG_NO_WRITE set\n");
                cmd->result = (DID_ERROR << 16);
                cmd->scsi_done(cmd);
                return 0;
        }
 #endif /* (NDEBUG & NDEBUG_NO_WRITE) */
 
-       /*
-        * We use the host_scribble field as a pointer to the next command
-        * in a queue
-        */
-
-       SET_NEXT(cmd, NULL);
        cmd->result = 0;
 
        /*
-        * Insert the cmd into the issue queue. Note that REQUEST SENSE
-        * commands are added to the head of the queue since any command will
-        * clear the contingent allegiance condition that exists and the
-        * sense data is only guaranteed to be valid while the condition exists.
-        */
-
-       /* ++guenther: now that the issue queue is being set up, we can lock ST-DMA.
-        * Otherwise a running NCR5380_main may steal the lock.
-        * Lock before actually inserting due to fairness reasons explained in
-        * atari_scsi.c. If we insert first, then it's impossible for this driver
-        * to release the lock.
-        * Stop timer for this command while waiting for the lock, or timeouts
-        * may happen (and they really do), and it's no good if the command doesn't
-        * appear in any of the queues.
         * ++roman: Just disabling the NCR interrupt isn't sufficient here,
         * because also a timer int can trigger an abort or reset, which would
         * alter queues and touch the lock.
@@ -873,7 +808,7 @@ static int NCR5380_queue_command(struct Scsi_Host *instance,
        if (!NCR5380_acquire_dma_irq(instance))
                return SCSI_MLQUEUE_HOST_BUSY;
 
-       local_irq_save(flags);
+       spin_lock_irqsave(&hostdata->lock, flags);
 
        /*
         * Insert the cmd into the issue queue. Note that REQUEST SENSE
@@ -882,33 +817,18 @@ static int NCR5380_queue_command(struct Scsi_Host *instance,
         * sense data is only guaranteed to be valid while the condition exists.
         */
 
-       if (!(hostdata->issue_queue) || (cmd->cmnd[0] == REQUEST_SENSE)) {
-               LIST(cmd, hostdata->issue_queue);
-               SET_NEXT(cmd, hostdata->issue_queue);
-               hostdata->issue_queue = cmd;
-       } else {
-               for (tmp = (struct scsi_cmnd *)hostdata->issue_queue;
-                    NEXT(tmp); tmp = NEXT(tmp))
-                       ;
-               LIST(cmd, tmp);
-               SET_NEXT(tmp, cmd);
-       }
-       local_irq_restore(flags);
+       if (cmd->cmnd[0] == REQUEST_SENSE)
+               list_add(&ncmd->list, &hostdata->unissued);
+       else
+               list_add_tail(&ncmd->list, &hostdata->unissued);
 
-       dprintk(NDEBUG_QUEUES, "scsi%d: command added to %s of queue\n", H_NO(cmd),
-                 (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
+       spin_unlock_irqrestore(&hostdata->lock, flags);
 
-       /* If queue_command() is called from an interrupt (real one or bottom
-        * half), we let queue_main() do the job of taking care about main. If it
-        * is already running, this is a no-op, else main will be queued.
-        *
-        * If we're not in an interrupt, we can call NCR5380_main()
-        * unconditionally, because it cannot be already running.
-        */
-       if (in_interrupt() || irqs_disabled())
-               queue_main(hostdata);
-       else
-               NCR5380_main(&hostdata->main_task);
+       dsprintk(NDEBUG_QUEUES, instance, "command %p added to %s of queue\n",
+                cmd, (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
+
+       /* Kick off command processing */
+       queue_work(hostdata->work_q, &hostdata->main_task);
        return 0;
 }
 
@@ -917,13 +837,78 @@ static inline void maybe_release_dma_irq(struct Scsi_Host *instance)
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
        /* Caller does the locking needed to set & test these data atomically */
-       if (!hostdata->disconnected_queue &&
-           !hostdata->issue_queue &&
+       if (list_empty(&hostdata->disconnected) &&
+           list_empty(&hostdata->unissued) &&
+           list_empty(&hostdata->autosense) &&
            !hostdata->connected &&
-           !hostdata->retain_dma_intr)
+           !hostdata->selecting)
                NCR5380_release_dma_irq(instance);
 }
 
+/**
+ * dequeue_next_cmd - dequeue a command for processing
+ * @instance: the scsi host instance
+ *
+ * Priority is given to commands on the autosense queue. These commands
+ * need autosense because of a CHECK CONDITION result.
+ *
+ * Returns a command pointer if a command is found for a target that is
+ * not already busy. Otherwise returns NULL.
+ */
+
+static struct scsi_cmnd *dequeue_next_cmd(struct Scsi_Host *instance)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       struct NCR5380_cmd *ncmd;
+       struct scsi_cmnd *cmd;
+
+       if (list_empty(&hostdata->autosense)) {
+               list_for_each_entry(ncmd, &hostdata->unissued, list) {
+                       cmd = NCR5380_to_scmd(ncmd);
+                       dsprintk(NDEBUG_QUEUES, instance, "dequeue: cmd=%p target=%d busy=0x%02x lun=%llu\n",
+                                cmd, scmd_id(cmd), hostdata->busy[scmd_id(cmd)], cmd->device->lun);
+
+                       if (
+#ifdef SUPPORT_TAGS
+                           !is_lun_busy(cmd, 1)
+#else
+                           !(hostdata->busy[scmd_id(cmd)] & (1 << cmd->device->lun))
+#endif
+                       ) {
+                               list_del(&ncmd->list);
+                               dsprintk(NDEBUG_QUEUES, instance,
+                                        "dequeue: removed %p from issue queue\n", cmd);
+                               return cmd;
+                       }
+               }
+       } else {
+               /* Autosense processing begins here */
+               ncmd = list_first_entry(&hostdata->autosense,
+                                       struct NCR5380_cmd, list);
+               list_del(&ncmd->list);
+               cmd = NCR5380_to_scmd(ncmd);
+               dsprintk(NDEBUG_QUEUES, instance,
+                        "dequeue: removed %p from autosense queue\n", cmd);
+               scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
+               hostdata->sensing = cmd;
+               return cmd;
+       }
+       return NULL;
+}
+
+static void requeue_cmd(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+
+       if (hostdata->sensing) {
+               scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+               list_add(&ncmd->list, &hostdata->autosense);
+               hostdata->sensing = NULL;
+       } else
+               list_add(&ncmd->list, &hostdata->unissued);
+}
+
 /**
  * NCR5380_main - NCR state machines
  *
@@ -931,8 +916,6 @@ static inline void maybe_release_dma_irq(struct Scsi_Host *instance)
  * be done on the NCR5380 host adapters in a system.  Both
  * NCR5380_queue_command() and NCR5380_intr() will try to start it
  * in case it is not running.
- *
- * Locks: called as its own thread with no locks held.
  */
 
 static void NCR5380_main(struct work_struct *work)
@@ -940,154 +923,69 @@ static void NCR5380_main(struct work_struct *work)
        struct NCR5380_hostdata *hostdata =
                container_of(work, struct NCR5380_hostdata, main_task);
        struct Scsi_Host *instance = hostdata->host;
-       struct scsi_cmnd *tmp, *prev;
+       struct scsi_cmnd *cmd;
        int done;
-       unsigned long flags;
 
        /*
-        * We run (with interrupts disabled) until we're sure that none of
-        * the host adapters have anything that can be done, at which point
-        * we set main_running to 0 and exit.
-        *
-        * Interrupts are enabled before doing various other internal
-        * instructions, after we've decided that we need to run through
-        * the loop again.
-        *
-        * this should prevent any race conditions.
-        *
         * ++roman: Just disabling the NCR interrupt isn't sufficient here,
         * because also a timer int can trigger an abort or reset, which can
         * alter queues and touch the Falcon lock.
         */
 
-       /* Tell int handlers main() is now already executing.  Note that
-          no races are possible here. If an int comes in before
-          'main_running' is set here, and queues/executes main via the
-          task queue, it doesn't do any harm, just this instance of main
-          won't find any work left to do. */
-       if (hostdata->main_running)
-               return;
-       hostdata->main_running = 1;
-
-       local_save_flags(flags);
        do {
-               local_irq_disable();    /* Freeze request queues */
                done = 1;
 
-               if (!hostdata->connected) {
-                       dprintk(NDEBUG_MAIN, "scsi%d: not connected\n", HOSTNO);
-                       /*
-                        * Search through the issue_queue for a command destined
-                        * for a target that's not busy.
-                        */
-#if (NDEBUG & NDEBUG_LISTS)
-                       for (tmp = (struct scsi_cmnd *) hostdata->issue_queue, prev = NULL;
-                            tmp && (tmp != prev); prev = tmp, tmp = NEXT(tmp))
-                               ;
-                       /*printk("%p  ", tmp);*/
-                       if ((tmp == prev) && tmp)
-                               printk(" LOOP\n");
-                       /* else printk("\n"); */
-#endif
-                       for (tmp = (struct scsi_cmnd *) hostdata->issue_queue,
-                            prev = NULL; tmp; prev = tmp, tmp = NEXT(tmp)) {
-                               u8 lun = tmp->device->lun;
-
-                               dprintk(NDEBUG_LISTS,
-                                       "MAIN tmp=%p target=%d busy=%d lun=%d\n",
-                                       tmp, scmd_id(tmp), hostdata->busy[scmd_id(tmp)],
-                                       lun);
-                               /*  When we find one, remove it from the issue queue. */
-                               /* ++guenther: possible race with Falcon locking */
-                               if (
-#ifdef SUPPORT_TAGS
-                                   !is_lun_busy( tmp, tmp->cmnd[0] != REQUEST_SENSE)
-#else
-                                   !(hostdata->busy[tmp->device->id] & (1 << lun))
-#endif
-                                   ) {
-                                       /* ++guenther: just to be sure, this must be atomic */
-                                       local_irq_disable();
-                                       if (prev) {
-                                               REMOVE(prev, NEXT(prev), tmp, NEXT(tmp));
-                                               SET_NEXT(prev, NEXT(tmp));
-                                       } else {
-                                               REMOVE(-1, hostdata->issue_queue, tmp, NEXT(tmp));
-                                               hostdata->issue_queue = NEXT(tmp);
-                                       }
-                                       SET_NEXT(tmp, NULL);
-                                       hostdata->retain_dma_intr++;
+               spin_lock_irq(&hostdata->lock);
+               while (!hostdata->connected &&
+                      (cmd = dequeue_next_cmd(instance))) {
 
-                                       /* reenable interrupts after finding one */
-                                       local_irq_restore(flags);
+                       dsprintk(NDEBUG_MAIN, instance, "main: dequeued %p\n", cmd);
 
-                                       /*
-                                        * Attempt to establish an I_T_L nexus here.
-                                        * On success, instance->hostdata->connected is set.
-                                        * On failure, we must add the command back to the
-                                        *   issue queue so we can keep trying.
-                                        */
-                                       dprintk(NDEBUG_MAIN, "scsi%d: main(): command for target %d "
-                                                   "lun %d removed from issue_queue\n",
-                                                   HOSTNO, tmp->device->id, lun);
-                                       /*
-                                        * REQUEST SENSE commands are issued without tagged
-                                        * queueing, even on SCSI-II devices because the
-                                        * contingent allegiance condition exists for the
-                                        * entire unit.
-                                        */
-                                       /* ++roman: ...and the standard also requires that
-                                        * REQUEST SENSE command are untagged.
-                                        */
+                       /*
+                        * Attempt to establish an I_T_L nexus here.
+                        * On success, instance->hostdata->connected is set.
+                        * On failure, we must add the command back to the
+                        * issue queue so we can keep trying.
+                        */
+                       /*
+                        * REQUEST SENSE commands are issued without tagged
+                        * queueing, even on SCSI-II devices because the
+                        * contingent allegiance condition exists for the
+                        * entire unit.
+                        */
+                       /* ++roman: ...and the standard also requires that
+                        * REQUEST SENSE commands are untagged.
+                        */
 
 #ifdef SUPPORT_TAGS
-                                       cmd_get_tag(tmp, tmp->cmnd[0] != REQUEST_SENSE);
+                       cmd_get_tag(cmd, cmd->cmnd[0] != REQUEST_SENSE);
 #endif
-                                       if (!NCR5380_select(instance, tmp)) {
-                                               local_irq_disable();
-                                               hostdata->retain_dma_intr--;
-                                               /* release if target did not response! */
-                                               maybe_release_dma_irq(instance);
-                                               local_irq_restore(flags);
-                                               break;
-                                       } else {
-                                               local_irq_disable();
-                                               LIST(tmp, hostdata->issue_queue);
-                                               SET_NEXT(tmp, hostdata->issue_queue);
-                                               hostdata->issue_queue = tmp;
+                       cmd = NCR5380_select(instance, cmd);
+                       if (!cmd) {
+                               dsprintk(NDEBUG_MAIN, instance, "main: select complete\n");
+                               maybe_release_dma_irq(instance);
+                       } else {
+                               dsprintk(NDEBUG_MAIN | NDEBUG_QUEUES, instance,
+                                        "main: select failed, returning %p to queue\n", cmd);
+                               requeue_cmd(instance, cmd);
 #ifdef SUPPORT_TAGS
-                                               cmd_free_tag(tmp);
+                               cmd_free_tag(cmd);
 #endif
-                                               hostdata->retain_dma_intr--;
-                                               local_irq_restore(flags);
-                                               dprintk(NDEBUG_MAIN, "scsi%d: main(): select() failed, "
-                                                           "returned to issue_queue\n", HOSTNO);
-                                               if (hostdata->connected)
-                                                       break;
-                                       }
-                               } /* if target/lun/target queue is not busy */
-                       } /* for issue_queue */
-               } /* if (!hostdata->connected) */
-
+                       }
+               }
                if (hostdata->connected
 #ifdef REAL_DMA
                    && !hostdata->dma_len
 #endif
                    ) {
-                       local_irq_restore(flags);
-                       dprintk(NDEBUG_MAIN, "scsi%d: main: performing information transfer\n",
-                                   HOSTNO);
+                       dsprintk(NDEBUG_MAIN, instance, "main: performing information transfer\n");
                        NCR5380_information_transfer(instance);
-                       dprintk(NDEBUG_MAIN, "scsi%d: main: done set false\n", HOSTNO);
                        done = 0;
                }
+               spin_unlock_irq(&hostdata->lock);
+               if (!done)
+                       cond_resched();
        } while (!done);
-
-       /* Better allow ints _after_ 'main_running' has been cleared, else
-          an interrupt could believe we'll pick up the work it left for
-          us, but we won't see it anymore here... */
-       hostdata->main_running = 0;
-       local_irq_restore(flags);
 }
 
 
@@ -1096,27 +994,20 @@ static void NCR5380_main(struct work_struct *work)
  * Function : void NCR5380_dma_complete (struct Scsi_Host *instance)
  *
  * Purpose : Called by interrupt handler when DMA finishes or a phase
- *     mismatch occurs (which would finish the DMA transfer).
+ * mismatch occurs (which would finish the DMA transfer).
  *
  * Inputs : instance - this instance of the NCR5380.
- *
  */
 
 static void NCR5380_dma_complete(struct Scsi_Host *instance)
 {
-       SETUP_HOSTDATA(instance);
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int transferred;
        unsigned char **data;
-       volatile int *count;
+       int *count;
        int saved_data = 0, overrun = 0;
        unsigned char p;
 
-       if (!hostdata->connected) {
-               printk(KERN_WARNING "scsi%d: received end of DMA interrupt with "
-                      "no connected cmd\n", HOSTNO);
-               return;
-       }
-
        if (hostdata->read_overruns) {
                p = hostdata->connected->SCp.phase;
                if (p & SR_IO) {
@@ -1126,15 +1017,11 @@ static void NCR5380_dma_complete(struct Scsi_Host *instance)
                            (BASR_PHASE_MATCH|BASR_ACK)) {
                                saved_data = NCR5380_read(INPUT_DATA_REG);
                                overrun = 1;
-                               dprintk(NDEBUG_DMA, "scsi%d: read overrun handled\n", HOSTNO);
+                               dsprintk(NDEBUG_DMA, instance, "read overrun handled\n");
                        }
                }
        }
 
-       dprintk(NDEBUG_DMA, "scsi%d: real DMA transfer complete, basr 0x%X, sr 0x%X\n",
-                  HOSTNO, NCR5380_read(BUS_AND_STATUS_REG),
-                  NCR5380_read(STATUS_REG));
-
 #if defined(CONFIG_SUN3)
        if ((sun3scsi_dma_finish(rq_data_dir(hostdata->connected->request)))) {
                pr_err("scsi%d: overrun in UDC counter -- not prepared to deal with this!\n",
@@ -1153,9 +1040,9 @@ static void NCR5380_dma_complete(struct Scsi_Host *instance)
        }
 #endif
 
-       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
        NCR5380_write(MODE_REG, MR_BASE);
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 
        transferred = hostdata->dma_len - NCR5380_dma_residual(instance);
        hostdata->dma_len = 0;
@@ -1194,140 +1081,160 @@ static void NCR5380_dma_complete(struct Scsi_Host *instance)
  * Handle interrupts, reestablishing I_T_L or I_T_L_Q nexuses
  * from the disconnected queue, and restarting NCR5380_main()
  * as required.
+ *
+ * The chip can assert IRQ in any of six different conditions. The IRQ flag
+ * is then cleared by reading the Reset Parity/Interrupt Register (RPIR).
+ * Three of these six conditions are latched in the Bus and Status Register:
+ * - End of DMA (cleared by ending DMA Mode)
+ * - Parity error (cleared by reading RPIR)
+ * - Loss of BSY (cleared by reading RPIR)
+ * Two conditions have flag bits that are not latched:
+ * - Bus phase mismatch (non-maskable in DMA Mode, cleared by ending DMA Mode)
+ * - Bus reset (non-maskable)
+ * The remaining condition has no flag bit at all:
+ * - Selection/reselection
+ *
+ * Hence, establishing the cause(s) of any interrupt is partly guesswork.
+ * In "The DP8490 and DP5380 Comparison Guide", National Semiconductor
+ * claimed that "the design of the [DP8490] interrupt logic ensures
+ * interrupts will not be lost (they can be on the DP5380)."
+ * The L5380/53C80 datasheet from LOGIC Devices has more details.
+ *
+ * Checking for bus reset by reading RST is futile because of interrupt
+ * latency, but a bus reset will reset chip logic. Checking for parity error
+ * is unnecessary because that interrupt is never enabled. A Loss of BSY
+ * condition will clear DMA Mode. We can tell when this occurs because the
+ * Busy Monitor interrupt is enabled together with DMA Mode.
  */
 
 static irqreturn_t NCR5380_intr(int irq, void *dev_id)
 {
        struct Scsi_Host *instance = dev_id;
-       int done = 1, handled = 0;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       int handled = 0;
        unsigned char basr;
+       unsigned long flags;
 
-       dprintk(NDEBUG_INTR, "scsi%d: NCR5380 irq triggered\n", HOSTNO);
+       spin_lock_irqsave(&hostdata->lock, flags);
 
-       /* Look for pending interrupts */
        basr = NCR5380_read(BUS_AND_STATUS_REG);
-       dprintk(NDEBUG_INTR, "scsi%d: BASR=%02x\n", HOSTNO, basr);
-       /* dispatch to appropriate routine if found and done=0 */
        if (basr & BASR_IRQ) {
-               NCR5380_dprint(NDEBUG_INTR, instance);
-               if ((NCR5380_read(STATUS_REG) & (SR_SEL|SR_IO)) == (SR_SEL|SR_IO)) {
-                       done = 0;
-                       dprintk(NDEBUG_INTR, "scsi%d: SEL interrupt\n", HOSTNO);
-                       NCR5380_reselect(instance);
-                       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-               } else if (basr & BASR_PARITY_ERROR) {
-                       dprintk(NDEBUG_INTR, "scsi%d: PARITY interrupt\n", HOSTNO);
-                       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-               } else if ((NCR5380_read(STATUS_REG) & SR_RST) == SR_RST) {
-                       dprintk(NDEBUG_INTR, "scsi%d: RESET interrupt\n", HOSTNO);
-                       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-               } else {
-                       /*
-                        * The rest of the interrupt conditions can occur only during a
-                        * DMA transfer
-                        */
+               unsigned char mr = NCR5380_read(MODE_REG);
+               unsigned char sr = NCR5380_read(STATUS_REG);
+
+               dsprintk(NDEBUG_INTR, instance, "IRQ %d, BASR 0x%02x, SR 0x%02x, MR 0x%02x\n",
+                        irq, basr, sr, mr);
 
 #if defined(REAL_DMA)
-                       /*
-                        * We should only get PHASE MISMATCH and EOP interrupts if we have
-                        * DMA enabled, so do a sanity check based on the current setting
-                        * of the MODE register.
+               if ((mr & MR_DMA_MODE) || (mr & MR_MONITOR_BSY)) {
+                       /* Probably End of DMA, Phase Mismatch or Loss of BSY.
+                        * We ack IRQ after clearing Mode Register. Workarounds
+                        * for End of DMA errata need to happen in DMA Mode.
                         */
 
-                       if ((NCR5380_read(MODE_REG) & MR_DMA_MODE) &&
-                           ((basr & BASR_END_DMA_TRANSFER) ||
-                            !(basr & BASR_PHASE_MATCH))) {
+                       dsprintk(NDEBUG_INTR, instance, "interrupt in DMA mode\n");
 
-                               dprintk(NDEBUG_INTR, "scsi%d: PHASE MISM or EOP interrupt\n", HOSTNO);
-                               NCR5380_dma_complete( instance );
-                               done = 0;
-                       } else
+                       if (hostdata->connected) {
+                               NCR5380_dma_complete(instance);
+                               queue_work(hostdata->work_q, &hostdata->main_task);
+                       } else {
+                               NCR5380_write(MODE_REG, MR_BASE);
+                               NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+                       }
+               } else
 #endif /* REAL_DMA */
-                       {
-/* MS: Ignore unknown phase mismatch interrupts (caused by EOP interrupt) */
-                               if (basr & BASR_PHASE_MATCH)
-                                       dprintk(NDEBUG_INTR, "scsi%d: unknown interrupt, "
-                                              "BASR 0x%x, MR 0x%x, SR 0x%x\n",
-                                              HOSTNO, basr, NCR5380_read(MODE_REG),
-                                              NCR5380_read(STATUS_REG));
-                               (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+               if ((NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_mask) &&
+                   (sr & (SR_SEL | SR_IO | SR_BSY | SR_RST)) == (SR_SEL | SR_IO)) {
+                       /* Probably reselected */
+                       NCR5380_write(SELECT_ENABLE_REG, 0);
+                       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+                       dsprintk(NDEBUG_INTR, instance, "interrupt with SEL and IO\n");
+
+                       if (!hostdata->connected) {
+                               NCR5380_reselect(instance);
+                               queue_work(hostdata->work_q, &hostdata->main_task);
+                       }
+                       if (!hostdata->connected)
+                               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+               } else {
+                       /* Probably Bus Reset */
+                       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+                       dsprintk(NDEBUG_INTR, instance, "unknown interrupt\n");
 #ifdef SUN3_SCSI_VME
-                               dregs->csr |= CSR_DMA_ENABLE;
+                       dregs->csr |= CSR_DMA_ENABLE;
 #endif
-                       }
-               } /* if !(SELECTION || PARITY) */
+               }
                handled = 1;
-       } /* BASR & IRQ */ else {
-               printk(KERN_NOTICE "scsi%d: interrupt without IRQ bit set in BASR, "
-                      "BASR 0x%X, MR 0x%X, SR 0x%x\n", HOSTNO, basr,
-                      NCR5380_read(MODE_REG), NCR5380_read(STATUS_REG));
-               (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+       } else {
+               shost_printk(KERN_NOTICE, instance, "interrupt without IRQ bit\n");
 #ifdef SUN3_SCSI_VME
                dregs->csr |= CSR_DMA_ENABLE;
 #endif
        }
 
-       if (!done) {
-               dprintk(NDEBUG_INTR, "scsi%d: in int routine, calling main\n", HOSTNO);
-               /* Put a call to NCR5380_main() on the queue... */
-               queue_main(shost_priv(instance));
-       }
+       spin_unlock_irqrestore(&hostdata->lock, flags);
+
        return IRQ_RETVAL(handled);
 }
 
 /*
  * Function : int NCR5380_select(struct Scsi_Host *instance,
- *                               struct scsi_cmnd *cmd)
+ * struct scsi_cmnd *cmd)
  *
  * Purpose : establishes I_T_L or I_T_L_Q nexus for new or existing command,
- *     including ARBITRATION, SELECTION, and initial message out for
- *     IDENTIFY and queue messages.
+ * including ARBITRATION, SELECTION, and initial message out for
+ * IDENTIFY and queue messages.
  *
  * Inputs : instance - instantiation of the 5380 driver on which this
- *     target lives, cmd - SCSI command to execute.
+ * target lives, cmd - SCSI command to execute.
  *
- * Returns : -1 if selection could not execute for some reason,
- *     0 if selection succeeded or failed because the target
- *     did not respond.
+ * Returns cmd if selection failed but should be retried,
+ * NULL if selection failed and should not be retried, or
+ * NULL if selection succeeded (hostdata->connected == cmd).
  *
  * Side effects :
- *     If bus busy, arbitration failed, etc, NCR5380_select() will exit
- *             with registers as they should have been on entry - ie
- *             SELECT_ENABLE will be set appropriately, the NCR5380
- *             will cease to drive any SCSI bus signals.
+ * If bus busy, arbitration failed, etc, NCR5380_select() will exit
+ * with registers as they should have been on entry - ie
+ * SELECT_ENABLE will be set appropriately, the NCR5380
+ * will cease to drive any SCSI bus signals.
  *
- *     If successful : I_T_L or I_T_L_Q nexus will be established,
- *             instance->connected will be set to cmd.
- *             SELECT interrupt will be disabled.
+ * If successful : I_T_L or I_T_L_Q nexus will be established,
+ * instance->connected will be set to cmd.
+ * SELECT interrupt will be disabled.
  *
- *     If failed (no target) : cmd->scsi_done() will be called, and the
- *             cmd->result host byte set to DID_BAD_TARGET.
+ * If failed (no target) : cmd->scsi_done() will be called, and the
+ * cmd->result host byte set to DID_BAD_TARGET.
  */
 
-static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance,
+                                        struct scsi_cmnd *cmd)
 {
-       SETUP_HOSTDATA(instance);
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned char tmp[3], phase;
        unsigned char *data;
        int len;
-       unsigned long timeout;
-       unsigned long flags;
+       int err;
 
-       hostdata->restart_select = 0;
        NCR5380_dprint(NDEBUG_ARBITRATION, instance);
-       dprintk(NDEBUG_ARBITRATION, "scsi%d: starting arbitration, id = %d\n", HOSTNO,
-                  instance->this_id);
+       dsprintk(NDEBUG_ARBITRATION, instance, "starting arbitration, id = %d\n",
+                instance->this_id);
+
+       /*
+        * Arbitration and selection phases are slow and involve dropping the
+        * lock, so we have to watch out for EH. An error handler may
+        * change 'selecting' to NULL. This function will then return NULL
+        * so that the caller will forget about 'cmd'. (During information
+        * transfer phases, EH may change 'connected' to NULL.)
+        */
+       hostdata->selecting = cmd;
 
        /*
         * Set the phase bits to 0, otherwise the NCR5380 won't drive the
         * data bus during SELECTION.
         */
 
-       local_irq_save(flags);
-       if (hostdata->connected) {
-               local_irq_restore(flags);
-               return -1;
-       }
        NCR5380_write(TARGET_COMMAND_REG, 0);
 
        /*
@@ -1337,96 +1244,77 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
        NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask);
        NCR5380_write(MODE_REG, MR_ARBITRATE);
 
-       local_irq_restore(flags);
-
-       /* Wait for arbitration logic to complete */
-#if defined(NCR_TIMEOUT)
-       {
-               unsigned long timeout = jiffies + 2*NCR_TIMEOUT;
+       /* The chip now waits for BUS FREE phase. Then after the 800 ns
+        * Bus Free Delay, arbitration will begin.
+        */
 
-               while (!(NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_PROGRESS) &&
-                      time_before(jiffies, timeout) && !hostdata->connected)
-                       ;
-               if (time_after_eq(jiffies, timeout)) {
-                       printk("scsi : arbitration timeout at %d\n", __LINE__);
-                       NCR5380_write(MODE_REG, MR_BASE);
-                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                       return -1;
-               }
+       spin_unlock_irq(&hostdata->lock);
+       err = NCR5380_poll_politely2(instance, MODE_REG, MR_ARBITRATE, 0,
+                       INITIATOR_COMMAND_REG, ICR_ARBITRATION_PROGRESS,
+                                              ICR_ARBITRATION_PROGRESS, HZ);
+       spin_lock_irq(&hostdata->lock);
+       if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE)) {
+               /* Reselection interrupt */
+               goto out;
        }
-#else /* NCR_TIMEOUT */
-       while (!(NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_PROGRESS) &&
-              !hostdata->connected)
-               ;
-#endif
-
-       dprintk(NDEBUG_ARBITRATION, "scsi%d: arbitration complete\n", HOSTNO);
-
-       if (hostdata->connected) {
+       if (err < 0) {
                NCR5380_write(MODE_REG, MR_BASE);
-               return -1;
+               shost_printk(KERN_ERR, instance,
+                            "select: arbitration timeout\n");
+               goto out;
        }
-       /*
-        * The arbitration delay is 2.2us, but this is a minimum and there is
-        * no maximum so we can safely sleep for ceil(2.2) usecs to accommodate
-        * the integral nature of udelay().
-        *
-        */
+       spin_unlock_irq(&hostdata->lock);
 
+       /* The SCSI-2 arbitration delay is 2.4 us */
        udelay(3);
 
        /* Check for lost arbitration */
        if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
            (NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_higher_mask) ||
-           (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
-           hostdata->connected) {
+           (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST)) {
                NCR5380_write(MODE_REG, MR_BASE);
-               dprintk(NDEBUG_ARBITRATION, "scsi%d: lost arbitration, deasserting MR_ARBITRATE\n",
-                          HOSTNO);
-               return -1;
+               dsprintk(NDEBUG_ARBITRATION, instance, "lost arbitration, deasserting MR_ARBITRATE\n");
+               spin_lock_irq(&hostdata->lock);
+               goto out;
        }
 
-       /* after/during arbitration, BSY should be asserted.
-          IBM DPES-31080 Version S31Q works now */
-       /* Tnx to Thomas_Roesch@m2.maus.de for finding this! (Roman) */
+       /* After/during arbitration, BSY should be asserted.
+        * IBM DPES-31080 Version S31Q works now
+        * Tnx to Thomas_Roesch@m2.maus.de for finding this! (Roman)
+        */
        NCR5380_write(INITIATOR_COMMAND_REG,
                      ICR_BASE | ICR_ASSERT_SEL | ICR_ASSERT_BSY);
 
-       if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
-           hostdata->connected) {
-               NCR5380_write(MODE_REG, MR_BASE);
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               dprintk(NDEBUG_ARBITRATION, "scsi%d: lost arbitration, deasserting ICR_ASSERT_SEL\n",
-                          HOSTNO);
-               return -1;
-       }
-
        /*
         * Again, bus clear + bus settle time is 1.2us, however, this is
         * a minimum so we'll udelay ceil(1.2)
         */
 
-#ifdef CONFIG_ATARI_SCSI_TOSHIBA_DELAY
-       /* ++roman: But some targets (see above :-) seem to need a bit more... */
-       udelay(15);
-#else
-       udelay(2);
-#endif
+       if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+               udelay(15);
+       else
+               udelay(2);
 
-       if (hostdata->connected) {
+       spin_lock_irq(&hostdata->lock);
+
+       /* NCR5380_reselect() clears MODE_REG after a reselection interrupt */
+       if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE))
+               goto out;
+
+       if (!hostdata->selecting) {
                NCR5380_write(MODE_REG, MR_BASE);
                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               return -1;
+               goto out;
        }
 
-       dprintk(NDEBUG_ARBITRATION, "scsi%d: won arbitration\n", HOSTNO);
+       dsprintk(NDEBUG_ARBITRATION, instance, "won arbitration\n");
 
        /*
         * Now that we have won arbitration, start Selection process, asserting
         * the host and target ID's on the SCSI bus.
         */
 
-       NCR5380_write(OUTPUT_DATA_REG, (hostdata->id_mask | (1 << cmd->device->id)));
+       NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask | (1 << scmd_id(cmd)));
 
        /*
         * Raise ATN while SEL is true before BSY goes false from arbitration,
@@ -1434,22 +1322,18 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
         * phase immediately after selection.
         */
 
-       NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_BSY |
-                     ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL ));
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY |
+                     ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL);
        NCR5380_write(MODE_REG, MR_BASE);
 
        /*
         * Reselect interrupts must be turned off prior to the dropping of BSY,
         * otherwise we will trigger an interrupt.
         */
-
-       if (hostdata->connected) {
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               return -1;
-       }
-
        NCR5380_write(SELECT_ENABLE_REG, 0);
 
+       spin_unlock_irq(&hostdata->lock);
+
        /*
         * The initiator shall then wait at least two deskew delays and release
         * the BSY signal.
@@ -1457,8 +1341,8 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
        udelay(1);        /* wingel -- wait two bus deskew delay >2*45ns */
 
        /* Reset BSY */
-       NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_DATA |
-                     ICR_ASSERT_ATN | ICR_ASSERT_SEL));
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA |
+                     ICR_ASSERT_ATN | ICR_ASSERT_SEL);
 
        /*
         * Something weird happens when we cease to drive BSY - looks
@@ -1479,45 +1363,39 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
 
        udelay(1);
 
-       dprintk(NDEBUG_SELECTION, "scsi%d: selecting target %d\n", HOSTNO, cmd->device->id);
+       dsprintk(NDEBUG_SELECTION, instance, "selecting target %d\n", scmd_id(cmd));
 
        /*
         * The SCSI specification calls for a 250 ms timeout for the actual
         * selection.
         */
 
-       timeout = jiffies + msecs_to_jiffies(250);
-
-       /*
-        * XXX very interesting - we're seeing a bounce where the BSY we
-        * asserted is being reflected / still asserted (propagation delay?)
-        * and it's detecting as true.  Sigh.
-        */
-
-#if 0
-       /* ++roman: If a target conformed to the SCSI standard, it wouldn't assert
-        * IO while SEL is true. But again, there are some disks out the in the
-        * world that do that nevertheless. (Somebody claimed that this announces
-        * reselection capability of the target.) So we better skip that test and
-        * only wait for BSY... (Famous german words: Der Klügere gibt nach :-)
-        */
-
-       while (time_before(jiffies, timeout) &&
-              !(NCR5380_read(STATUS_REG) & (SR_BSY | SR_IO)))
-               ;
+       err = NCR5380_poll_politely(instance, STATUS_REG, SR_BSY, SR_BSY,
+                                   msecs_to_jiffies(250));
 
        if ((NCR5380_read(STATUS_REG) & (SR_SEL | SR_IO)) == (SR_SEL | SR_IO)) {
+               spin_lock_irq(&hostdata->lock);
                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                NCR5380_reselect(instance);
-               printk(KERN_ERR "scsi%d: reselection after won arbitration?\n",
-                      HOSTNO);
+               if (!hostdata->connected)
+                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+               shost_printk(KERN_ERR, instance, "reselection after won arbitration?\n");
+               goto out;
+       }
+
+       if (err < 0) {
+               spin_lock_irq(&hostdata->lock);
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               return -1;
+               /* Can't touch cmd if it has been reclaimed by the scsi ML */
+               if (hostdata->selecting) {
+                       cmd->result = DID_BAD_TARGET << 16;
+                       complete_cmd(instance, cmd);
+                       dsprintk(NDEBUG_SELECTION, instance, "target did not respond within 250ms\n");
+                       cmd = NULL;
+               }
+               goto out;
        }
-#else
-       while (time_before(jiffies, timeout) && !(NCR5380_read(STATUS_REG) & SR_BSY))
-               ;
-#endif
 
        /*
         * No less than two deskew delays after the initiator detects the
@@ -1525,32 +1403,9 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
         * change the DATA BUS.                                     -wingel
         */
 
-       udelay(1);
-
-       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
-
-       if (!(NCR5380_read(STATUS_REG) & SR_BSY)) {
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               if (hostdata->targets_present & (1 << cmd->device->id)) {
-                       printk(KERN_ERR "scsi%d: weirdness\n", HOSTNO);
-                       if (hostdata->restart_select)
-                               printk(KERN_NOTICE "\trestart select\n");
-                       NCR5380_dprint(NDEBUG_ANY, instance);
-                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                       return -1;
-               }
-               cmd->result = DID_BAD_TARGET << 16;
-#ifdef SUPPORT_TAGS
-               cmd_free_tag(cmd);
-#endif
-               cmd->scsi_done(cmd);
-               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               dprintk(NDEBUG_SELECTION, "scsi%d: target did not respond within 250ms\n", HOSTNO);
-               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               return 0;
-       }
-
-       hostdata->targets_present |= (1 << cmd->device->id);
+       udelay(1);
+
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
 
        /*
         * Since we followed the SCSI spec, and raised ATN while SEL
@@ -1563,16 +1418,27 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
         * until it wraps back to 0.
         *
         * XXX - it turns out that there are some broken SCSI-II devices,
-        *           which claim to support tagged queuing but fail when more than
-        *           some number of commands are issued at once.
+        * which claim to support tagged queuing but fail when more than
+        * some number of commands are issued at once.
         */
 
        /* Wait for start of REQ/ACK handshake */
-       while (!(NCR5380_read(STATUS_REG) & SR_REQ))
-               ;
 
-       dprintk(NDEBUG_SELECTION, "scsi%d: target %d selected, going into MESSAGE OUT phase.\n",
-                  HOSTNO, cmd->device->id);
+       err = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
+       spin_lock_irq(&hostdata->lock);
+       if (err < 0) {
+               shost_printk(KERN_ERR, instance, "select: REQ timeout\n");
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+               goto out;
+       }
+       if (!hostdata->selecting) {
+               do_abort(instance);
+               goto out;
+       }
+
+       dsprintk(NDEBUG_SELECTION, instance, "target %d selected, going into MESSAGE OUT phase.\n",
+                scmd_id(cmd));
        tmp[0] = IDENTIFY(1, cmd->device->lun);
 
 #ifdef SUPPORT_TAGS
@@ -1591,11 +1457,12 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
        data = tmp;
        phase = PHASE_MSGOUT;
        NCR5380_transfer_pio(instance, &phase, &len, &data);
-       dprintk(NDEBUG_SELECTION, "scsi%d: nexus established.\n", HOSTNO);
+       dsprintk(NDEBUG_SELECTION, instance, "nexus established.\n");
        /* XXX need to handle errors here */
+
        hostdata->connected = cmd;
 #ifndef SUPPORT_TAGS
-       hostdata->busy[cmd->device->id] |= (1 << cmd->device->lun);
+       hostdata->busy[cmd->device->id] |= 1 << cmd->device->lun;
 #endif
 #ifdef SUN3_SCSI_VME
        dregs->csr |= CSR_INTR;
@@ -1603,24 +1470,30 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
 
        initialize_SCp(cmd);
 
-       return 0;
+       cmd = NULL;
+
+out:
+       if (!hostdata->selecting)
+               return NULL;
+       hostdata->selecting = NULL;
+       return cmd;
 }
 
 /*
  * Function : int NCR5380_transfer_pio (struct Scsi_Host *instance,
- *      unsigned char *phase, int *count, unsigned char **data)
+ * unsigned char *phase, int *count, unsigned char **data)
  *
  * Purpose : transfers data in given phase using polled I/O
  *
  * Inputs : instance - instance of driver, *phase - pointer to
- *     what phase is expected, *count - pointer to number of
- *     bytes to transfer, **data - pointer to data pointer.
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
  *
  * Returns : -1 when different phase is entered without transferring
- *     maximum number of bytes, 0 if all bytes are transferred or exit
- *     is in same phase.
+ * maximum number of bytes, 0 if all bytes are transferred or exit
+ * is in same phase.
  *
- *     Also, *phase, *count, *data are modified in place.
+ * Also, *phase, *count, *data are modified in place.
  *
  * XXX Note : handling for bus free may be useful.
  */
@@ -1635,9 +1508,9 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
                                unsigned char *phase, int *count,
                                unsigned char **data)
 {
-       register unsigned char p = *phase, tmp;
-       register int c = *count;
-       register unsigned char *d = *data;
+       unsigned char p = *phase, tmp;
+       int c = *count;
+       unsigned char *d = *data;
 
        /*
         * The NCR5380 chip will only drive the SCSI bus when the
@@ -1652,14 +1525,15 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
                 * Wait for assertion of REQ, after which the phase bits will be
                 * valid
                 */
-               while (!((tmp = NCR5380_read(STATUS_REG)) & SR_REQ))
-                       ;
 
-               dprintk(NDEBUG_HANDSHAKE, "scsi%d: REQ detected\n", HOSTNO);
+               if (NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ) < 0)
+                       break;
+
+               dsprintk(NDEBUG_HANDSHAKE, instance, "REQ asserted\n");
 
                /* Check for phase mismatch */
-               if ((tmp & PHASE_MASK) != p) {
-                       dprintk(NDEBUG_PIO, "scsi%d: phase mismatch\n", HOSTNO);
+               if ((NCR5380_read(STATUS_REG) & PHASE_MASK) != p) {
+                       dsprintk(NDEBUG_PIO, instance, "phase mismatch\n");
                        NCR5380_dprint_phase(NDEBUG_PIO, instance);
                        break;
                }
@@ -1684,35 +1558,36 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA);
                                NCR5380_dprint(NDEBUG_PIO, instance);
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
-                                             ICR_ASSERT_DATA | ICR_ASSERT_ACK);
+                                             ICR_ASSERT_DATA | ICR_ASSERT_ACK);
                        } else {
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
-                                             ICR_ASSERT_DATA | ICR_ASSERT_ATN);
+                                             ICR_ASSERT_DATA | ICR_ASSERT_ATN);
                                NCR5380_dprint(NDEBUG_PIO, instance);
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
-                                             ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+                                             ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
                        }
                } else {
                        NCR5380_dprint(NDEBUG_PIO, instance);
                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ACK);
                }
 
-               while (NCR5380_read(STATUS_REG) & SR_REQ)
-                       ;
+               if (NCR5380_poll_politely(instance,
+                                         STATUS_REG, SR_REQ, 0, 5 * HZ) < 0)
+                       break;
 
-               dprintk(NDEBUG_HANDSHAKE, "scsi%d: req false, handshake complete\n", HOSTNO);
+               dsprintk(NDEBUG_HANDSHAKE, instance, "REQ negated, handshake complete\n");
 
-               /*
-                * We have several special cases to consider during REQ/ACK handshaking :
-                * 1.  We were in MSGOUT phase, and we are on the last byte of the
-                *      message.  ATN must be dropped as ACK is dropped.
-                *
-                * 2.  We are in a MSGIN phase, and we are on the last byte of the
-                *      message.  We must exit with ACK asserted, so that the calling
-                *      code may raise ATN before dropping ACK to reject the message.
-                *
-                * 3.  ACK and ATN are clear and the target may proceed as normal.
-                */
+/*
+ * We have several special cases to consider during REQ/ACK handshaking :
+ * 1.  We were in MSGOUT phase, and we are on the last byte of the
+ * message.  ATN must be dropped as ACK is dropped.
+ *
+ * 2.  We are in a MSGIN phase, and we are on the last byte of the
+ * message.  We must exit with ACK asserted, so that the calling
+ * code may raise ATN before dropping ACK to reject the message.
+ *
+ * 3.  ACK and ATN are clear and the target may proceed as normal.
+ */
                if (!(p == PHASE_MSGIN && c == 1)) {
                        if (p == PHASE_MSGOUT && c > 1)
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
@@ -1721,16 +1596,16 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
                }
        } while (--c);
 
-       dprintk(NDEBUG_PIO, "scsi%d: residual %d\n", HOSTNO, c);
+       dsprintk(NDEBUG_PIO, instance, "residual %d\n", c);
 
        *count = c;
        *data = d;
        tmp = NCR5380_read(STATUS_REG);
        /* The phase read from the bus is valid if either REQ is (already)
-        * asserted or if ACK hasn't been released yet. The latter is the case if
-        * we're in MSGIN and all wanted bytes have been received.
+        * asserted or if ACK hasn't been released yet. The latter applies if
+        * we're in MSG IN, DATA IN or STATUS and all bytes have been received.
         */
-       if ((tmp & SR_REQ) || (p == PHASE_MSGIN && c == 0))
+       if ((tmp & SR_REQ) || ((tmp & SR_IO) && c == 0))
                *phase = tmp & PHASE_MASK;
        else
                *phase = PHASE_UNKNOWN;
@@ -1741,19 +1616,45 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
                return -1;
 }
 
-/*
- * Function : do_abort (Scsi_Host *host)
+/**
+ * do_reset - issue a reset command
+ * @instance: adapter to reset
+ *
+ * Issue a reset sequence to the NCR5380 and try and get the bus
+ * back into sane shape.
  *
- * Purpose : abort the currently established nexus.  Should only be
- *     called from a routine which can drop into a
+ * This clears the reset interrupt flag because there may be no handler for
+ * it. When the driver is initialized, the NCR5380_intr() handler has not yet
+ * been installed. And when in EH we may have released the ST DMA interrupt.
+ */
+
+static void do_reset(struct Scsi_Host *instance)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       NCR5380_write(TARGET_COMMAND_REG,
+                     PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG) & PHASE_MASK));
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
+       udelay(50);
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+       local_irq_restore(flags);
+}
+
+/**
+ * do_abort - abort the currently established nexus by going to
+ * MESSAGE OUT phase and sending an ABORT message.
+ * @instance: relevant scsi host instance
  *
- * Returns 0 on success, -1 on failure.
+ * Returns 0 on success, -1 on failure.
  */
 
 static int do_abort(struct Scsi_Host *instance)
 {
-       unsigned char tmp, *msgptr, phase;
+       unsigned char *msgptr, phase, tmp;
        int len;
+       int rc;
 
        /* Request message out phase */
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
@@ -1768,16 +1669,20 @@ static int do_abort(struct Scsi_Host *instance)
         * the target sees, so we just handshake.
         */
 
-       while (!((tmp = NCR5380_read(STATUS_REG)) & SR_REQ))
-               ;
+       rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, 10 * HZ);
+       if (rc < 0)
+               goto timeout;
+
+       tmp = NCR5380_read(STATUS_REG) & PHASE_MASK;
 
        NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
 
-       if ((tmp & PHASE_MASK) != PHASE_MSGOUT) {
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN |
-                             ICR_ASSERT_ACK);
-               while (NCR5380_read(STATUS_REG) & SR_REQ)
-                       ;
+       if (tmp != PHASE_MSGOUT) {
+               NCR5380_write(INITIATOR_COMMAND_REG,
+                             ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+               rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, 0, 3 * HZ);
+               if (rc < 0)
+                       goto timeout;
                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
        }
 
@@ -1793,26 +1698,29 @@ static int do_abort(struct Scsi_Host *instance)
         */
 
        return len ? -1 : 0;
+
+timeout:
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+       return -1;
 }
 
 #if defined(REAL_DMA)
 /*
  * Function : int NCR5380_transfer_dma (struct Scsi_Host *instance,
- *      unsigned char *phase, int *count, unsigned char **data)
+ * unsigned char *phase, int *count, unsigned char **data)
  *
  * Purpose : transfers data in given phase using either real
- *     or pseudo DMA.
+ * or pseudo DMA.
  *
  * Inputs : instance - instance of driver, *phase - pointer to
- *     what phase is expected, *count - pointer to number of
- *     bytes to transfer, **data - pointer to data pointer.
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
  *
  * Returns : -1 when different phase is entered without transferring
- *     maximum number of bytes, 0 if all bytes or transferred or exit
- *     is in same phase.
- *
- *     Also, *phase, *count, *data are modified in place.
+ * maximum number of bytes, 0 if all bytes or transferred or exit
+ * is in same phase.
  *
+ * Also, *phase, *count, *data are modified in place.
  */
 
 
@@ -1820,10 +1728,9 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
                                unsigned char *phase, int *count,
                                unsigned char **data)
 {
-       SETUP_HOSTDATA(instance);
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        register int c = *count;
        register unsigned char p = *phase;
-       unsigned long flags;
 
 #if defined(CONFIG_SUN3)
        /* sanity check */
@@ -1834,29 +1741,22 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
        }
        hostdata->dma_len = c;
 
-       dprintk(NDEBUG_DMA, "scsi%d: initializing DMA for %s, %d bytes %s %p\n",
-               instance->host_no, (p & SR_IO) ? "reading" : "writing",
-               c, (p & SR_IO) ? "to" : "from", *data);
+       dsprintk(NDEBUG_DMA, instance, "initializing DMA %s: length %d, address %p\n",
+                (p & SR_IO) ? "receive" : "send", c, *data);
 
        /* netbsd turns off ints here, why not be safe and do it too */
-       local_irq_save(flags);
 
        /* send start chain */
        sun3scsi_dma_start(c, *data);
 
+       NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
+       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+                               MR_ENABLE_EOP_INTR);
        if (p & SR_IO) {
-               NCR5380_write(TARGET_COMMAND_REG, 1);
-               NCR5380_read(RESET_PARITY_INTERRUPT_REG);
                NCR5380_write(INITIATOR_COMMAND_REG, 0);
-               NCR5380_write(MODE_REG,
-                             (NCR5380_read(MODE_REG) | MR_DMA_MODE | MR_ENABLE_EOP_INTR));
                NCR5380_write(START_DMA_INITIATOR_RECEIVE_REG, 0);
        } else {
-               NCR5380_write(TARGET_COMMAND_REG, 0);
-               NCR5380_read(RESET_PARITY_INTERRUPT_REG);
                NCR5380_write(INITIATOR_COMMAND_REG, ICR_ASSERT_DATA);
-               NCR5380_write(MODE_REG,
-                             (NCR5380_read(MODE_REG) | MR_DMA_MODE | MR_ENABLE_EOP_INTR));
                NCR5380_write(START_DMA_SEND_REG, 0);
        }
 
@@ -1864,8 +1764,6 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
        dregs->csr |= CSR_DMA_ENABLE;
 #endif
 
-       local_irq_restore(flags);
-
        sun3_dma_active = 1;
 
 #else /* !defined(CONFIG_SUN3) */
@@ -1880,25 +1778,20 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
        if (hostdata->read_overruns && (p & SR_IO))
                c -= hostdata->read_overruns;
 
-       dprintk(NDEBUG_DMA, "scsi%d: initializing DMA for %s, %d bytes %s %p\n",
-                  HOSTNO, (p & SR_IO) ? "reading" : "writing",
-                  c, (p & SR_IO) ? "to" : "from", d);
+       dsprintk(NDEBUG_DMA, instance, "initializing DMA %s: length %d, address %p\n",
+                (p & SR_IO) ? "receive" : "send", c, d);
 
        NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
-
-#ifdef REAL_DMA
-       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_ENABLE_EOP_INTR | MR_MONITOR_BSY);
-#endif /* def REAL_DMA  */
+       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+                               MR_ENABLE_EOP_INTR);
 
        if (!(hostdata->flags & FLAG_LATE_DMA_SETUP)) {
                /* On the Medusa, it is a must to initialize the DMA before
                 * starting the NCR. This is also the cleaner way for the TT.
                 */
-               local_irq_save(flags);
                hostdata->dma_len = (p & SR_IO) ?
                        NCR5380_dma_read_setup(instance, d, c) :
                        NCR5380_dma_write_setup(instance, d, c);
-               local_irq_restore(flags);
        }
 
        if (p & SR_IO)
@@ -1912,11 +1805,9 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
                /* On the Falcon, the DMA setup must be done after the last */
                /* NCR access, else the DMA setup gets trashed!
                 */
-               local_irq_save(flags);
                hostdata->dma_len = (p & SR_IO) ?
                        NCR5380_dma_read_setup(instance, d, c) :
                        NCR5380_dma_write_setup(instance, d, c);
-               local_irq_restore(flags);
        }
 #endif /* !defined(CONFIG_SUN3) */
 
@@ -1928,23 +1819,22 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
  * Function : NCR5380_information_transfer (struct Scsi_Host *instance)
  *
  * Purpose : run through the various SCSI phases and do as the target
- *     directs us to.  Operates on the currently connected command,
- *     instance->connected.
+ * directs us to.  Operates on the currently connected command,
+ * instance->connected.
  *
  * Inputs : instance, instance for which we are doing commands
  *
  * Side effects : SCSI things happen, the disconnected queue will be
- *     modified if a command disconnects, *instance->connected will
- *     change.
+ * modified if a command disconnects, *instance->connected will
+ * change.
  *
  * XXX Note : we need to watch for bus free or a reset condition here
- *     to recover from an unexpected bus free condition.
+ * to recover from an unexpected bus free condition.
  */
 
 static void NCR5380_information_transfer(struct Scsi_Host *instance)
 {
-       SETUP_HOSTDATA(instance);
-       unsigned long flags;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned char msgout = NOP;
        int sink = 0;
        int len;
@@ -1953,13 +1843,15 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 #endif
        unsigned char *data;
        unsigned char phase, tmp, extended_msg[10], old_phase = 0xff;
-       struct scsi_cmnd *cmd = (struct scsi_cmnd *) hostdata->connected;
+       struct scsi_cmnd *cmd;
 
 #ifdef SUN3_SCSI_VME
        dregs->csr |= CSR_INTR;
 #endif
 
-       while (1) {
+       while ((cmd = hostdata->connected)) {
+               struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+
                tmp = NCR5380_read(STATUS_REG);
                /* We only have a valid SCSI phase when REQ is asserted */
                if (tmp & SR_REQ) {
@@ -1984,7 +1876,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                /* this command setup for dma yet? */
                                if ((count >= DMA_MIN_SIZE) && (sun3_dma_setup_done != cmd)) {
                                        if (cmd->request->cmd_type == REQ_TYPE_FS) {
-                                               sun3scsi_dma_setup(d, count,
+                                               sun3scsi_dma_setup(instance, d, count,
                                                                   rq_data_dir(cmd->request));
                                                sun3_dma_setup_done = cmd;
                                        }
@@ -2000,11 +1892,11 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
 
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN |
-                                             ICR_ASSERT_ACK);
+                                             ICR_ASSERT_ACK);
                                while (NCR5380_read(STATUS_REG) & SR_REQ)
                                        ;
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
-                                             ICR_ASSERT_ATN);
+                                             ICR_ASSERT_ATN);
                                sink = 0;
                                continue;
                        }
@@ -2012,12 +1904,11 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                        switch (phase) {
                        case PHASE_DATAOUT:
 #if (NDEBUG & NDEBUG_NO_DATAOUT)
-                               printk("scsi%d: NDEBUG_NO_DATAOUT set, attempted DATAOUT "
-                                      "aborted\n", HOSTNO);
+                               shost_printk(KERN_DEBUG, instance, "NDEBUG_NO_DATAOUT set, attempted DATAOUT aborted\n");
                                sink = 1;
                                do_abort(instance);
                                cmd->result = DID_ERROR << 16;
-                               cmd->scsi_done(cmd);
+                               complete_cmd(instance, cmd);
                                return;
 #endif
                        case PHASE_DATAIN:
@@ -2031,13 +1922,10 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                        --cmd->SCp.buffers_residual;
                                        cmd->SCp.this_residual = cmd->SCp.buffer->length;
                                        cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
-                                       /* ++roman: Try to merge some scatter-buffers if
-                                        * they are at contiguous physical addresses.
-                                        */
                                        merge_contiguous_buffers(cmd);
-                                       dprintk(NDEBUG_INFORMATION, "scsi%d: %d bytes and %d buffers left\n",
-                                                  HOSTNO, cmd->SCp.this_residual,
-                                                  cmd->SCp.buffers_residual);
+                                       dsprintk(NDEBUG_INFORMATION, instance, "%d bytes and %d buffers left\n",
+                                                cmd->SCp.this_residual,
+                                                cmd->SCp.buffers_residual);
                                }
 
                                /*
@@ -2051,16 +1939,18 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                 */
 
                                /* ++roman: I suggest, this should be
-                                *   #if def(REAL_DMA)
+                                * #if def(REAL_DMA)
                                 * instead of leaving REAL_DMA out.
                                 */
 
 #if defined(REAL_DMA)
-                               if (
 #if !defined(CONFIG_SUN3)
-                                   !cmd->device->borken &&
+                               transfersize = 0;
+                               if (!cmd->device->borken)
 #endif
-                                   (transfersize = NCR5380_dma_xfer_len(instance, cmd, phase)) >= DMA_MIN_SIZE) {
+                                       transfersize = NCR5380_dma_xfer_len(instance, cmd, phase);
+
+                               if (transfersize >= DMA_MIN_SIZE) {
                                        len = transfersize;
                                        cmd->SCp.phase = phase;
                                        if (NCR5380_transfer_dma(instance, &phase,
@@ -2068,16 +1958,15 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                                /*
                                                 * If the watchdog timer fires, all future
                                                 * accesses to this device will use the
-                                                * polled-IO. */
+                                                * polled-IO.
+                                                */
                                                scmd_printk(KERN_INFO, cmd,
                                                        "switching to slow handshake\n");
                                                cmd->device->borken = 1;
-                                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
-                                                       ICR_ASSERT_ATN);
                                                sink = 1;
                                                do_abort(instance);
                                                cmd->result = DID_ERROR << 16;
-                                               cmd->scsi_done(cmd);
+                                               complete_cmd(instance, cmd);
                                                /* XXX - need to source or sink data here, as appropriate */
                                        } else {
 #ifdef REAL_DMA
@@ -2093,9 +1982,13 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                        }
                                } else
 #endif /* defined(REAL_DMA) */
+                               {
+                                       spin_unlock_irq(&hostdata->lock);
                                        NCR5380_transfer_pio(instance, &phase,
-                                                            (int *)&cmd->SCp.this_residual,
-                                                            (unsigned char **)&cmd->SCp.ptr);
+                                                            (int *)&cmd->SCp.this_residual,
+                                                            (unsigned char **)&cmd->SCp.ptr);
+                                       spin_lock_irq(&hostdata->lock);
+                               }
 #if defined(CONFIG_SUN3) && defined(REAL_DMA)
                                /* if we had intended to dma that command clear it */
                                if (sun3_dma_setup_done == cmd)
@@ -2105,162 +1998,64 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                        case PHASE_MSGIN:
                                len = 1;
                                data = &tmp;
-                               NCR5380_write(SELECT_ENABLE_REG, 0);    /* disable reselects */
                                NCR5380_transfer_pio(instance, &phase, &len, &data);
                                cmd->SCp.Message = tmp;
 
                                switch (tmp) {
-                               /*
-                                * Linking lets us reduce the time required to get the
-                                * next command out to the device, hopefully this will
-                                * mean we don't waste another revolution due to the delays
-                                * required by ARBITRATION and another SELECTION.
-                                *
-                                * In the current implementation proposal, low level drivers
-                                * merely have to start the next command, pointed to by
-                                * next_link, done() is called as with unlinked commands.
-                                */
-#ifdef LINKED
-                               case LINKED_CMD_COMPLETE:
-                               case LINKED_FLG_CMD_COMPLETE:
-                                       /* Accept message by clearing ACK */
-                                       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-
-                                       dprintk(NDEBUG_LINKED, "scsi%d: target %d lun %llu linked command "
-                                                  "complete.\n", HOSTNO, cmd->device->id, cmd->device->lun);
-
-                                       /* Enable reselect interrupts */
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                                       /*
-                                        * Sanity check : A linked command should only terminate
-                                        * with one of these messages if there are more linked
-                                        * commands available.
-                                        */
-
-                                       if (!cmd->next_link) {
-                                                printk(KERN_NOTICE "scsi%d: target %d lun %llu "
-                                                       "linked command complete, no next_link\n",
-                                                       HOSTNO, cmd->device->id, cmd->device->lun);
-                                               sink = 1;
-                                               do_abort(instance);
-                                               return;
-                                       }
-
-                                       initialize_SCp(cmd->next_link);
-                                       /* The next command is still part of this process; copy it
-                                        * and don't free it! */
-                                       cmd->next_link->tag = cmd->tag;
-                                       cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
-                                       dprintk(NDEBUG_LINKED, "scsi%d: target %d lun %llu linked request "
-                                                  "done, calling scsi_done().\n",
-                                                  HOSTNO, cmd->device->id, cmd->device->lun);
-                                       cmd->scsi_done(cmd);
-                                       cmd = hostdata->connected;
-                                       break;
-#endif /* def LINKED */
                                case ABORT:
                                case COMMAND_COMPLETE:
                                        /* Accept message by clearing ACK */
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       dprintk(NDEBUG_QUEUES, "scsi%d: command for target %d, lun %llu "
-                                                 "completed\n", HOSTNO, cmd->device->id, cmd->device->lun);
+                                       dsprintk(NDEBUG_QUEUES, instance,
+                                                "COMMAND COMPLETE %p target %d lun %llu\n",
+                                                cmd, scmd_id(cmd), cmd->device->lun);
 
-                                       local_irq_save(flags);
-                                       hostdata->retain_dma_intr++;
                                        hostdata->connected = NULL;
 #ifdef SUPPORT_TAGS
                                        cmd_free_tag(cmd);
                                        if (status_byte(cmd->SCp.Status) == QUEUE_FULL) {
-                                               /* Turn a QUEUE FULL status into BUSY, I think the
-                                                * mid level cannot handle QUEUE FULL :-( (The
-                                                * command is retried after BUSY). Also update our
-                                                * queue size to the number of currently issued
-                                                * commands now.
-                                                */
-                                               /* ++Andreas: the mid level code knows about
-                                                  QUEUE_FULL now. */
-                                               struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][cmd->device->lun];
-                                               dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %llu returned "
-                                                          "QUEUE_FULL after %d commands\n",
-                                                          HOSTNO, cmd->device->id, cmd->device->lun,
-                                                          ta->nr_allocated);
+                                               u8 lun = cmd->device->lun;
+                                               struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][lun];
+
+                                               dsprintk(NDEBUG_TAGS, instance,
+                                                        "QUEUE_FULL %p target %d lun %d nr_allocated %d\n",
+                                                        cmd, scmd_id(cmd), lun, ta->nr_allocated);
                                                if (ta->queue_size > ta->nr_allocated)
-                                                       ta->nr_allocated = ta->queue_size;
+                                                       ta->queue_size = ta->nr_allocated;
                                        }
-#else
-                                       hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
 #endif
-                                       /* Enable reselect interrupts */
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-
-                                       /*
-                                        * I'm not sure what the correct thing to do here is :
-                                        *
-                                        * If the command that just executed is NOT a request
-                                        * sense, the obvious thing to do is to set the result
-                                        * code to the values of the stored parameters.
-                                        *
-                                        * If it was a REQUEST SENSE command, we need some way to
-                                        * differentiate between the failure code of the original
-                                        * and the failure code of the REQUEST sense - the obvious
-                                        * case is success, where we fall through and leave the
-                                        * result code unchanged.
-                                        *
-                                        * The non-obvious place is where the REQUEST SENSE failed
-                                        */
-
-                                       if (cmd->cmnd[0] != REQUEST_SENSE)
-                                               cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
-                                       else if (status_byte(cmd->SCp.Status) != GOOD)
-                                               cmd->result = (cmd->result & 0x00ffff) | (DID_ERROR << 16);
-
-                                       if ((cmd->cmnd[0] == REQUEST_SENSE) &&
-                                               hostdata->ses.cmd_len) {
-                                               scsi_eh_restore_cmnd(cmd, &hostdata->ses);
-                                               hostdata->ses.cmd_len = 0 ;
-                                       }
-
-                                       if ((cmd->cmnd[0] != REQUEST_SENSE) &&
-                                           (status_byte(cmd->SCp.Status) == CHECK_CONDITION)) {
-                                               scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
 
-                                               dprintk(NDEBUG_AUTOSENSE, "scsi%d: performing request sense\n", HOSTNO);
-
-                                               LIST(cmd,hostdata->issue_queue);
-                                               SET_NEXT(cmd, hostdata->issue_queue);
-                                               hostdata->issue_queue = (struct scsi_cmnd *) cmd;
-                                               dprintk(NDEBUG_QUEUES, "scsi%d: REQUEST SENSE added to head of "
-                                                         "issue queue\n", H_NO(cmd));
-                                       } else {
-                                               cmd->scsi_done(cmd);
+                                       cmd->result &= ~0xffff;
+                                       cmd->result |= cmd->SCp.Status;
+                                       cmd->result |= cmd->SCp.Message << 8;
+
+                                       if (cmd->cmnd[0] == REQUEST_SENSE)
+                                               complete_cmd(instance, cmd);
+                                       else {
+                                               if (cmd->SCp.Status == SAM_STAT_CHECK_CONDITION ||
+                                                   cmd->SCp.Status == SAM_STAT_COMMAND_TERMINATED) {
+                                                       dsprintk(NDEBUG_QUEUES, instance, "autosense: adding cmd %p to tail of autosense queue\n",
+                                                                cmd);
+                                                       list_add_tail(&ncmd->list,
+                                                                     &hostdata->autosense);
+                                               } else
+                                                       complete_cmd(instance, cmd);
                                        }
 
-                                       local_irq_restore(flags);
-
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
                                        /*
                                         * Restore phase bits to 0 so an interrupted selection,
                                         * arbitration can resume.
                                         */
                                        NCR5380_write(TARGET_COMMAND_REG, 0);
 
-                                       while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
-                                               barrier();
+                                       /* Enable reselect interrupts */
+                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
 
-                                       local_irq_save(flags);
-                                       hostdata->retain_dma_intr--;
-                                       /* ++roman: For Falcon SCSI, release the lock on the
-                                        * ST-DMA here if no other commands are waiting on the
-                                        * disconnected queue.
-                                        */
                                        maybe_release_dma_irq(instance);
-                                       local_irq_restore(flags);
                                        return;
                                case MESSAGE_REJECT:
                                        /* Accept message by clearing ACK */
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       /* Enable reselect interrupts */
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
                                        switch (hostdata->last_message) {
                                        case HEAD_OF_QUEUE_TAG:
                                        case ORDERED_QUEUE_TAG:
@@ -2274,27 +2069,20 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                                cmd->device->tagged_supported = 0;
                                                hostdata->busy[cmd->device->id] |= (1 << cmd->device->lun);
                                                cmd->tag = TAG_NONE;
-                                               dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %llu rejected "
-                                                          "QUEUE_TAG message; tagged queuing "
-                                                          "disabled\n",
-                                                          HOSTNO, cmd->device->id, cmd->device->lun);
+                                               dsprintk(NDEBUG_TAGS, instance, "target %d lun %llu rejected QUEUE_TAG message; tagged queuing disabled\n",
+                                                        scmd_id(cmd), cmd->device->lun);
                                                break;
                                        }
                                        break;
                                case DISCONNECT:
                                        /* Accept message by clearing ACK */
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       local_irq_save(flags);
-                                       cmd->device->disconnect = 1;
-                                       LIST(cmd,hostdata->disconnected_queue);
-                                       SET_NEXT(cmd, hostdata->disconnected_queue);
                                        hostdata->connected = NULL;
-                                       hostdata->disconnected_queue = cmd;
-                                       local_irq_restore(flags);
-                                       dprintk(NDEBUG_QUEUES, "scsi%d: command for target %d lun %llu was "
-                                                 "moved from connected to the "
-                                                 "disconnected_queue\n", HOSTNO,
-                                                 cmd->device->id, cmd->device->lun);
+                                       list_add(&ncmd->list, &hostdata->disconnected);
+                                       dsprintk(NDEBUG_INFORMATION | NDEBUG_QUEUES,
+                                                instance, "connected command %p for target %d lun %llu moved to disconnected queue\n",
+                                                cmd, scmd_id(cmd), cmd->device->lun);
+
                                        /*
                                         * Restore phase bits to 0 so an interrupted selection,
                                         * arbitration can resume.
@@ -2303,9 +2091,6 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 
                                        /* Enable reselect interrupts */
                                        NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                                       /* Wait for bus free to avoid nasty timeouts */
-                                       while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
-                                               barrier();
 #ifdef SUN3_SCSI_VME
                                        dregs->csr |= CSR_DMA_ENABLE;
 #endif
@@ -2324,37 +2109,30 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                case RESTORE_POINTERS:
                                        /* Accept message by clearing ACK */
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       /* Enable reselect interrupts */
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
                                        break;
                                case EXTENDED_MESSAGE:
                                        /*
-                                        * Extended messages are sent in the following format :
-                                        * Byte
-                                        * 0            EXTENDED_MESSAGE == 1
-                                        * 1            length (includes one byte for code, doesn't
-                                        *              include first two bytes)
-                                        * 2            code
-                                        * 3..length+1  arguments
-                                        *
-                                        * Start the extended message buffer with the EXTENDED_MESSAGE
+                                        * Start the message buffer with the EXTENDED_MESSAGE
                                         * byte, since spi_print_msg() wants the whole thing.
                                         */
                                        extended_msg[0] = EXTENDED_MESSAGE;
                                        /* Accept first byte by clearing ACK */
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
-                                       dprintk(NDEBUG_EXTENDED, "scsi%d: receiving extended message\n", HOSTNO);
+                                       spin_unlock_irq(&hostdata->lock);
+
+                                       dsprintk(NDEBUG_EXTENDED, instance, "receiving extended message\n");
 
                                        len = 2;
                                        data = extended_msg + 1;
                                        phase = PHASE_MSGIN;
                                        NCR5380_transfer_pio(instance, &phase, &len, &data);
-                                       dprintk(NDEBUG_EXTENDED, "scsi%d: length=%d, code=0x%02x\n", HOSTNO,
-                                                  (int)extended_msg[1], (int)extended_msg[2]);
+                                       dsprintk(NDEBUG_EXTENDED, instance, "length %d, code 0x%02x\n",
+                                                (int)extended_msg[1],
+                                                (int)extended_msg[2]);
 
-                                       if (!len && extended_msg[1] <=
-                                           (sizeof(extended_msg) - 1)) {
+                                       if (!len && extended_msg[1] > 0 &&
+                                           extended_msg[1] <= sizeof(extended_msg) - 2) {
                                                /* Accept third byte by clearing ACK */
                                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                                                len = extended_msg[1] - 1;
@@ -2362,8 +2140,8 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                                phase = PHASE_MSGIN;
 
                                                NCR5380_transfer_pio(instance, &phase, &len, &data);
-                                               dprintk(NDEBUG_EXTENDED, "scsi%d: message received, residual %d\n",
-                                                          HOSTNO, len);
+                                               dsprintk(NDEBUG_EXTENDED, instance, "message received, residual %d\n",
+                                                        len);
 
                                                switch (extended_msg[2]) {
                                                case EXTENDED_SDTR:
@@ -2373,15 +2151,18 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                                        tmp = 0;
                                                }
                                        } else if (len) {
-                                               printk(KERN_NOTICE "scsi%d: error receiving "
-                                                      "extended message\n", HOSTNO);
+                                               shost_printk(KERN_ERR, instance, "error receiving extended message\n");
                                                tmp = 0;
                                        } else {
-                                               printk(KERN_NOTICE "scsi%d: extended message "
-                                                          "code %02x length %d is too long\n",
-                                                          HOSTNO, extended_msg[2], extended_msg[1]);
+                                               shost_printk(KERN_NOTICE, instance, "extended message code %02x length %d is too long\n",
+                                                            extended_msg[2], extended_msg[1]);
                                                tmp = 0;
                                        }
+
+                                       spin_lock_irq(&hostdata->lock);
+                                       if (!hostdata->connected)
+                                               return;
+
                                        /* Fall through to reject message */
 
                                        /*
@@ -2390,8 +2171,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                         */
                                default:
                                        if (!tmp) {
-                                               printk(KERN_INFO "scsi%d: rejecting message ",
-                                                      instance->host_no);
+                                               shost_printk(KERN_ERR, instance, "rejecting message ");
                                                spi_print_msg(extended_msg);
                                                printk("\n");
                                        } else if (tmp != EXTENDED_MESSAGE)
@@ -2414,18 +2194,11 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                hostdata->last_message = msgout;
                                NCR5380_transfer_pio(instance, &phase, &len, &data);
                                if (msgout == ABORT) {
-                                       local_irq_save(flags);
-#ifdef SUPPORT_TAGS
-                                       cmd_free_tag(cmd);
-#else
-                                       hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
-#endif
                                        hostdata->connected = NULL;
                                        cmd->result = DID_ERROR << 16;
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+                                       complete_cmd(instance, cmd);
                                        maybe_release_dma_irq(instance);
-                                       local_irq_restore(flags);
-                                       cmd->scsi_done(cmd);
+                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
                                        return;
                                }
                                msgout = NOP;
@@ -2447,22 +2220,25 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                cmd->SCp.Status = tmp;
                                break;
                        default:
-                               printk("scsi%d: unknown phase\n", HOSTNO);
+                               shost_printk(KERN_ERR, instance, "unknown phase\n");
                                NCR5380_dprint(NDEBUG_ANY, instance);
                        } /* switch(phase) */
-               } /* if (tmp * SR_REQ) */
-       } /* while (1) */
+               } else {
+                       spin_unlock_irq(&hostdata->lock);
+                       NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
+                       spin_lock_irq(&hostdata->lock);
+               }
+       }
 }
 
 /*
  * Function : void NCR5380_reselect (struct Scsi_Host *instance)
  *
  * Purpose : does reselection, initializing the instance->connected
- *     field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
- *     nexus has been reestablished,
+ * field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
+ * nexus has been reestablished,
  *
  * Inputs : instance - this instance of the NCR5380.
- *
  */
 
 
@@ -2471,7 +2247,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 
 static void NCR5380_reselect(struct Scsi_Host *instance)
 {
-       SETUP_HOSTDATA(instance);
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned char target_mask;
        unsigned char lun;
 #ifdef SUPPORT_TAGS
@@ -2480,7 +2256,8 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
        unsigned char msg[3];
        int __maybe_unused len;
        unsigned char __maybe_unused *data, __maybe_unused phase;
-       struct scsi_cmnd *tmp = NULL, *prev;
+       struct NCR5380_cmd *ncmd;
+       struct scsi_cmnd *tmp;
 
        /*
         * Disable arbitration, etc. since the host adapter obviously
@@ -2488,11 +2265,10 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
         */
 
        NCR5380_write(MODE_REG, MR_BASE);
-       hostdata->restart_select = 1;
 
        target_mask = NCR5380_read(CURRENT_SCSI_DATA_REG) & ~(hostdata->id_mask);
 
-       dprintk(NDEBUG_RESELECTION, "scsi%d: reselect\n", HOSTNO);
+       dsprintk(NDEBUG_RESELECTION, instance, "reselect\n");
 
        /*
         * At this point, we have detected that our SCSI ID is on the bus,
@@ -2504,17 +2280,22 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
         */
 
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY);
-
-       while (NCR5380_read(STATUS_REG) & SR_SEL)
-               ;
+       if (NCR5380_poll_politely(instance,
+                                 STATUS_REG, SR_SEL, 0, 2 * HZ) < 0) {
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+               return;
+       }
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
        /*
         * Wait for target to go into MSGIN.
         */
 
-       while (!(NCR5380_read(STATUS_REG) & SR_REQ))
-               ;
+       if (NCR5380_poll_politely(instance,
+                                 STATUS_REG, SR_REQ, SR_REQ, 2 * HZ) < 0) {
+               do_abort(instance);
+               return;
+       }
 
 #if defined(CONFIG_SUN3) && defined(REAL_DMA)
        /* acknowledge toggle to MSGIN */
@@ -2527,15 +2308,21 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
        data = msg;
        phase = PHASE_MSGIN;
        NCR5380_transfer_pio(instance, &phase, &len, &data);
+
+       if (len) {
+               do_abort(instance);
+               return;
+       }
 #endif
 
        if (!(msg[0] & 0x80)) {
-               printk(KERN_DEBUG "scsi%d: expecting IDENTIFY message, got ", HOSTNO);
+               shost_printk(KERN_ERR, instance, "expecting IDENTIFY message, got ");
                spi_print_msg(msg);
+               printk("\n");
                do_abort(instance);
                return;
        }
-       lun = (msg[0] & 0x07);
+       lun = msg[0] & 0x07;
 
 #if defined(SUPPORT_TAGS) && !defined(CONFIG_SUN3)
        /* If the phase is still MSGIN, the target wants to send some more
@@ -2551,8 +2338,8 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
                if (!NCR5380_transfer_pio(instance, &phase, &len, &data) &&
                    msg[1] == SIMPLE_QUEUE_TAG)
                        tag = msg[2];
-               dprintk(NDEBUG_TAGS, "scsi%d: target mask %02x, lun %d sent tag %d at "
-                          "reselection\n", HOSTNO, target_mask, lun, tag);
+               dsprintk(NDEBUG_TAGS, instance, "reselect: target mask %02x, lun %d sent tag %d\n",
+                        target_mask, lun, tag);
        }
 #endif
 
@@ -2561,36 +2348,34 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
         * just reestablished, and remove it from the disconnected queue.
         */
 
-       for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue, prev = NULL;
-            tmp; prev = tmp, tmp = NEXT(tmp)) {
-               if ((target_mask == (1 << tmp->device->id)) && (lun == tmp->device->lun)
+       tmp = NULL;
+       list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+               struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+               if (target_mask == (1 << scmd_id(cmd)) &&
+                   lun == (u8)cmd->device->lun
 #ifdef SUPPORT_TAGS
-                   && (tag == tmp->tag)
+                   && (tag == cmd->tag)
 #endif
                    ) {
-                       if (prev) {
-                               REMOVE(prev, NEXT(prev), tmp, NEXT(tmp));
-                               SET_NEXT(prev, NEXT(tmp));
-                       } else {
-                               REMOVE(-1, hostdata->disconnected_queue, tmp, NEXT(tmp));
-                               hostdata->disconnected_queue = NEXT(tmp);
-                       }
-                       SET_NEXT(tmp, NULL);
+                       list_del(&ncmd->list);
+                       tmp = cmd;
                        break;
                }
        }
 
-       if (!tmp) {
-               printk(KERN_WARNING "scsi%d: warning: target bitmask %02x lun %d "
-#ifdef SUPPORT_TAGS
-                      "tag %d "
-#endif
-                      "not in disconnected_queue.\n",
-                      HOSTNO, target_mask, lun
+       if (tmp) {
+               dsprintk(NDEBUG_RESELECTION | NDEBUG_QUEUES, instance,
+                        "reselect: removed %p from disconnected queue\n", tmp);
+       } else {
+
 #ifdef SUPPORT_TAGS
-                      , tag
+               shost_printk(KERN_ERR, instance, "target bitmask 0x%02x lun %d tag %d not in disconnected queue.\n",
+                            target_mask, lun, tag);
+#else
+               shost_printk(KERN_ERR, instance, "target bitmask 0x%02x lun %d not in disconnected queue.\n",
+                            target_mask, lun);
 #endif
-                       );
                /*
                 * Since we have an established nexus that we can't do anything
                 * with, we must abort it.
@@ -2614,7 +2399,8 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
                }
                /* setup this command for dma if not already */
                if ((count >= DMA_MIN_SIZE) && (sun3_dma_setup_done != tmp)) {
-                       sun3scsi_dma_setup(d, count, rq_data_dir(tmp->request));
+                       sun3scsi_dma_setup(instance, d, count,
+                                          rq_data_dir(tmp->request));
                        sun3_dma_setup_done = tmp;
                }
        }
@@ -2639,235 +2425,196 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
                if (!NCR5380_transfer_pio(instance, &phase, &len, &data) &&
                    msg[1] == SIMPLE_QUEUE_TAG)
                        tag = msg[2];
-               dprintk(NDEBUG_TAGS, "scsi%d: target mask %02x, lun %d sent tag %d at reselection\n"
-                       HOSTNO, target_mask, lun, tag);
+               dsprintk(NDEBUG_TAGS, instance, "reselect: target mask %02x, lun %d sent tag %d\n",
+                        target_mask, lun, tag);
        }
 #endif
 
        hostdata->connected = tmp;
-       dprintk(NDEBUG_RESELECTION, "scsi%d: nexus established, target = %d, lun = %llu, tag = %d\n",
-                  HOSTNO, tmp->device->id, tmp->device->lun, tmp->tag);
+       dsprintk(NDEBUG_RESELECTION, instance, "nexus established, target %d, lun %llu, tag %d\n",
+                scmd_id(tmp), tmp->device->lun, tmp->tag);
 }
 
 
-/*
- * Function : int NCR5380_abort (struct scsi_cmnd *cmd)
- *
- * Purpose : abort a command
- *
- * Inputs : cmd - the scsi_cmnd to abort, code - code to set the
- *     host byte of the result field to, if zero DID_ABORTED is
- *     used.
- *
- * Returns : SUCCESS - success, FAILED on failure.
- *
- * XXX - there is no way to abort the command that is currently
- *      connected, you have to wait for it to complete.  If this is
- *      a problem, we could implement longjmp() / setjmp(), setjmp()
- *      called where the loop started in NCR5380_main().
+/**
+ * list_find_cmd - test for presence of a command in a linked list
+ * @haystack: list of commands
+ * @needle: command to search for
  */
 
-static
-int NCR5380_abort(struct scsi_cmnd *cmd)
+static bool list_find_cmd(struct list_head *haystack,
+                          struct scsi_cmnd *needle)
 {
-       struct Scsi_Host *instance = cmd->device->host;
-       SETUP_HOSTDATA(instance);
-       struct scsi_cmnd *tmp, **prev;
-       unsigned long flags;
+       struct NCR5380_cmd *ncmd;
 
-       scmd_printk(KERN_NOTICE, cmd, "aborting command\n");
+       list_for_each_entry(ncmd, haystack, list)
+               if (NCR5380_to_scmd(ncmd) == needle)
+                       return true;
+       return false;
+}
 
-       NCR5380_print_status(instance);
+/**
+ * list_del_cmd - remove a command from linked list
+ * @haystack: list of commands
+ * @needle: command to remove
+ */
 
-       local_irq_save(flags);
+static bool list_del_cmd(struct list_head *haystack,
+                         struct scsi_cmnd *needle)
+{
+       if (list_find_cmd(haystack, needle)) {
+               struct NCR5380_cmd *ncmd = scsi_cmd_priv(needle);
 
-       dprintk(NDEBUG_ABORT, "scsi%d: abort called basr 0x%02x, sr 0x%02x\n", HOSTNO,
-                   NCR5380_read(BUS_AND_STATUS_REG),
-                   NCR5380_read(STATUS_REG));
+               list_del(&ncmd->list);
+               return true;
+       }
+       return false;
+}
 
-#if 1
-       /*
-        * Case 1 : If the command is the currently executing command,
-        * we'll set the aborted flag and return control so that
-        * information transfer routine can exit cleanly.
-        */
+/**
+ * NCR5380_abort - scsi host eh_abort_handler() method
+ * @cmd: the command to be aborted
+ *
+ * Try to abort a given command by removing it from queues and/or sending
+ * the target an abort message. This may not succeed in causing a target
+ * to abort the command. Nonetheless, the low-level driver must forget about
+ * the command because the mid-layer reclaims it and it may be re-issued.
+ *
+ * The normal path taken by a command is as follows. For EH we trace this
+ * same path to locate and abort the command.
+ *
+ * unissued -> selecting -> [unissued -> selecting ->]... connected ->
+ * [disconnected -> connected ->]...
+ * [autosense -> connected ->] done
+ *
+ * If cmd is unissued then just remove it.
+ * If cmd is disconnected, try to select the target.
+ * If cmd is connected, try to send an abort message.
+ * If cmd is waiting for autosense, give it a chance to complete but check
+ * that it isn't left connected.
+ * If cmd was not found at all then presumably it has already been completed,
+ * in which case return SUCCESS to try to avoid further EH measures.
+ * If the command has not completed yet, we must not fail to find it.
+ */
 
-       if (hostdata->connected == cmd) {
+static int NCR5380_abort(struct scsi_cmnd *cmd)
+{
+       struct Scsi_Host *instance = cmd->device->host;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       unsigned long flags;
+       int result = SUCCESS;
 
-               dprintk(NDEBUG_ABORT, "scsi%d: aborting connected command\n", HOSTNO);
-               /*
-                * We should perform BSY checking, and make sure we haven't slipped
-                * into BUS FREE.
-                */
+       spin_lock_irqsave(&hostdata->lock, flags);
 
-               /*      NCR5380_write(INITIATOR_COMMAND_REG, ICR_ASSERT_ATN); */
-               /*
-                * Since we can't change phases until we've completed the current
-                * handshake, we have to source or sink a byte of data if the current
-                * phase is not MSGOUT.
-                */
+#if (NDEBUG & NDEBUG_ANY)
+       scmd_printk(KERN_INFO, cmd, __func__);
+#endif
+       NCR5380_dprint(NDEBUG_ANY, instance);
+       NCR5380_dprint_phase(NDEBUG_ANY, instance);
 
-               /*
-                * Return control to the executing NCR drive so we can clear the
-                * aborted flag and get back into our main loop.
-                */
+       if (list_del_cmd(&hostdata->unissued, cmd)) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: removed %p from issue queue\n", cmd);
+               cmd->result = DID_ABORT << 16;
+               cmd->scsi_done(cmd); /* No tag or busy flag to worry about */
+       }
 
-               if (do_abort(instance) == 0) {
-                       hostdata->aborted = 1;
-                       hostdata->connected = NULL;
-                       cmd->result = DID_ABORT << 16;
-#ifdef SUPPORT_TAGS
-                       cmd_free_tag(cmd);
-#else
-                       hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
-#endif
-                       maybe_release_dma_irq(instance);
-                       local_irq_restore(flags);
-                       cmd->scsi_done(cmd);
-                       return SUCCESS;
-               } else {
-                       local_irq_restore(flags);
-                       printk("scsi%d: abort of connected command failed!\n", HOSTNO);
-                       return FAILED;
-               }
+       if (hostdata->selecting == cmd) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: cmd %p == selecting\n", cmd);
+               hostdata->selecting = NULL;
+               cmd->result = DID_ABORT << 16;
+               complete_cmd(instance, cmd);
+               goto out;
        }
-#endif
 
-       /*
-        * Case 2 : If the command hasn't been issued yet, we simply remove it
-        *          from the issue queue.
-        */
-       for (prev = (struct scsi_cmnd **)&(hostdata->issue_queue),
-            tmp = (struct scsi_cmnd *)hostdata->issue_queue;
-            tmp; prev = NEXTADDR(tmp), tmp = NEXT(tmp)) {
-               if (cmd == tmp) {
-                       REMOVE(5, *prev, tmp, NEXT(tmp));
-                       (*prev) = NEXT(tmp);
-                       SET_NEXT(tmp, NULL);
-                       tmp->result = DID_ABORT << 16;
-                       maybe_release_dma_irq(instance);
-                       local_irq_restore(flags);
-                       dprintk(NDEBUG_ABORT, "scsi%d: abort removed command from issue queue.\n",
-                                   HOSTNO);
-                       /* Tagged queuing note: no tag to free here, hasn't been assigned
-                        * yet... */
-                       tmp->scsi_done(tmp);
-                       return SUCCESS;
+       if (list_del_cmd(&hostdata->disconnected, cmd)) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: removed %p from disconnected list\n", cmd);
+               cmd->result = DID_ERROR << 16;
+               if (!hostdata->connected)
+                       NCR5380_select(instance, cmd);
+               if (hostdata->connected != cmd) {
+                       complete_cmd(instance, cmd);
+                       result = FAILED;
+                       goto out;
                }
        }
 
-       /*
-        * Case 3 : If any commands are connected, we're going to fail the abort
-        *          and let the high level SCSI driver retry at a later time or
-        *          issue a reset.
-        *
-        *          Timeouts, and therefore aborted commands, will be highly unlikely
-        *          and handling them cleanly in this situation would make the common
-        *          case of noresets less efficient, and would pollute our code.  So,
-        *          we fail.
-        */
+       if (hostdata->connected == cmd) {
+               dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+               hostdata->connected = NULL;
+               if (do_abort(instance)) {
+                       set_host_byte(cmd, DID_ERROR);
+                       complete_cmd(instance, cmd);
+                       result = FAILED;
+                       goto out;
+               }
+               set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+               hostdata->dma_len = 0;
+#endif
+               if (cmd->cmnd[0] == REQUEST_SENSE)
+                       complete_cmd(instance, cmd);
+               else {
+                       struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
 
-       if (hostdata->connected) {
-               local_irq_restore(flags);
-               dprintk(NDEBUG_ABORT, "scsi%d: abort failed, command connected.\n", HOSTNO);
-               return FAILED;
+                       /* Perform autosense for this command */
+                       list_add(&ncmd->list, &hostdata->autosense);
+               }
        }
 
-       /*
-        * Case 4: If the command is currently disconnected from the bus, and
-        *      there are no connected commands, we reconnect the I_T_L or
-        *      I_T_L_Q nexus associated with it, go into message out, and send
-        *      an abort message.
-        *
-        * This case is especially ugly. In order to reestablish the nexus, we
-        * need to call NCR5380_select().  The easiest way to implement this
-        * function was to abort if the bus was busy, and let the interrupt
-        * handler triggered on the SEL for reselect take care of lost arbitrations
-        * where necessary, meaning interrupts need to be enabled.
-        *
-        * When interrupts are enabled, the queues may change - so we
-        * can't remove it from the disconnected queue before selecting it
-        * because that could cause a failure in hashing the nexus if that
-        * device reselected.
-        *
-        * Since the queues may change, we can't use the pointers from when we
-        * first locate it.
-        *
-        * So, we must first locate the command, and if NCR5380_select()
-        * succeeds, then issue the abort, relocate the command and remove
-        * it from the disconnected queue.
-        */
-
-       for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue; tmp;
-            tmp = NEXT(tmp)) {
-               if (cmd == tmp) {
-                       local_irq_restore(flags);
-                       dprintk(NDEBUG_ABORT, "scsi%d: aborting disconnected command.\n", HOSTNO);
-
-                       if (NCR5380_select(instance, cmd))
-                               return FAILED;
-
-                       dprintk(NDEBUG_ABORT, "scsi%d: nexus reestablished.\n", HOSTNO);
-
-                       do_abort(instance);
-
-                       local_irq_save(flags);
-                       for (prev = (struct scsi_cmnd **)&(hostdata->disconnected_queue),
-                            tmp = (struct scsi_cmnd *)hostdata->disconnected_queue;
-                            tmp; prev = NEXTADDR(tmp), tmp = NEXT(tmp)) {
-                               if (cmd == tmp) {
-                                       REMOVE(5, *prev, tmp, NEXT(tmp));
-                                       *prev = NEXT(tmp);
-                                       SET_NEXT(tmp, NULL);
-                                       tmp->result = DID_ABORT << 16;
-                                       /* We must unlock the tag/LUN immediately here, since the
-                                        * target goes to BUS FREE and doesn't send us another
-                                        * message (COMMAND_COMPLETE or the like)
-                                        */
-#ifdef SUPPORT_TAGS
-                                       cmd_free_tag(tmp);
-#else
-                                       hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
-#endif
-                                       maybe_release_dma_irq(instance);
-                                       local_irq_restore(flags);
-                                       tmp->scsi_done(tmp);
-                                       return SUCCESS;
-                               }
-                       }
+       if (list_find_cmd(&hostdata->autosense, cmd)) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: found %p on sense queue\n", cmd);
+               spin_unlock_irqrestore(&hostdata->lock, flags);
+               queue_work(hostdata->work_q, &hostdata->main_task);
+               msleep(1000);
+               spin_lock_irqsave(&hostdata->lock, flags);
+               if (list_del_cmd(&hostdata->autosense, cmd)) {
+                       dsprintk(NDEBUG_ABORT, instance,
+                                "abort: removed %p from sense queue\n", cmd);
+                       set_host_byte(cmd, DID_ABORT);
+                       complete_cmd(instance, cmd);
+                       goto out;
                }
        }
 
-       /* Maybe it is sufficient just to release the ST-DMA lock... (if
-        * possible at all) At least, we should check if the lock could be
-        * released after the abort, in case it is kept due to some bug.
-        */
-       maybe_release_dma_irq(instance);
-       local_irq_restore(flags);
+       if (hostdata->connected == cmd) {
+               dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+               hostdata->connected = NULL;
+               if (do_abort(instance)) {
+                       set_host_byte(cmd, DID_ERROR);
+                       complete_cmd(instance, cmd);
+                       result = FAILED;
+                       goto out;
+               }
+               set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+               hostdata->dma_len = 0;
+#endif
+               complete_cmd(instance, cmd);
+       }
 
-       /*
-        * Case 5 : If we reached this point, the command was not found in any of
-        *          the queues.
-        *
-        * We probably reached this point because of an unlikely race condition
-        * between the command completing successfully and the abortion code,
-        * so we won't panic, but we will notify the user in case something really
-        * broke.
-        */
+out:
+       if (result == FAILED)
+               dsprintk(NDEBUG_ABORT, instance, "abort: failed to abort %p\n", cmd);
+       else
+               dsprintk(NDEBUG_ABORT, instance, "abort: successfully aborted %p\n", cmd);
 
-       printk(KERN_INFO "scsi%d: warning : SCSI command probably completed successfully before abortion\n", HOSTNO);
+       queue_work(hostdata->work_q, &hostdata->main_task);
+       maybe_release_dma_irq(instance);
+       spin_unlock_irqrestore(&hostdata->lock, flags);
 
-       return FAILED;
+       return result;
 }
 
 
-/*
- * Function : int NCR5380_reset (struct scsi_cmnd *cmd)
- *
- * Purpose : reset the SCSI bus.
- *
- * Returns : SUCCESS or FAILURE
+/**
+ * NCR5380_bus_reset - reset the SCSI bus
+ * @cmd: SCSI command undergoing EH
  *
+ * Returns SUCCESS
  */
 
 static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
@@ -2876,23 +2623,22 @@ static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int i;
        unsigned long flags;
+       struct NCR5380_cmd *ncmd;
 
-       NCR5380_print_status(instance);
+       spin_lock_irqsave(&hostdata->lock, flags);
+
+#if (NDEBUG & NDEBUG_ANY)
+       scmd_printk(KERN_INFO, cmd, __func__);
+#endif
+       NCR5380_dprint(NDEBUG_ANY, instance);
+       NCR5380_dprint_phase(NDEBUG_ANY, instance);
+
+       do_reset(instance);
 
-       /* get in phase */
-       NCR5380_write(TARGET_COMMAND_REG,
-                     PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG)));
-       /* assert RST */
-       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
-       udelay(40);
        /* reset NCR registers */
-       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
        NCR5380_write(MODE_REG, MR_BASE);
        NCR5380_write(TARGET_COMMAND_REG, 0);
        NCR5380_write(SELECT_ENABLE_REG, 0);
-       /* ++roman: reset interrupt condition! otherwise no interrupts don't get
-        * through anymore ... */
-       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 
        /* After the reset, there are no more connected or disconnected commands
         * and no busy units; so clear the low-level status here to avoid
@@ -2900,17 +2646,34 @@ static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
         * commands!
         */
 
-       if (hostdata->issue_queue)
-               dprintk(NDEBUG_ABORT, "scsi%d: reset aborted issued command(s)\n", H_NO(cmd));
-       if (hostdata->connected)
-               dprintk(NDEBUG_ABORT, "scsi%d: reset aborted a connected command\n", H_NO(cmd));
-       if (hostdata->disconnected_queue)
-               dprintk(NDEBUG_ABORT, "scsi%d: reset aborted disconnected command(s)\n", H_NO(cmd));
+       hostdata->selecting = NULL;
+
+       list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+               struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+               set_host_byte(cmd, DID_RESET);
+               cmd->scsi_done(cmd);
+       }
+
+       list_for_each_entry(ncmd, &hostdata->autosense, list) {
+               struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+               set_host_byte(cmd, DID_RESET);
+               cmd->scsi_done(cmd);
+       }
+
+       if (hostdata->connected) {
+               set_host_byte(hostdata->connected, DID_RESET);
+               complete_cmd(instance, hostdata->connected);
+               hostdata->connected = NULL;
+       }
+
+       if (hostdata->sensing) {
+               set_host_byte(hostdata->sensing, DID_RESET); /* ->connected is NULL here */
+               complete_cmd(instance, hostdata->sensing);
+               hostdata->sensing = NULL;
+       }
 
-       local_irq_save(flags);
-       hostdata->issue_queue = NULL;
-       hostdata->connected = NULL;
-       hostdata->disconnected_queue = NULL;
 #ifdef SUPPORT_TAGS
        free_all_tags(hostdata);
 #endif
@@ -2920,8 +2683,9 @@ static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
        hostdata->dma_len = 0;
 #endif
 
+       queue_work(hostdata->work_q, &hostdata->main_task);
        maybe_release_dma_irq(instance);
-       local_irq_restore(flags);
+       spin_unlock_irqrestore(&hostdata->lock, flags);
 
        return SUCCESS;
 }
index 5ede3da..78d1b29 100644 (file)
@@ -66,7 +66,6 @@
 
 #include <linux/module.h>
 #include <linux/types.h>
-#include <linux/delay.h>
 #include <linux/blkdev.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
@@ -98,7 +97,6 @@
 
 #define NCR5380_queue_command           atari_scsi_queue_command
 #define NCR5380_abort                   atari_scsi_abort
-#define NCR5380_show_info               atari_scsi_show_info
 #define NCR5380_info                    atari_scsi_info
 
 #define NCR5380_dma_read_setup(instance, data, count) \
@@ -161,23 +159,10 @@ static inline unsigned long SCSI_DMA_GETADR(void)
        return adr;
 }
 
-#define HOSTDATA_DMALEN                (((struct NCR5380_hostdata *) \
-                               (atari_scsi_host->hostdata))->dma_len)
-
-/* Time (in jiffies) to wait after a reset; the SCSI standard calls for 250ms,
- * we usually do 0.5s to be on the safe side. But Toshiba CD-ROMs once more
- * need ten times the standard value... */
-#ifndef CONFIG_ATARI_SCSI_TOSHIBA_DELAY
-#define        AFTER_RESET_DELAY       (HZ/2)
-#else
-#define        AFTER_RESET_DELAY       (5*HZ/2)
-#endif
-
 #ifdef REAL_DMA
 static void atari_scsi_fetch_restbytes(void);
 #endif
 
-static struct Scsi_Host *atari_scsi_host;
 static unsigned char (*atari_scsi_reg_read)(unsigned char reg);
 static void (*atari_scsi_reg_write)(unsigned char reg, unsigned char value);
 
@@ -208,12 +193,12 @@ static int setup_cmd_per_lun = -1;
 module_param(setup_cmd_per_lun, int, 0);
 static int setup_sg_tablesize = -1;
 module_param(setup_sg_tablesize, int, 0);
-#ifdef SUPPORT_TAGS
 static int setup_use_tagged_queuing = -1;
 module_param(setup_use_tagged_queuing, int, 0);
-#endif
 static int setup_hostid = -1;
 module_param(setup_hostid, int, 0);
+static int setup_toshiba_delay = -1;
+module_param(setup_toshiba_delay, int, 0);
 
 
 #if defined(REAL_DMA)
@@ -273,15 +258,17 @@ static void scsi_dma_buserr(int irq, void *dummy)
 #endif
 
 
-static irqreturn_t scsi_tt_intr(int irq, void *dummy)
+static irqreturn_t scsi_tt_intr(int irq, void *dev)
 {
 #ifdef REAL_DMA
+       struct Scsi_Host *instance = dev;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int dma_stat;
 
        dma_stat = tt_scsi_dma.dma_ctrl;
 
-       dprintk(NDEBUG_INTR, "scsi%d: NCR5380 interrupt, DMA status = %02x\n",
-                  atari_scsi_host->host_no, dma_stat & 0xff);
+       dsprintk(NDEBUG_INTR, instance, "NCR5380 interrupt, DMA status = %02x\n",
+                dma_stat & 0xff);
 
        /* Look if it was the DMA that has interrupted: First possibility
         * is that a bus error occurred...
@@ -304,7 +291,8 @@ static irqreturn_t scsi_tt_intr(int irq, void *dummy)
         * data reg!
         */
        if ((dma_stat & 0x02) && !(dma_stat & 0x40)) {
-               atari_dma_residual = HOSTDATA_DMALEN - (SCSI_DMA_READ_P(dma_addr) - atari_dma_startaddr);
+               atari_dma_residual = hostdata->dma_len -
+                       (SCSI_DMA_READ_P(dma_addr) - atari_dma_startaddr);
 
                dprintk(NDEBUG_DMA, "SCSI DMA: There are %ld residual bytes.\n",
                           atari_dma_residual);
@@ -356,15 +344,17 @@ static irqreturn_t scsi_tt_intr(int irq, void *dummy)
 
 #endif /* REAL_DMA */
 
-       NCR5380_intr(irq, dummy);
+       NCR5380_intr(irq, dev);
 
        return IRQ_HANDLED;
 }
 
 
-static irqreturn_t scsi_falcon_intr(int irq, void *dummy)
+static irqreturn_t scsi_falcon_intr(int irq, void *dev)
 {
 #ifdef REAL_DMA
+       struct Scsi_Host *instance = dev;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int dma_stat;
 
        /* Turn off DMA and select sector counter register before
@@ -399,7 +389,7 @@ static irqreturn_t scsi_falcon_intr(int irq, void *dummy)
                        printk(KERN_ERR "SCSI DMA error: %ld bytes lost in "
                               "ST-DMA fifo\n", transferred & 15);
 
-               atari_dma_residual = HOSTDATA_DMALEN - transferred;
+               atari_dma_residual = hostdata->dma_len - transferred;
                dprintk(NDEBUG_DMA, "SCSI DMA: There are %ld residual bytes.\n",
                           atari_dma_residual);
        } else
@@ -411,13 +401,14 @@ static irqreturn_t scsi_falcon_intr(int irq, void *dummy)
                 * data to the original destination address.
                 */
                memcpy(atari_dma_orig_addr, phys_to_virt(atari_dma_startaddr),
-                      HOSTDATA_DMALEN - atari_dma_residual);
+                      hostdata->dma_len - atari_dma_residual);
                atari_dma_orig_addr = NULL;
        }
 
 #endif /* REAL_DMA */
 
-       NCR5380_intr(irq, dummy);
+       NCR5380_intr(irq, dev);
+
        return IRQ_HANDLED;
 }
 
@@ -488,7 +479,7 @@ static int __init atari_scsi_setup(char *str)
         * Defaults depend on TT or Falcon, determined at run time.
         * Negative values mean don't change.
         */
-       int ints[6];
+       int ints[8];
 
        get_options(str, ARRAY_SIZE(ints), ints);
 
@@ -504,10 +495,11 @@ static int __init atari_scsi_setup(char *str)
                setup_sg_tablesize = ints[3];
        if (ints[0] >= 4)
                setup_hostid = ints[4];
-#ifdef SUPPORT_TAGS
        if (ints[0] >= 5)
                setup_use_tagged_queuing = ints[5];
-#endif
+       /* ints[6] (use_pdma) is ignored */
+       if (ints[0] >= 7)
+               setup_toshiba_delay = ints[7];
 
        return 1;
 }
@@ -516,38 +508,6 @@ __setup("atascsi=", atari_scsi_setup);
 #endif /* !MODULE */
 
 
-#ifdef CONFIG_ATARI_SCSI_RESET_BOOT
-static void __init atari_scsi_reset_boot(void)
-{
-       unsigned long end;
-
-       /*
-        * Do a SCSI reset to clean up the bus during initialization. No messing
-        * with the queues, interrupts, or locks necessary here.
-        */
-
-       printk("Atari SCSI: resetting the SCSI bus...");
-
-       /* get in phase */
-       NCR5380_write(TARGET_COMMAND_REG,
-                     PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG)));
-
-       /* assert RST */
-       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
-       /* The min. reset hold time is 25us, so 40us should be enough */
-       udelay(50);
-       /* reset RST and interrupt */
-       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-
-       end = jiffies + AFTER_RESET_DELAY;
-       while (time_before(jiffies, end))
-               barrier();
-
-       printk(" done\n");
-}
-#endif
-
 #if defined(REAL_DMA)
 
 static unsigned long atari_scsi_dma_setup(struct Scsi_Host *instance,
@@ -815,14 +775,14 @@ static int atari_scsi_bus_reset(struct scsi_cmnd *cmd)
 static struct scsi_host_template atari_scsi_template = {
        .module                 = THIS_MODULE,
        .proc_name              = DRV_MODULE_NAME,
-       .show_info              = atari_scsi_show_info,
        .name                   = "Atari native SCSI",
        .info                   = atari_scsi_info,
        .queuecommand           = atari_scsi_queue_command,
        .eh_abort_handler       = atari_scsi_abort,
        .eh_bus_reset_handler   = atari_scsi_bus_reset,
        .this_id                = 7,
-       .use_clustering         = DISABLE_CLUSTERING
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
 };
 
 static int __init atari_scsi_probe(struct platform_device *pdev)
@@ -880,7 +840,7 @@ static int __init atari_scsi_probe(struct platform_device *pdev)
        } else {
                /* Test if a host id is set in the NVRam */
                if (ATARIHW_PRESENT(TT_CLK) && nvram_check_checksum()) {
-                       unsigned char b = nvram_read_byte(14);
+                       unsigned char b = nvram_read_byte(16);
 
                        /* Arbitration enabled? (for TOS)
                         * If yes, use configured host ID
@@ -915,21 +875,18 @@ static int __init atari_scsi_probe(struct platform_device *pdev)
                error = -ENOMEM;
                goto fail_alloc;
        }
-       atari_scsi_host = instance;
-
-#ifdef CONFIG_ATARI_SCSI_RESET_BOOT
-       atari_scsi_reset_boot();
-#endif
 
        instance->irq = irq->start;
 
        host_flags |= IS_A_TT() ? 0 : FLAG_LATE_DMA_SETUP;
-
 #ifdef SUPPORT_TAGS
        host_flags |= setup_use_tagged_queuing > 0 ? FLAG_TAGGED_QUEUING : 0;
 #endif
+       host_flags |= setup_toshiba_delay > 0 ? FLAG_TOSHIBA_DELAY : 0;
 
-       NCR5380_init(instance, host_flags);
+       error = NCR5380_init(instance, host_flags);
+       if (error)
+               goto fail_init;
 
        if (IS_A_TT()) {
                error = request_irq(instance->irq, scsi_tt_intr, 0,
@@ -975,6 +932,8 @@ static int __init atari_scsi_probe(struct platform_device *pdev)
 #endif
        }
 
+       NCR5380_maybe_reset_bus(instance);
+
        error = scsi_add_host(instance, NULL);
        if (error)
                goto fail_host;
@@ -989,6 +948,7 @@ fail_host:
                free_irq(instance->irq, instance);
 fail_irq:
        NCR5380_exit(instance);
+fail_init:
        scsi_host_put(instance);
 fail_alloc:
        if (atari_dma_buffer)
index 4e7cad2..bad5f32 100644 (file)
@@ -3,6 +3,7 @@ config BE2ISCSI
        depends on PCI && SCSI && NET
        select SCSI_ISCSI_ATTRS
        select ISCSI_BOOT_SYSFS
+       select IRQ_POLL
 
        help
        This driver implements the iSCSI functionality for Emulex
index 77f992e..a41c643 100644 (file)
@@ -20,7 +20,7 @@
 
 #include <linux/pci.h>
 #include <linux/if_vlan.h>
-#include <linux/blk-iopoll.h>
+#include <linux/irq_poll.h>
 #define FW_VER_LEN     32
 #define MCC_Q_LEN      128
 #define MCC_CQ_LEN     256
@@ -101,7 +101,7 @@ struct be_eq_obj {
        struct beiscsi_hba *phba;
        struct be_queue_info *cq;
        struct work_struct work_cqs; /* Work Item */
-       struct blk_iopoll       iopoll;
+       struct irq_poll iopoll;
 };
 
 struct be_mcc_obj {
index b7087ba..022e87b 100644 (file)
@@ -1292,9 +1292,9 @@ static void beiscsi_flush_cq(struct beiscsi_hba *phba)
 
        for (i = 0; i < phba->num_cpus; i++) {
                pbe_eq = &phwi_context->be_eq[i];
-               blk_iopoll_disable(&pbe_eq->iopoll);
+               irq_poll_disable(&pbe_eq->iopoll);
                beiscsi_process_cq(pbe_eq);
-               blk_iopoll_enable(&pbe_eq->iopoll);
+               irq_poll_enable(&pbe_eq->iopoll);
        }
 }
 
index fe0c514..cb9072a 100644 (file)
@@ -910,8 +910,7 @@ static irqreturn_t be_isr_msix(int irq, void *dev_id)
        num_eq_processed = 0;
        while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32]
                                & EQE_VALID_MASK) {
-               if (!blk_iopoll_sched_prep(&pbe_eq->iopoll))
-                       blk_iopoll_sched(&pbe_eq->iopoll);
+               irq_poll_sched(&pbe_eq->iopoll);
 
                AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0);
                queue_tail_inc(eq);
@@ -972,8 +971,7 @@ static irqreturn_t be_isr(int irq, void *dev_id)
                        spin_unlock_irqrestore(&phba->isr_lock, flags);
                        num_mcceq_processed++;
                } else {
-                       if (!blk_iopoll_sched_prep(&pbe_eq->iopoll))
-                               blk_iopoll_sched(&pbe_eq->iopoll);
+                       irq_poll_sched(&pbe_eq->iopoll);
                        num_ioeq_processed++;
                }
                AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0);
@@ -2295,7 +2293,7 @@ void beiscsi_process_all_cqs(struct work_struct *work)
        hwi_ring_eq_db(phba, pbe_eq->q.id, 0, 0, 1, 1);
 }
 
-static int be_iopoll(struct blk_iopoll *iop, int budget)
+static int be_iopoll(struct irq_poll *iop, int budget)
 {
        unsigned int ret;
        struct beiscsi_hba *phba;
@@ -2306,7 +2304,7 @@ static int be_iopoll(struct blk_iopoll *iop, int budget)
        pbe_eq->cq_count += ret;
        if (ret < budget) {
                phba = pbe_eq->phba;
-               blk_iopoll_complete(iop);
+               irq_poll_complete(iop);
                beiscsi_log(phba, KERN_INFO,
                            BEISCSI_LOG_CONFIG | BEISCSI_LOG_IO,
                            "BM_%d : rearm pbe_eq->q.id =%d\n",
@@ -5293,7 +5291,7 @@ static void beiscsi_quiesce(struct beiscsi_hba *phba,
 
        for (i = 0; i < phba->num_cpus; i++) {
                pbe_eq = &phwi_context->be_eq[i];
-               blk_iopoll_disable(&pbe_eq->iopoll);
+               irq_poll_disable(&pbe_eq->iopoll);
        }
 
        if (unload_state == BEISCSI_CLEAN_UNLOAD) {
@@ -5579,9 +5577,8 @@ static void beiscsi_eeh_resume(struct pci_dev *pdev)
 
        for (i = 0; i < phba->num_cpus; i++) {
                pbe_eq = &phwi_context->be_eq[i];
-               blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget,
+               irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget,
                                be_iopoll);
-               blk_iopoll_enable(&pbe_eq->iopoll);
        }
 
        i = (phba->msix_enabled) ? i : 0;
@@ -5752,9 +5749,8 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev,
 
        for (i = 0; i < phba->num_cpus; i++) {
                pbe_eq = &phwi_context->be_eq[i];
-               blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget,
+               irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget,
                                be_iopoll);
-               blk_iopoll_enable(&pbe_eq->iopoll);
        }
 
        i = (phba->msix_enabled) ? i : 0;
@@ -5795,7 +5791,7 @@ free_blkenbld:
        destroy_workqueue(phba->wq);
        for (i = 0; i < phba->num_cpus; i++) {
                pbe_eq = &phwi_context->be_eq[i];
-               blk_iopoll_disable(&pbe_eq->iopoll);
+               irq_poll_disable(&pbe_eq->iopoll);
        }
 free_twq:
        beiscsi_clean_port(phba);
index 0e2bee9..e22a268 100644 (file)
@@ -57,7 +57,7 @@ MODULE_PARM_DESC(cxgb3i_snd_win, "TCP send window in bytes (default=128KB)");
 
 static int cxgb3i_rx_credit_thres = 10 * 1024;
 module_param(cxgb3i_rx_credit_thres, int, 0644);
-MODULE_PARM_DESC(rx_credit_thres,
+MODULE_PARM_DESC(cxgb3i_rx_credit_thres,
                 "RX credits return threshold in bytes (default=10KB)");
 
 static unsigned int cxgb3i_max_connect = 8 * 1024;
index 3e08812..6c14e68 100644 (file)
 
 #define DONT_USE_INTR
 
-#define NCR5380_read(reg)              inb(port + reg)
-#define NCR5380_write(reg, value)      outb(value, port + reg)
+#define NCR5380_read(reg)              inb(instance->io_port + reg)
+#define NCR5380_write(reg, value)      outb(value, instance->io_port + reg)
 
 #define NCR5380_implementation_fields  /* none */
-#define NCR5380_local_declare()                unsigned int port
-#define NCR5380_setup(instance)                port = instance->io_port
-
-/*
- * Includes needed for NCR5380.[ch] (XXX: Move them to NCR5380.h)
- */
-#include <linux/delay.h>
 
 #include "NCR5380.h"
 #include "NCR5380.c"
@@ -56,6 +49,7 @@
 
 
 static struct scsi_host_template dmx3191d_driver_template = {
+       .module                 = THIS_MODULE,
        .proc_name              = DMX3191D_DRIVER_NAME,
        .name                   = "Domex DMX3191D",
        .info                   = NCR5380_info,
@@ -67,6 +61,8 @@ static struct scsi_host_template dmx3191d_driver_template = {
        .sg_tablesize           = SG_ALL,
        .cmd_per_lun            = 2,
        .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 
 static int dmx3191d_probe_one(struct pci_dev *pdev,
@@ -97,17 +93,25 @@ static int dmx3191d_probe_one(struct pci_dev *pdev,
         */
        shost->irq = NO_IRQ;
 
-       NCR5380_init(shost, FLAG_NO_PSEUDO_DMA | FLAG_DTC3181E);
+       error = NCR5380_init(shost, FLAG_NO_PSEUDO_DMA);
+       if (error)
+               goto out_host_put;
+
+       NCR5380_maybe_reset_bus(shost);
 
        pci_set_drvdata(pdev, shost);
 
        error = scsi_add_host(shost, &pdev->dev);
        if (error)
-               goto out_release_region;
+               goto out_exit;
 
        scsi_scan_host(shost);
        return 0;
 
+out_exit:
+       NCR5380_exit(shost);
+out_host_put:
+       scsi_host_put(shost);
  out_release_region:
        release_region(io, DMX3191D_REGION_LEN);
  out_disable_device:
@@ -119,15 +123,14 @@ static int dmx3191d_probe_one(struct pci_dev *pdev,
 static void dmx3191d_remove_one(struct pci_dev *pdev)
 {
        struct Scsi_Host *shost = pci_get_drvdata(pdev);
+       unsigned long io = shost->io_port;
 
        scsi_remove_host(shost);
 
        NCR5380_exit(shost);
-
-       release_region(shost->io_port, DMX3191D_REGION_LEN);
-       pci_disable_device(pdev);
-
        scsi_host_put(shost);
+       release_region(io, DMX3191D_REGION_LEN);
+       pci_disable_device(pdev);
 }
 
 static struct pci_device_id dmx3191d_pci_tbl[] = {
index 4c74c7b..6c736b0 100644 (file)
@@ -1,9 +1,5 @@
-
 #define PSEUDO_DMA
 #define DONT_USE_INTR
-#define UNSAFE                 /* Leave interrupts enabled during pseudo-dma I/O */
-#define DMA_WORKS_RIGHT
-
 
 /*
  * DTC 3180/3280 driver, by
 
 
 #include <linux/module.h>
-#include <linux/signal.h>
 #include <linux/blkdev.h>
-#include <linux/delay.h>
-#include <linux/stat.h>
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <scsi/scsi_host.h>
+
 #include "dtc.h"
 #define AUTOPROBE_IRQ
 #include "NCR5380.h"
@@ -150,7 +144,7 @@ static const struct signature {
 
 static int __init dtc_setup(char *str)
 {
-       static int commandline_current = 0;
+       static int commandline_current;
        int i;
        int ints[10];
 
@@ -188,7 +182,7 @@ __setup("dtc=", dtc_setup);
 
 static int __init dtc_detect(struct scsi_host_template * tpnt)
 {
-       static int current_override = 0, current_base = 0;
+       static int current_override, current_base;
        struct Scsi_Host *instance;
        unsigned int addr;
        void __iomem *base;
@@ -205,9 +199,8 @@ static int __init dtc_detect(struct scsi_host_template * tpnt)
                                addr = 0;
                } else
                        for (; !addr && (current_base < NO_BASES); ++current_base) {
-#if (DTCDEBUG & DTCDEBUG_INIT)
-                               printk(KERN_DEBUG "scsi-dtc : probing address %08x\n", bases[current_base].address);
-#endif
+                               dprintk(NDEBUG_INIT, "dtc: probing address 0x%08x\n",
+                                       (unsigned int)bases[current_base].address);
                                if (bases[current_base].noauto)
                                        continue;
                                base = ioremap(bases[current_base].address, 0x2000);
@@ -216,18 +209,14 @@ static int __init dtc_detect(struct scsi_host_template * tpnt)
                                for (sig = 0; sig < NO_SIGNATURES; ++sig) {
                                        if (check_signature(base + signatures[sig].offset, signatures[sig].string, strlen(signatures[sig].string))) {
                                                addr = bases[current_base].address;
-#if (DTCDEBUG & DTCDEBUG_INIT)
-                                               printk(KERN_DEBUG "scsi-dtc : detected board.\n");
-#endif
+                                               dprintk(NDEBUG_INIT, "dtc: detected board\n");
                                                goto found;
                                        }
                                }
                                iounmap(base);
                        }
 
-#if defined(DTCDEBUG) && (DTCDEBUG & DTCDEBUG_INIT)
-               printk(KERN_DEBUG "scsi-dtc : base = %08x\n", addr);
-#endif
+               dprintk(NDEBUG_INIT, "dtc: addr = 0x%08x\n", addr);
 
                if (!addr)
                        break;
@@ -235,12 +224,15 @@ static int __init dtc_detect(struct scsi_host_template * tpnt)
 found:
                instance = scsi_register(tpnt, sizeof(struct NCR5380_hostdata));
                if (instance == NULL)
-                       break;
+                       goto out_unmap;
 
                instance->base = addr;
                ((struct NCR5380_hostdata *)(instance)->hostdata)->base = base;
 
-               NCR5380_init(instance, 0);
+               if (NCR5380_init(instance, FLAG_NO_DMA_FIXUP))
+                       goto out_unregister;
+
+               NCR5380_maybe_reset_bus(instance);
 
                NCR5380_write(DTC_CONTROL_REG, CSR_5380_INTR);  /* Enable int's */
                if (overrides[current_override].irq != IRQ_AUTO)
@@ -271,14 +263,19 @@ found:
                        printk(KERN_WARNING "scsi%d : interrupts not used. Might as well not jumper it.\n", instance->host_no);
                instance->irq = NO_IRQ;
 #endif
-#if defined(DTCDEBUG) && (DTCDEBUG & DTCDEBUG_INIT)
-               printk("scsi%d : irq = %d\n", instance->host_no, instance->irq);
-#endif
+               dprintk(NDEBUG_INIT, "scsi%d : irq = %d\n",
+                       instance->host_no, instance->irq);
 
                ++current_override;
                ++count;
        }
        return count;
+
+out_unregister:
+       scsi_unregister(instance);
+out_unmap:
+       iounmap(base);
+       return count;
 }
 
 /*
@@ -331,12 +328,8 @@ static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst,
        unsigned char *d = dst;
        int i;                  /* For counting time spent in the poll-loop */
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
 
        i = 0;
-       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-       NCR5380_write(MODE_REG, MR_ENABLE_EOP_INTR | MR_DMA_MODE);
        if (instance->irq == NO_IRQ)
                NCR5380_write(DTC_CONTROL_REG, CSR_DIR_READ);
        else
@@ -348,7 +341,7 @@ static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst,
                while (NCR5380_read(DTC_CONTROL_REG) & CSR_HOST_BUF_NOT_RDY)
                        ++i;
                rtrc(3);
-               memcpy_fromio(d, base + DTC_DATA_BUF, 128);
+               memcpy_fromio(d, hostdata->base + DTC_DATA_BUF, 128);
                d += 128;
                len -= 128;
                rtrc(7);
@@ -358,9 +351,7 @@ static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst,
        rtrc(4);
        while (!(NCR5380_read(DTC_CONTROL_REG) & D_CR_ACCESS))
                ++i;
-       NCR5380_write(MODE_REG, 0);     /* Clear the operating mode */
        rtrc(0);
-       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
        if (i > hostdata->spin_max_r)
                hostdata->spin_max_r = i;
        return (0);
@@ -383,12 +374,7 @@ static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src,
 {
        int i;
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
 
-       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-       NCR5380_write(MODE_REG, MR_ENABLE_EOP_INTR | MR_DMA_MODE);
-       /* set direction (write) */
        if (instance->irq == NO_IRQ)
                NCR5380_write(DTC_CONTROL_REG, 0);
        else
@@ -400,7 +386,7 @@ static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src,
                while (NCR5380_read(DTC_CONTROL_REG) & CSR_HOST_BUF_NOT_RDY)
                        ++i;
                rtrc(3);
-               memcpy_toio(base + DTC_DATA_BUF, src, 128);
+               memcpy_toio(hostdata->base + DTC_DATA_BUF, src, 128);
                src += 128;
                len -= 128;
        }
@@ -413,47 +399,60 @@ static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src,
                ++i;
        rtrc(7);
        /* Check for parity error here. fixme. */
-       NCR5380_write(MODE_REG, 0);     /* Clear the operating mode */
        rtrc(0);
        if (i > hostdata->spin_max_w)
                hostdata->spin_max_w = i;
        return (0);
 }
 
+static int dtc_dma_xfer_len(struct scsi_cmnd *cmd)
+{
+       int transfersize = cmd->transfersize;
+
+       /* Limit transfers to 32K, for xx400 & xx406
+        * pseudoDMA that transfers in 128 bytes blocks.
+        */
+       if (transfersize > 32 * 1024 && cmd->SCp.this_residual &&
+           !(cmd->SCp.this_residual % transfersize))
+               transfersize = 32 * 1024;
+
+       return transfersize;
+}
+
 MODULE_LICENSE("GPL");
 
 #include "NCR5380.c"
 
 static int dtc_release(struct Scsi_Host *shost)
 {
-       NCR5380_local_declare();
-       NCR5380_setup(shost);
+       struct NCR5380_hostdata *hostdata = shost_priv(shost);
+
        if (shost->irq != NO_IRQ)
                free_irq(shost->irq, shost);
        NCR5380_exit(shost);
-       if (shost->io_port && shost->n_io_port)
-               release_region(shost->io_port, shost->n_io_port);
        scsi_unregister(shost);
-       iounmap(base);
+       iounmap(hostdata->base);
        return 0;
 }
 
 static struct scsi_host_template driver_template = {
-       .name                           = "DTC 3180/3280 ",
-       .detect                         = dtc_detect,
-       .release                        = dtc_release,
-       .proc_name                      = "dtc3x80",
-       .show_info                      = dtc_show_info,
-       .write_info                     = dtc_write_info,
-       .info                           = dtc_info,
-       .queuecommand                   = dtc_queue_command,
-       .eh_abort_handler               = dtc_abort,
-       .eh_bus_reset_handler           = dtc_bus_reset,
-       .bios_param                     = dtc_biosparam,
-       .can_queue                      = CAN_QUEUE,
-       .this_id                        = 7,
-       .sg_tablesize                   = SG_ALL,
-       .cmd_per_lun                    = CMD_PER_LUN,
-       .use_clustering                 = DISABLE_CLUSTERING,
+       .name                   = "DTC 3180/3280",
+       .detect                 = dtc_detect,
+       .release                = dtc_release,
+       .proc_name              = "dtc3x80",
+       .show_info              = dtc_show_info,
+       .write_info             = dtc_write_info,
+       .info                   = dtc_info,
+       .queuecommand           = dtc_queue_command,
+       .eh_abort_handler       = dtc_abort,
+       .eh_bus_reset_handler   = dtc_bus_reset,
+       .bios_param             = dtc_biosparam,
+       .can_queue              = 32,
+       .this_id                = 7,
+       .sg_tablesize           = SG_ALL,
+       .cmd_per_lun            = 2,
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 #include "scsi_module.c"
index 78a2332..56732cb 100644 (file)
 #ifndef DTC3280_H
 #define DTC3280_H
 
-#define DTCDEBUG 0
-#define DTCDEBUG_INIT  0x1
-#define DTCDEBUG_TRANSFER 0x2
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 32 
-#endif
-
 #define NCR5380_implementation_fields \
     void __iomem *base
 
-#define NCR5380_local_declare() \
-    void __iomem *base
-
-#define NCR5380_setup(instance) \
-    base = ((struct NCR5380_hostdata *)(instance)->hostdata)->base
+#define DTC_address(reg) \
+       (((struct NCR5380_hostdata *)shost_priv(instance))->base + DTC_5380_OFFSET + reg)
 
-#define DTC_address(reg) (base + DTC_5380_OFFSET + reg)
-
-#define dbNCR5380_read(reg)                                              \
-    (rval=readb(DTC_address(reg)), \
-     (((unsigned char) printk("DTC : read register %d at addr %p is: %02x\n"\
-    , (reg), DTC_address(reg), rval)), rval ) )
-
-#define dbNCR5380_write(reg, value) do {                                  \
-    printk("DTC : write %02x to register %d at address %p\n",         \
-            (value), (reg), DTC_address(reg));     \
-    writeb(value, DTC_address(reg));} while(0)
-
-
-#if !(DTCDEBUG & DTCDEBUG_TRANSFER) 
 #define NCR5380_read(reg) (readb(DTC_address(reg)))
 #define NCR5380_write(reg, value) (writeb(value, DTC_address(reg)))
-#else
-#define NCR5380_read(reg) (readb(DTC_address(reg)))
-#define xNCR5380_read(reg)                                             \
-    (((unsigned char) printk("DTC : read register %d at address %p\n"\
-    , (reg), DTC_address(reg))), readb(DTC_address(reg)))
 
-#define NCR5380_write(reg, value) do {                                 \
-    printk("DTC : write %02x to register %d at address %p\n",  \
-           (value), (reg), DTC_address(reg));  \
-    writeb(value, DTC_address(reg));} while(0)
-#endif
+#define NCR5380_dma_xfer_len(instance, cmd, phase) \
+        dtc_dma_xfer_len(cmd)
 
 #define NCR5380_intr                   dtc_intr
 #define NCR5380_queue_command          dtc_queue_command
index f8d2478..90091e6 100644 (file)
  *     
  */
 
-/* settings for DTC3181E card with only Mustek scanner attached */
-#define USLEEP_POLL    msecs_to_jiffies(10)
-#define USLEEP_SLEEP   msecs_to_jiffies(200)
-#define USLEEP_WAITLONG        msecs_to_jiffies(5000)
-
 #define AUTOPROBE_IRQ
 
 #ifdef CONFIG_SCSI_GENERIC_NCR53C400
-#define NCR53C400_PSEUDO_DMA 1
 #define PSEUDO_DMA
-#define NCR53C400
 #endif
 
 #include <asm/io.h>
-#include <linux/signal.h>
 #include <linux/blkdev.h>
+#include <linux/module.h>
 #include <scsi/scsi_host.h>
 #include "g_NCR5380.h"
 #include "NCR5380.h"
-#include <linux/stat.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/isapnp.h>
-#include <linux/delay.h>
 #include <linux/interrupt.h>
 
-#define NCR_NOT_SET 0
-static int ncr_irq = NCR_NOT_SET;
-static int ncr_dma = NCR_NOT_SET;
-static int ncr_addr = NCR_NOT_SET;
-static int ncr_5380 = NCR_NOT_SET;
-static int ncr_53c400 = NCR_NOT_SET;
-static int ncr_53c400a = NCR_NOT_SET;
-static int dtc_3181e = NCR_NOT_SET;
+static int ncr_irq;
+static int ncr_dma;
+static int ncr_addr;
+static int ncr_5380;
+static int ncr_53c400;
+static int ncr_53c400a;
+static int dtc_3181e;
+static int hp_c2502;
 
 static struct override {
        NCR5380_map_type NCR5380_map_name;
@@ -121,7 +112,7 @@ static struct override {
 
 static void __init internal_setup(int board, char *str, int *ints)
 {
-       static int commandline_current = 0;
+       static int commandline_current;
        switch (board) {
        case BOARD_NCR5380:
                if (ints[0] != 2 && ints[0] != 3) {
@@ -235,6 +226,30 @@ static int __init do_DTC3181E_setup(char *str)
 
 #endif
 
+#ifndef SCSI_G_NCR5380_MEM
+/*
+ * Configure I/O address of 53C400A or DTC436 by writing magic numbers
+ * to ports 0x779 and 0x379.
+ */
+static void magic_configure(int idx, u8 irq, u8 magic[])
+{
+       u8 cfg = 0;
+
+       outb(magic[0], 0x779);
+       outb(magic[1], 0x379);
+       outb(magic[2], 0x379);
+       outb(magic[3], 0x379);
+       outb(magic[4], 0x379);
+
+       /* allowed IRQs for HP C2502 */
+       if (irq != 2 && irq != 3 && irq != 4 && irq != 5 && irq != 7)
+               irq = 0;
+       if (idx >= 0 && idx <= 7)
+               cfg = 0x80 | idx | (irq << 4);
+       outb(cfg, 0x379);
+}
+#endif
+
 /**
  *     generic_NCR5380_detect  -       look for NCR5380 controllers
  *     @tpnt: the scsi template
@@ -243,19 +258,18 @@ static int __init do_DTC3181E_setup(char *str)
  *     and DTC436(ISAPnP) controllers. If overrides have been set we use
  *     them.
  *
- *     The caller supplied NCR5380_init function is invoked from here, before
- *     the interrupt line is taken.
- *
  *     Locks: none
  */
 
 static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
 {
-       static int current_override = 0;
+       static int current_override;
        int count;
        unsigned int *ports;
+       u8 *magic = NULL;
 #ifndef SCSI_G_NCR5380_MEM
        int i;
+       int port_idx = -1;
        unsigned long region_size = 16;
 #endif
        static unsigned int __initdata ncr_53c400a_ports[] = {
@@ -264,27 +278,36 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
        static unsigned int __initdata dtc_3181e_ports[] = {
                0x220, 0x240, 0x280, 0x2a0, 0x2c0, 0x300, 0x320, 0x340, 0
        };
-       int flags = 0;
+       static u8 ncr_53c400a_magic[] __initdata = {    /* 53C400A & DTC436 */
+               0x59, 0xb9, 0xc5, 0xae, 0xa6
+       };
+       static u8 hp_c2502_magic[] __initdata = {       /* HP C2502 */
+               0x0f, 0x22, 0xf0, 0x20, 0x80
+       };
+       int flags;
        struct Scsi_Host *instance;
+       struct NCR5380_hostdata *hostdata;
 #ifdef SCSI_G_NCR5380_MEM
        unsigned long base;
        void __iomem *iomem;
 #endif
 
-       if (ncr_irq != NCR_NOT_SET)
+       if (ncr_irq)
                overrides[0].irq = ncr_irq;
-       if (ncr_dma != NCR_NOT_SET)
+       if (ncr_dma)
                overrides[0].dma = ncr_dma;
-       if (ncr_addr != NCR_NOT_SET)
+       if (ncr_addr)
                overrides[0].NCR5380_map_name = (NCR5380_map_type) ncr_addr;
-       if (ncr_5380 != NCR_NOT_SET)
+       if (ncr_5380)
                overrides[0].board = BOARD_NCR5380;
-       else if (ncr_53c400 != NCR_NOT_SET)
+       else if (ncr_53c400)
                overrides[0].board = BOARD_NCR53C400;
-       else if (ncr_53c400a != NCR_NOT_SET)
+       else if (ncr_53c400a)
                overrides[0].board = BOARD_NCR53C400A;
-       else if (dtc_3181e != NCR_NOT_SET)
+       else if (dtc_3181e)
                overrides[0].board = BOARD_DTC3181E;
+       else if (hp_c2502)
+               overrides[0].board = BOARD_HP_C2502;
 #ifndef SCSI_G_NCR5380_MEM
        if (!current_override && isapnp_present()) {
                struct pnp_dev *dev = NULL;
@@ -318,41 +341,45 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
                }
        }
 #endif
-       tpnt->proc_name = "g_NCR5380";
 
        for (count = 0; current_override < NO_OVERRIDES; ++current_override) {
                if (!(overrides[current_override].NCR5380_map_name))
                        continue;
 
                ports = NULL;
+               flags = 0;
                switch (overrides[current_override].board) {
                case BOARD_NCR5380:
                        flags = FLAG_NO_PSEUDO_DMA;
                        break;
                case BOARD_NCR53C400:
-                       flags = FLAG_NCR53C400;
+#ifdef PSEUDO_DMA
+                       flags = FLAG_NO_DMA_FIXUP;
+#endif
                        break;
                case BOARD_NCR53C400A:
-                       flags = FLAG_NO_PSEUDO_DMA;
+                       flags = FLAG_NO_DMA_FIXUP;
+                       ports = ncr_53c400a_ports;
+                       magic = ncr_53c400a_magic;
+                       break;
+               case BOARD_HP_C2502:
+                       flags = FLAG_NO_DMA_FIXUP;
                        ports = ncr_53c400a_ports;
+                       magic = hp_c2502_magic;
                        break;
                case BOARD_DTC3181E:
-                       flags = FLAG_NO_PSEUDO_DMA | FLAG_DTC3181E;
+                       flags = FLAG_NO_DMA_FIXUP;
                        ports = dtc_3181e_ports;
+                       magic = ncr_53c400a_magic;
                        break;
                }
 
 #ifndef SCSI_G_NCR5380_MEM
-               if (ports) {
+               if (ports && magic) {
                        /* wakeup sequence for the NCR53C400A and DTC3181E */
 
                        /* Disable the adapter and look for a free io port */
-                       outb(0x59, 0x779);
-                       outb(0xb9, 0x379);
-                       outb(0xc5, 0x379);
-                       outb(0xae, 0x379);
-                       outb(0xa6, 0x379);
-                       outb(0x00, 0x379);
+                       magic_configure(-1, 0, magic);
 
                        if (overrides[current_override].NCR5380_map_name != PORT_AUTO)
                                for (i = 0; ports[i]; i++) {
@@ -371,17 +398,12 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
                                }
                        if (ports[i]) {
                                /* At this point we have our region reserved */
-                               outb(0x59, 0x779);
-                               outb(0xb9, 0x379);
-                               outb(0xc5, 0x379);
-                               outb(0xae, 0x379);
-                               outb(0xa6, 0x379);
-                               outb(0x80 | i, 0x379);  /* set io port to be used */
+                               magic_configure(i, 0, magic); /* no IRQ yet */
                                outb(0xc0, ports[i] + 9);
                                if (inb(ports[i] + 9) != 0x80)
                                        continue;
-                               else
-                                       overrides[current_override].NCR5380_map_name = ports[i];
+                               overrides[current_override].NCR5380_map_name = ports[i];
+                               port_idx = i;
                        } else
                                continue;
                }
@@ -403,24 +425,65 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
                }
 #endif
                instance = scsi_register(tpnt, sizeof(struct NCR5380_hostdata));
-               if (instance == NULL) {
-#ifndef SCSI_G_NCR5380_MEM
-                       release_region(overrides[current_override].NCR5380_map_name, region_size);
-#else
-                       iounmap(iomem);
-                       release_mem_region(base, NCR5380_region_size);
-#endif
-                       continue;
-               }
+               if (instance == NULL)
+                       goto out_release;
+               hostdata = shost_priv(instance);
 
-               instance->NCR5380_instance_name = overrides[current_override].NCR5380_map_name;
 #ifndef SCSI_G_NCR5380_MEM
+               instance->io_port = overrides[current_override].NCR5380_map_name;
                instance->n_io_port = region_size;
+               hostdata->io_width = 1; /* 8-bit PDMA by default */
+
+               /*
+                * On NCR53C400 boards, NCR5380 registers are mapped 8 past
+                * the base address.
+                */
+               switch (overrides[current_override].board) {
+               case BOARD_NCR53C400:
+                       instance->io_port += 8;
+                       hostdata->c400_ctl_status = 0;
+                       hostdata->c400_blk_cnt = 1;
+                       hostdata->c400_host_buf = 4;
+                       break;
+               case BOARD_DTC3181E:
+                       hostdata->io_width = 2; /* 16-bit PDMA */
+                       /* fall through */
+               case BOARD_NCR53C400A:
+               case BOARD_HP_C2502:
+                       hostdata->c400_ctl_status = 9;
+                       hostdata->c400_blk_cnt = 10;
+                       hostdata->c400_host_buf = 8;
+                       break;
+               }
 #else
-               ((struct NCR5380_hostdata *)instance->hostdata)->iomem = iomem;
+               instance->base = overrides[current_override].NCR5380_map_name;
+               hostdata->iomem = iomem;
+               switch (overrides[current_override].board) {
+               case BOARD_NCR53C400:
+                       hostdata->c400_ctl_status = 0x100;
+                       hostdata->c400_blk_cnt = 0x101;
+                       hostdata->c400_host_buf = 0x104;
+                       break;
+               case BOARD_DTC3181E:
+               case BOARD_NCR53C400A:
+               case BOARD_HP_C2502:
+                       pr_err(DRV_MODULE_NAME ": unknown register offsets\n");
+                       goto out_unregister;
+               }
 #endif
 
-               NCR5380_init(instance, flags);
+               if (NCR5380_init(instance, flags))
+                       goto out_unregister;
+
+               switch (overrides[current_override].board) {
+               case BOARD_NCR53C400:
+               case BOARD_DTC3181E:
+               case BOARD_NCR53C400A:
+               case BOARD_HP_C2502:
+                       NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
+               }
+
+               NCR5380_maybe_reset_bus(instance);
 
                if (overrides[current_override].irq != IRQ_AUTO)
                        instance->irq = overrides[current_override].irq;
@@ -431,12 +494,18 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
                if (instance->irq == 255)
                        instance->irq = NO_IRQ;
 
-               if (instance->irq != NO_IRQ)
+               if (instance->irq != NO_IRQ) {
+#ifndef SCSI_G_NCR5380_MEM
+                       /* set IRQ for HP C2502 */
+                       if (overrides[current_override].board == BOARD_HP_C2502)
+                               magic_configure(port_idx, instance->irq, magic);
+#endif
                        if (request_irq(instance->irq, generic_NCR5380_intr,
                                        0, "NCR5380", instance)) {
                                printk(KERN_WARNING "scsi%d : IRQ%d not free, interrupts disabled\n", instance->host_no, instance->irq);
                                instance->irq = NO_IRQ;
                        }
+               }
 
                if (instance->irq == NO_IRQ) {
                        printk(KERN_INFO "scsi%d : interrupts not enabled. for better interactive performance,\n", instance->host_no);
@@ -447,6 +516,17 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
                ++count;
        }
        return count;
+
+out_unregister:
+       scsi_unregister(instance);
+out_release:
+#ifndef SCSI_G_NCR5380_MEM
+       release_region(overrides[current_override].NCR5380_map_name, region_size);
+#else
+       iounmap(iomem);
+       release_mem_region(base, NCR5380_region_size);
+#endif
+       return count;
 }
 
 /**
@@ -460,21 +540,15 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
  
 static int generic_NCR5380_release_resources(struct Scsi_Host *instance)
 {
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
-       
        if (instance->irq != NO_IRQ)
                free_irq(instance->irq, instance);
        NCR5380_exit(instance);
-
 #ifndef SCSI_G_NCR5380_MEM
-       release_region(instance->NCR5380_instance_name, instance->n_io_port);
+       release_region(instance->io_port, instance->n_io_port);
 #else
        iounmap(((struct NCR5380_hostdata *)instance->hostdata)->iomem);
-       release_mem_region(instance->NCR5380_instance_name, NCR5380_region_size);
+       release_mem_region(instance->base, NCR5380_region_size);
 #endif
-
-
        return 0;
 }
 
@@ -507,7 +581,7 @@ generic_NCR5380_biosparam(struct scsi_device *sdev, struct block_device *bdev,
 }
 #endif
 
-#ifdef NCR53C400_PSEUDO_DMA
+#ifdef PSEUDO_DMA
 
 /**
  *     NCR5380_pread           -       pseudo DMA read
@@ -521,75 +595,68 @@ generic_NCR5380_biosparam(struct scsi_device *sdev, struct block_device *bdev,
  
 static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst, int len)
 {
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int blocks = len / 128;
        int start = 0;
-       int bl;
-
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
 
-       NCR5380_write(C400_CONTROL_STATUS_REG, CSR_BASE | CSR_TRANS_DIR);
-       NCR5380_write(C400_BLOCK_COUNTER_REG, blocks);
+       NCR5380_write(hostdata->c400_ctl_status, CSR_BASE | CSR_TRANS_DIR);
+       NCR5380_write(hostdata->c400_blk_cnt, blocks);
        while (1) {
-               if ((bl = NCR5380_read(C400_BLOCK_COUNTER_REG)) == 0) {
+               if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
                        break;
-               }
-               if (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ) {
+               if (NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ) {
                        printk(KERN_ERR "53C400r: Got 53C80_IRQ start=%d, blocks=%d\n", start, blocks);
                        return -1;
                }
-               while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY);
+               while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
+                       ; /* FIXME - no timeout */
 
 #ifndef SCSI_G_NCR5380_MEM
-               {
-                       int i;
-                       for (i = 0; i < 128; i++)
-                               dst[start + i] = NCR5380_read(C400_HOST_BUFFER);
-               }
+               if (hostdata->io_width == 2)
+                       insw(instance->io_port + hostdata->c400_host_buf,
+                                                       dst + start, 64);
+               else
+                       insb(instance->io_port + hostdata->c400_host_buf,
+                                                       dst + start, 128);
 #else
                /* implies SCSI_G_NCR5380_MEM */
-               memcpy_fromio(dst + start, iomem + NCR53C400_host_buffer, 128);
+               memcpy_fromio(dst + start,
+                             hostdata->iomem + NCR53C400_host_buffer, 128);
 #endif
                start += 128;
                blocks--;
        }
 
        if (blocks) {
-               while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY)
-               {
-                       // FIXME - no timeout
-               }
+               while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
+                       ; /* FIXME - no timeout */
 
 #ifndef SCSI_G_NCR5380_MEM
-               {
-                       int i;  
-                       for (i = 0; i < 128; i++)
-                               dst[start + i] = NCR5380_read(C400_HOST_BUFFER);
-               }
+               if (hostdata->io_width == 2)
+                       insw(instance->io_port + hostdata->c400_host_buf,
+                                                       dst + start, 64);
+               else
+                       insb(instance->io_port + hostdata->c400_host_buf,
+                                                       dst + start, 128);
 #else
                /* implies SCSI_G_NCR5380_MEM */
-               memcpy_fromio(dst + start, iomem + NCR53C400_host_buffer, 128);
+               memcpy_fromio(dst + start,
+                             hostdata->iomem + NCR53C400_host_buffer, 128);
 #endif
                start += 128;
                blocks--;
        }
 
-       if (!(NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ))
+       if (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ))
                printk("53C400r: no 53C80 gated irq after transfer");
 
-#if 0
-       /*
-        *      DON'T DO THIS - THEY NEVER ARRIVE!
-        */
-       printk("53C400r: Waiting for 53C80 registers\n");
-       while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_53C80_REG)
+       /* wait for 53C80 registers to be available */
+       while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG))
                ;
-#endif
+
        if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_END_DMA_TRANSFER))
                printk(KERN_ERR "53C400r: no end dma signal\n");
                
-       NCR5380_write(MODE_REG, MR_BASE);
-       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
        return 0;
 }
 
@@ -605,89 +672,91 @@ static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst,
 
 static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src, int len)
 {
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int blocks = len / 128;
        int start = 0;
-       int bl;
-       int i;
 
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
-
-       NCR5380_write(C400_CONTROL_STATUS_REG, CSR_BASE);
-       NCR5380_write(C400_BLOCK_COUNTER_REG, blocks);
+       NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
+       NCR5380_write(hostdata->c400_blk_cnt, blocks);
        while (1) {
-               if (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ) {
+               if (NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ) {
                        printk(KERN_ERR "53C400w: Got 53C80_IRQ start=%d, blocks=%d\n", start, blocks);
                        return -1;
                }
 
-               if ((bl = NCR5380_read(C400_BLOCK_COUNTER_REG)) == 0) {
+               if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
                        break;
-               }
-               while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY)
+               while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
                        ; // FIXME - timeout
 #ifndef SCSI_G_NCR5380_MEM
-               {
-                       for (i = 0; i < 128; i++)
-                               NCR5380_write(C400_HOST_BUFFER, src[start + i]);
-               }
+               if (hostdata->io_width == 2)
+                       outsw(instance->io_port + hostdata->c400_host_buf,
+                                                       src + start, 64);
+               else
+                       outsb(instance->io_port + hostdata->c400_host_buf,
+                                                       src + start, 128);
 #else
                /* implies SCSI_G_NCR5380_MEM */
-               memcpy_toio(iomem + NCR53C400_host_buffer, src + start, 128);
+               memcpy_toio(hostdata->iomem + NCR53C400_host_buffer,
+                           src + start, 128);
 #endif
                start += 128;
                blocks--;
        }
        if (blocks) {
-               while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY)
+               while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
                        ; // FIXME - no timeout
 
 #ifndef SCSI_G_NCR5380_MEM
-               {
-                       for (i = 0; i < 128; i++)
-                               NCR5380_write(C400_HOST_BUFFER, src[start + i]);
-               }
+               if (hostdata->io_width == 2)
+                       outsw(instance->io_port + hostdata->c400_host_buf,
+                                                       src + start, 64);
+               else
+                       outsb(instance->io_port + hostdata->c400_host_buf,
+                                                       src + start, 128);
 #else
                /* implies SCSI_G_NCR5380_MEM */
-               memcpy_toio(iomem + NCR53C400_host_buffer, src + start, 128);
+               memcpy_toio(hostdata->iomem + NCR53C400_host_buffer,
+                           src + start, 128);
 #endif
                start += 128;
                blocks--;
        }
 
-#if 0
-       printk("53C400w: waiting for registers to be available\n");
-       THEY NEVER DO ! while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_53C80_REG);
-       printk("53C400w: Got em\n");
-#endif
-
-       /* Let's wait for this instead - could be ugly */
-       /* All documentation says to check for this. Maybe my hardware is too
-        * fast. Waiting for it seems to work fine! KLL
-        */
-       while (!(i = NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ))
-               ;       // FIXME - no timeout
-
-       /*
-        * I know. i is certainly != 0 here but the loop is new. See previous
-        * comment.
-        */
-       if (i) {
-               if (!((i = NCR5380_read(BUS_AND_STATUS_REG)) & BASR_END_DMA_TRANSFER))
-                       printk(KERN_ERR "53C400w: No END OF DMA bit - WHOOPS! BASR=%0x\n", i);
-       } else
-               printk(KERN_ERR "53C400w: no 53C80 gated irq after transfer (last block)\n");
+       /* wait for 53C80 registers to be available */
+       while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG)) {
+               udelay(4); /* DTC436 chip hangs without this */
+               /* FIXME - no timeout */
+       }
 
-#if 0
        if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_END_DMA_TRANSFER)) {
                printk(KERN_ERR "53C400w: no end dma signal\n");
        }
-#endif
+
        while (!(NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT))
                ;       // TIMEOUT
        return 0;
 }
-#endif                         /* PSEUDO_DMA */
+
+static int generic_NCR5380_dma_xfer_len(struct scsi_cmnd *cmd)
+{
+       int transfersize = cmd->transfersize;
+
+       /* Limit transfers to 32K, for xx400 & xx406
+        * pseudoDMA that transfers in 128-byte blocks.
+        */
+       if (transfersize > 32 * 1024 && cmd->SCp.this_residual &&
+           !(cmd->SCp.this_residual % transfersize))
+               transfersize = 32 * 1024;
+
+       /* 53C400 datasheet: non-modulo-128-byte transfers should use PIO */
+       if (transfersize % 128)
+               transfersize = 0;
+
+       return transfersize;
+}
+
+#endif /* PSEUDO_DMA */
 
 /*
  *     Include the NCR5380 core code that we build our driver around   
@@ -696,22 +765,24 @@ static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src,
 #include "NCR5380.c"
 
 static struct scsi_host_template driver_template = {
-       .show_info              = generic_NCR5380_show_info,
-       .name                   = "Generic NCR5380/NCR53C400 SCSI",
-       .detect                 = generic_NCR5380_detect,
-       .release                = generic_NCR5380_release_resources,
-       .info                   = generic_NCR5380_info,
-       .queuecommand           = generic_NCR5380_queue_command,
+       .proc_name              = DRV_MODULE_NAME,
+       .name                   = "Generic NCR5380/NCR53C400 SCSI",
+       .detect                 = generic_NCR5380_detect,
+       .release                = generic_NCR5380_release_resources,
+       .info                   = generic_NCR5380_info,
+       .queuecommand           = generic_NCR5380_queue_command,
        .eh_abort_handler       = generic_NCR5380_abort,
        .eh_bus_reset_handler   = generic_NCR5380_bus_reset,
-       .bios_param             = NCR5380_BIOSPARAM,
-       .can_queue              = CAN_QUEUE,
-        .this_id               = 7,
-        .sg_tablesize          = SG_ALL,
-       .cmd_per_lun            = CMD_PER_LUN,
-        .use_clustering                = DISABLE_CLUSTERING,
+       .bios_param             = NCR5380_BIOSPARAM,
+       .can_queue              = 16,
+       .this_id                = 7,
+       .sg_tablesize           = SG_ALL,
+       .cmd_per_lun            = 2,
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
-#include <linux/module.h>
+
 #include "scsi_module.c"
 
 module_param(ncr_irq, int, 0);
@@ -721,6 +792,7 @@ module_param(ncr_5380, int, 0);
 module_param(ncr_53c400, int, 0);
 module_param(ncr_53c400a, int, 0);
 module_param(dtc_3181e, int, 0);
+module_param(hp_c2502, int, 0);
 MODULE_LICENSE("GPL");
 
 #if !defined(SCSI_G_NCR5380_MEM) && defined(MODULE)
index bea1a3b..6f3d2ac 100644 (file)
 #ifndef GENERIC_NCR5380_H
 #define GENERIC_NCR5380_H
 
-#ifdef NCR53C400
+#ifdef CONFIG_SCSI_GENERIC_NCR53C400
 #define BIOSPARAM
 #define NCR5380_BIOSPARAM generic_NCR5380_biosparam
 #else
 #define NCR5380_BIOSPARAM NULL
 #endif
 
-#ifndef ASM
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 16
-#endif
-
 #define __STRVAL(x) #x
 #define STRVAL(x) __STRVAL(x)
 
 #ifndef SCSI_G_NCR5380_MEM
+#define DRV_MODULE_NAME "g_NCR5380"
 
-#define NCR5380_map_config port
 #define NCR5380_map_type int
 #define NCR5380_map_name port
-#define NCR5380_instance_name io_port
-#define NCR53C400_register_offset 0
-#define NCR53C400_address_adjust 8
 
-#ifdef NCR53C400
+#ifdef CONFIG_SCSI_GENERIC_NCR53C400
 #define NCR5380_region_size 16
 #else
 #define NCR5380_region_size 8
 #endif
 
-#define NCR5380_read(reg) (inb(NCR5380_map_name + (reg)))
-#define NCR5380_write(reg, value) (outb((value), (NCR5380_map_name + (reg))))
+#define NCR5380_read(reg) \
+       inb(instance->io_port + (reg))
+#define NCR5380_write(reg, value) \
+       outb(value, instance->io_port + (reg))
 
 #define NCR5380_implementation_fields \
-    NCR5380_map_type NCR5380_map_name
-
-#define NCR5380_local_declare() \
-    register NCR5380_implementation_fields
-
-#define NCR5380_setup(instance) \
-    NCR5380_map_name = (NCR5380_map_type)((instance)->NCR5380_instance_name)
+       int c400_ctl_status; \
+       int c400_blk_cnt; \
+       int c400_host_buf; \
+       int io_width;
 
 #else 
 /* therefore SCSI_G_NCR5380_MEM */
+#define DRV_MODULE_NAME "g_NCR5380_mmio"
 
-#define NCR5380_map_config memory
 #define NCR5380_map_type unsigned long
 #define NCR5380_map_name base
-#define NCR5380_instance_name base
-#define NCR53C400_register_offset 0x108
-#define NCR53C400_address_adjust 0
 #define NCR53C400_mem_base 0x3880
 #define NCR53C400_host_buffer 0x3900
 #define NCR5380_region_size 0x3a00
 
-#define NCR5380_read(reg) readb(iomem + NCR53C400_mem_base + (reg))
-#define NCR5380_write(reg, value) writeb(value, iomem + NCR53C400_mem_base + (reg))
+#define NCR5380_read(reg) \
+       readb(((struct NCR5380_hostdata *)shost_priv(instance))->iomem + \
+             NCR53C400_mem_base + (reg))
+#define NCR5380_write(reg, value) \
+       writeb(value, ((struct NCR5380_hostdata *)shost_priv(instance))->iomem + \
+              NCR53C400_mem_base + (reg))
 
 #define NCR5380_implementation_fields \
-    NCR5380_map_type NCR5380_map_name; \
-    void __iomem *iomem;
-
-#define NCR5380_local_declare() \
-    register void __iomem *iomem
-
-#define NCR5380_setup(instance) \
-    iomem = (((struct NCR5380_hostdata *)(instance)->hostdata)->iomem)
+       void __iomem *iomem; \
+       int c400_ctl_status; \
+       int c400_blk_cnt; \
+       int c400_host_buf;
 
 #endif
 
+#define NCR5380_dma_xfer_len(instance, cmd, phase) \
+        generic_NCR5380_dma_xfer_len(cmd)
+
 #define NCR5380_intr generic_NCR5380_intr
 #define NCR5380_queue_command generic_NCR5380_queue_command
 #define NCR5380_abort generic_NCR5380_abort
 #define BOARD_NCR53C400        1
 #define BOARD_NCR53C400A 2
 #define BOARD_DTC3181E 3
+#define BOARD_HP_C2502 4
 
-#endif /* ndef ASM */
 #endif /* GENERIC_NCR5380_H */
 
index 37a0c71..b676618 100644 (file)
@@ -1,5 +1,7 @@
 config SCSI_HISI_SAS
        tristate "HiSilicon SAS"
+       depends on HAS_DMA
+       depends on ARM64 || COMPILE_TEST
        select SCSI_SAS_LIBSAS
        select BLK_DEV_INTEGRITY
        help
index d543811..057fdeb 100644 (file)
 /* ITCT header */
 /* qw0 */
 #define ITCT_HDR_DEV_TYPE_OFF          0
-#define ITCT_HDR_DEV_TYPE_MSK          (0x3 << ITCT_HDR_DEV_TYPE_OFF)
+#define ITCT_HDR_DEV_TYPE_MSK          (0x3ULL << ITCT_HDR_DEV_TYPE_OFF)
 #define ITCT_HDR_VALID_OFF             2
-#define ITCT_HDR_VALID_MSK             (0x1 << ITCT_HDR_VALID_OFF)
-#define ITCT_HDR_BREAK_REPLY_ENA_OFF   3
-#define ITCT_HDR_BREAK_REPLY_ENA_MSK   (0x1 << ITCT_HDR_BREAK_REPLY_ENA_OFF)
+#define ITCT_HDR_VALID_MSK             (0x1ULL << ITCT_HDR_VALID_OFF)
 #define ITCT_HDR_AWT_CONTROL_OFF       4
-#define ITCT_HDR_AWT_CONTROL_MSK       (0x1 << ITCT_HDR_AWT_CONTROL_OFF)
+#define ITCT_HDR_AWT_CONTROL_MSK       (0x1ULL << ITCT_HDR_AWT_CONTROL_OFF)
 #define ITCT_HDR_MAX_CONN_RATE_OFF     5
-#define ITCT_HDR_MAX_CONN_RATE_MSK     (0xf << ITCT_HDR_MAX_CONN_RATE_OFF)
+#define ITCT_HDR_MAX_CONN_RATE_MSK     (0xfULL << ITCT_HDR_MAX_CONN_RATE_OFF)
 #define ITCT_HDR_VALID_LINK_NUM_OFF    9
-#define ITCT_HDR_VALID_LINK_NUM_MSK    (0xf << ITCT_HDR_VALID_LINK_NUM_OFF)
+#define ITCT_HDR_VALID_LINK_NUM_MSK    (0xfULL << ITCT_HDR_VALID_LINK_NUM_OFF)
 #define ITCT_HDR_PORT_ID_OFF           13
-#define ITCT_HDR_PORT_ID_MSK           (0x7 << ITCT_HDR_PORT_ID_OFF)
+#define ITCT_HDR_PORT_ID_MSK           (0x7ULL << ITCT_HDR_PORT_ID_OFF)
 #define ITCT_HDR_SMP_TIMEOUT_OFF       16
-#define ITCT_HDR_SMP_TIMEOUT_MSK       (0xffff << ITCT_HDR_SMP_TIMEOUT_OFF)
-#define ITCT_HDR_MAX_BURST_BYTES_OFF   16
-#define ITCT_HDR_MAX_BURST_BYTES_MSK   (0xffffffff << \
-                                       ITCT_MAX_BURST_BYTES_OFF)
+#define ITCT_HDR_SMP_TIMEOUT_MSK       (0xffffULL << ITCT_HDR_SMP_TIMEOUT_OFF)
 /* qw1 */
 #define ITCT_HDR_MAX_SAS_ADDR_OFF      0
 #define ITCT_HDR_MAX_SAS_ADDR_MSK      (0xffffffffffffffff << \
                                        ITCT_HDR_MAX_SAS_ADDR_OFF)
 /* qw2 */
 #define ITCT_HDR_IT_NEXUS_LOSS_TL_OFF  0
-#define ITCT_HDR_IT_NEXUS_LOSS_TL_MSK  (0xffff << \
+#define ITCT_HDR_IT_NEXUS_LOSS_TL_MSK  (0xffffULL << \
                                        ITCT_HDR_IT_NEXUS_LOSS_TL_OFF)
 #define ITCT_HDR_BUS_INACTIVE_TL_OFF   16
-#define ITCT_HDR_BUS_INACTIVE_TL_MSK   (0xffff << \
+#define ITCT_HDR_BUS_INACTIVE_TL_MSK   (0xffffULL << \
                                        ITCT_HDR_BUS_INACTIVE_TL_OFF)
 #define ITCT_HDR_MAX_CONN_TL_OFF       32
-#define ITCT_HDR_MAX_CONN_TL_MSK       (0xffff << \
+#define ITCT_HDR_MAX_CONN_TL_MSK       (0xffffULL << \
                                        ITCT_HDR_MAX_CONN_TL_OFF)
 #define ITCT_HDR_REJ_OPEN_TL_OFF       48
-#define ITCT_HDR_REJ_OPEN_TL_MSK       (0xffff << \
-                                       ITCT_REJ_OPEN_TL_OFF)
+#define ITCT_HDR_REJ_OPEN_TL_MSK       (0xffffULL << \
+                                       ITCT_HDR_REJ_OPEN_TL_OFF)
 
 /* Err record header */
 #define ERR_HDR_DMA_TX_ERR_TYPE_OFF    0
@@ -533,10 +528,10 @@ static void setup_itct_v1_hw(struct hisi_hba *hisi_hba,
        itct->sas_addr = __swab64(itct->sas_addr);
 
        /* qw2 */
-       itct->qw2 = cpu_to_le64((500 < ITCT_HDR_IT_NEXUS_LOSS_TL_OFF) |
-                               (0xff00 < ITCT_HDR_BUS_INACTIVE_TL_OFF) |
-                               (0xff00 < ITCT_HDR_MAX_CONN_TL_OFF) |
-                               (0xff00 < ITCT_HDR_REJ_OPEN_TL_OFF));
+       itct->qw2 = cpu_to_le64((500ULL << ITCT_HDR_IT_NEXUS_LOSS_TL_OFF) |
+                               (0xff00ULL << ITCT_HDR_BUS_INACTIVE_TL_OFF) |
+                               (0xff00ULL << ITCT_HDR_MAX_CONN_TL_OFF) |
+                               (0xff00ULL << ITCT_HDR_REJ_OPEN_TL_OFF));
 }
 
 static void free_device_v1_hw(struct hisi_hba *hisi_hba,
@@ -544,7 +539,8 @@ static void free_device_v1_hw(struct hisi_hba *hisi_hba,
 {
        u64 dev_id = sas_dev->device_id;
        struct hisi_sas_itct *itct = &hisi_hba->itct[dev_id];
-       u32 qw0, reg_val = hisi_sas_read32(hisi_hba, CFG_AGING_TIME);
+       u64 qw0;
+       u32 reg_val = hisi_sas_read32(hisi_hba, CFG_AGING_TIME);
 
        reg_val |= CFG_AGING_TIME_ITCT_REL_MSK;
        hisi_sas_write32(hisi_hba, CFG_AGING_TIME, reg_val);
index 4e1a632..f8b88fa 100644 (file)
@@ -43,6 +43,7 @@ typedef struct {
        unsigned dp:1;          /* Data phase present           */
        unsigned rd:1;          /* Read data in data phase      */
        unsigned wanted:1;      /* Parport sharing busy flag    */
+       unsigned int dev_no;    /* Device number                */
        wait_queue_head_t *waiting;
        struct Scsi_Host *host;
        struct list_head list;
@@ -1120,15 +1121,40 @@ static struct scsi_host_template imm_template = {
 
 static LIST_HEAD(imm_hosts);
 
+/*
+ * Finds the first available device number that can be allotted to the
+ * new imm device and returns the node holding the preceding number (NULL
+ * if the list is empty or does not start at 0) to keep ascending order.
+ */
+
+static inline imm_struct *find_parent(void)
+{
+       imm_struct *dev, *par = NULL;
+       unsigned int cnt = 0;
+
+       if (list_empty(&imm_hosts))
+               return NULL;
+
+       list_for_each_entry(dev, &imm_hosts, list) {
+               if (dev->dev_no != cnt)
+                       return par;
+               cnt++;
+               par = dev;
+       }
+
+       return par;
+}
+
 static int __imm_attach(struct parport *pb)
 {
        struct Scsi_Host *host;
-       imm_struct *dev;
+       imm_struct *dev, *temp;
        DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waiting);
        DEFINE_WAIT(wait);
        int ports;
        int modes, ppb;
        int err = -ENOMEM;
+       struct pardev_cb imm_cb;
 
        init_waitqueue_head(&waiting);
 
@@ -1141,9 +1167,15 @@ static int __imm_attach(struct parport *pb)
        dev->mode = IMM_AUTODETECT;
        INIT_LIST_HEAD(&dev->list);
 
-       dev->dev = parport_register_device(pb, "imm", NULL, imm_wakeup,
-                                               NULL, 0, dev);
+       temp = find_parent();
+       if (temp)
+               dev->dev_no = temp->dev_no + 1;
+
+       memset(&imm_cb, 0, sizeof(imm_cb));
+       imm_cb.private = dev;
+       imm_cb.wakeup = imm_wakeup;
 
+       dev->dev = parport_register_dev_model(pb, "imm", &imm_cb, dev->dev_no);
        if (!dev->dev)
                goto out;
 
@@ -1207,7 +1239,10 @@ static int __imm_attach(struct parport *pb)
        host->unique_id = pb->number;
        *(imm_struct **)&host->hostdata = dev;
        dev->host = host;
-       list_add_tail(&dev->list, &imm_hosts);
+       if (!temp)
+               list_add_tail(&dev->list, &imm_hosts);
+       else
+               list_add_tail(&dev->list, &temp->list);
        err = scsi_add_host(host, NULL);
        if (err)
                goto out2;
@@ -1245,9 +1280,10 @@ static void imm_detach(struct parport *pb)
 }
 
 static struct parport_driver imm_driver = {
-       .name   = "imm",
-       .attach = imm_attach,
-       .detach = imm_detach,
+       .name           = "imm",
+       .match_port     = imm_attach,
+       .detach         = imm_detach,
+       .devmodel       = true,
 };
 
 static int __init imm_driver_init(void)
index 536cd5a..3b3e099 100644 (file)
@@ -3638,7 +3638,7 @@ static struct device_attribute ipr_ioa_reset_attr = {
        .store = ipr_store_reset_adapter
 };
 
-static int ipr_iopoll(struct blk_iopoll *iop, int budget);
+static int ipr_iopoll(struct irq_poll *iop, int budget);
  /**
  * ipr_show_iopoll_weight - Show ipr polling mode
  * @dev:       class device struct
@@ -3681,34 +3681,33 @@ static ssize_t ipr_store_iopoll_weight(struct device *dev,
        int i;
 
        if (!ioa_cfg->sis64) {
-               dev_info(&ioa_cfg->pdev->dev, "blk-iopoll not supported on this adapter\n");
+               dev_info(&ioa_cfg->pdev->dev, "irq_poll not supported on this adapter\n");
                return -EINVAL;
        }
        if (kstrtoul(buf, 10, &user_iopoll_weight))
                return -EINVAL;
 
        if (user_iopoll_weight > 256) {
-               dev_info(&ioa_cfg->pdev->dev, "Invalid blk-iopoll weight. It must be less than 256\n");
+               dev_info(&ioa_cfg->pdev->dev, "Invalid irq_poll weight. It must be less than 256\n");
                return -EINVAL;
        }
 
        if (user_iopoll_weight == ioa_cfg->iopoll_weight) {
-               dev_info(&ioa_cfg->pdev->dev, "Current blk-iopoll weight has the same weight\n");
+               dev_info(&ioa_cfg->pdev->dev, "Current irq_poll weight has the same weight\n");
                return strlen(buf);
        }
 
        if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
                for (i = 1; i < ioa_cfg->hrrq_num; i++)
-                       blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll);
+                       irq_poll_disable(&ioa_cfg->hrrq[i].iopoll);
        }
 
        spin_lock_irqsave(shost->host_lock, lock_flags);
        ioa_cfg->iopoll_weight = user_iopoll_weight;
        if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
                for (i = 1; i < ioa_cfg->hrrq_num; i++) {
-                       blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll,
+                       irq_poll_init(&ioa_cfg->hrrq[i].iopoll,
                                        ioa_cfg->iopoll_weight, ipr_iopoll);
-                       blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
                }
        }
        spin_unlock_irqrestore(shost->host_lock, lock_flags);
@@ -4003,13 +4002,12 @@ static ssize_t ipr_store_update_fw(struct device *dev,
        struct ipr_sglist *sglist;
        char fname[100];
        char *src;
-       int len, result, dnld_size;
+       int result, dnld_size;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
 
-       len = snprintf(fname, 99, "%s", buf);
-       fname[len-1] = '\0';
+       snprintf(fname, sizeof(fname), "%s", buf);
 
        if (request_firmware(&fw_entry, fname, &ioa_cfg->pdev->dev)) {
                dev_err(&ioa_cfg->pdev->dev, "Firmware file %s not found\n", fname);
@@ -5569,7 +5567,7 @@ static int ipr_process_hrrq(struct ipr_hrr_queue *hrr_queue, int budget,
        return num_hrrq;
 }
 
-static int ipr_iopoll(struct blk_iopoll *iop, int budget)
+static int ipr_iopoll(struct irq_poll *iop, int budget)
 {
        struct ipr_ioa_cfg *ioa_cfg;
        struct ipr_hrr_queue *hrrq;
@@ -5585,7 +5583,7 @@ static int ipr_iopoll(struct blk_iopoll *iop, int budget)
        completed_ops = ipr_process_hrrq(hrrq, budget, &doneq);
 
        if (completed_ops < budget)
-               blk_iopoll_complete(iop);
+               irq_poll_complete(iop);
        spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
 
        list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) {
@@ -5693,8 +5691,7 @@ static irqreturn_t ipr_isr_mhrrq(int irq, void *devp)
        if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
                if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) ==
                       hrrq->toggle_bit) {
-                       if (!blk_iopoll_sched_prep(&hrrq->iopoll))
-                               blk_iopoll_sched(&hrrq->iopoll);
+                       irq_poll_sched(&hrrq->iopoll);
                        spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
                        return IRQ_HANDLED;
                }
@@ -10405,9 +10402,8 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
 
        if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
                for (i = 1; i < ioa_cfg->hrrq_num; i++) {
-                       blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll,
+                       irq_poll_init(&ioa_cfg->hrrq[i].iopoll,
                                        ioa_cfg->iopoll_weight, ipr_iopoll);
-                       blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
                }
        }
 
@@ -10436,7 +10432,7 @@ static void ipr_shutdown(struct pci_dev *pdev)
        if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
                ioa_cfg->iopoll_weight = 0;
                for (i = 1; i < ioa_cfg->hrrq_num; i++)
-                       blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll);
+                       irq_poll_disable(&ioa_cfg->hrrq[i].iopoll);
        }
 
        while (ioa_cfg->in_reset_reload) {
index a34c7a5..56c5706 100644 (file)
@@ -32,7 +32,7 @@
 #include <linux/libata.h>
 #include <linux/list.h>
 #include <linux/kref.h>
-#include <linux/blk-iopoll.h>
+#include <linux/irq_poll.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
 
@@ -517,7 +517,7 @@ struct ipr_hrr_queue {
        u8 allow_cmds:1;
        u8 removing_ioa:1;
 
-       struct blk_iopoll iopoll;
+       struct irq_poll iopoll;
 };
 
 /* Command packet structure */
index d64a769..bb23813 100644 (file)
@@ -12,7 +12,6 @@
  */
 
 #include <linux/types.h>
-#include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/ioport.h>
 #include <linux/init.h>
 #define PSEUDO_DMA
 
 #define NCR5380_implementation_fields   unsigned char *pdma_base
-#define NCR5380_local_declare()         struct Scsi_Host *_instance
-#define NCR5380_setup(instance)         _instance = instance
 
-#define NCR5380_read(reg)               macscsi_read(_instance, reg)
-#define NCR5380_write(reg, value)       macscsi_write(_instance, reg, value)
+#define NCR5380_read(reg)               macscsi_read(instance, reg)
+#define NCR5380_write(reg, value)       macscsi_write(instance, reg, value)
 
 #define NCR5380_pread                   macscsi_pread
 #define NCR5380_pwrite                  macscsi_pwrite
+#define NCR5380_dma_xfer_len(instance, cmd, phase)     (cmd->transfersize)
 
 #define NCR5380_intr                    macscsi_intr
 #define NCR5380_queue_command           macscsi_queue_command
@@ -51,8 +49,6 @@
 
 #include "NCR5380.h"
 
-#define RESET_BOOT
-
 static int setup_can_queue = -1;
 module_param(setup_can_queue, int, 0);
 static int setup_cmd_per_lun = -1;
@@ -65,17 +61,8 @@ static int setup_use_tagged_queuing = -1;
 module_param(setup_use_tagged_queuing, int, 0);
 static int setup_hostid = -1;
 module_param(setup_hostid, int, 0);
-
-/* Time (in jiffies) to wait after a reset; the SCSI standard calls for 250ms,
- * we usually do 0.5s to be on the safe side. But Toshiba CD-ROMs once more
- * need ten times the standard value... */
-#define TOSHIBA_DELAY
-
-#ifdef TOSHIBA_DELAY
-#define        AFTER_RESET_DELAY       (5*HZ/2)
-#else
-#define        AFTER_RESET_DELAY       (HZ/2)
-#endif
+static int setup_toshiba_delay = -1;
+module_param(setup_toshiba_delay, int, 0);
 
 /*
  * NCR 5380 register access functions
@@ -94,12 +81,12 @@ static inline void macscsi_write(struct Scsi_Host *instance, int reg, int value)
 #ifndef MODULE
 static int __init mac_scsi_setup(char *str)
 {
-       int ints[7];
+       int ints[8];
 
        (void)get_options(str, ARRAY_SIZE(ints), ints);
 
-       if (ints[0] < 1 || ints[0] > 6) {
-               pr_err("Usage: mac5380=<can_queue>[,<cmd_per_lun>[,<sg_tablesize>[,<hostid>[,<use_tags>[,<use_pdma>]]]]]\n");
+       if (ints[0] < 1) {
+               pr_err("Usage: mac5380=<can_queue>[,<cmd_per_lun>[,<sg_tablesize>[,<hostid>[,<use_tags>[,<use_pdma>[,<toshiba_delay>]]]]]]\n");
                return 0;
        }
        if (ints[0] >= 1)
@@ -114,50 +101,14 @@ static int __init mac_scsi_setup(char *str)
                setup_use_tagged_queuing = ints[5];
        if (ints[0] >= 6)
                setup_use_pdma = ints[6];
+       if (ints[0] >= 7)
+               setup_toshiba_delay = ints[7];
        return 1;
 }
 
 __setup("mac5380=", mac_scsi_setup);
 #endif /* !MODULE */
 
-#ifdef RESET_BOOT
-/*
- * Our 'bus reset on boot' function
- */
-
-static void mac_scsi_reset_boot(struct Scsi_Host *instance)
-{
-       unsigned long end;
-
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
-       
-       /*
-        * Do a SCSI reset to clean up the bus during initialization. No messing
-        * with the queues, interrupts, or locks necessary here.
-        */
-
-       printk(KERN_INFO "Macintosh SCSI: resetting the SCSI bus..." );
-
-       /* get in phase */
-       NCR5380_write( TARGET_COMMAND_REG,
-                     PHASE_SR_TO_TCR( NCR5380_read(STATUS_REG) ));
-
-       /* assert RST */
-       NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST );
-       /* The min. reset hold time is 25us, so 40us should be enough */
-       udelay( 50 );
-       /* reset RST and interrupt */
-       NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE );
-       NCR5380_read( RESET_PARITY_INTERRUPT_REG );
-
-       for( end = jiffies + AFTER_RESET_DELAY; time_before(jiffies, end); )
-               barrier();
-
-       printk(KERN_INFO " done\n" );
-}
-#endif
-
 #ifdef PSEUDO_DMA
 /* 
    Pseudo-DMA: (Ove Edlund)
@@ -235,9 +186,6 @@ static int macscsi_pread(struct Scsi_Host *instance,
        unsigned char *d;
        unsigned char *s;
 
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
-
        s = hostdata->pdma_base + (INPUT_DATA_REG << 4);
        d = dst;
 
@@ -329,9 +277,6 @@ static int macscsi_pwrite(struct Scsi_Host *instance,
        unsigned char *s;
        unsigned char *d;
 
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
-
        s = src;
        d = hostdata->pdma_base + (OUTPUT_DATA_REG << 4);
 
@@ -364,20 +309,22 @@ static int macscsi_pwrite(struct Scsi_Host *instance,
 #define PFX                     DRV_MODULE_NAME ": "
 
 static struct scsi_host_template mac_scsi_template = {
-       .module                         = THIS_MODULE,
-       .proc_name                      = DRV_MODULE_NAME,
-       .show_info                      = macscsi_show_info,
-       .write_info                     = macscsi_write_info,
-       .name                           = "Macintosh NCR5380 SCSI",
-       .info                           = macscsi_info,
-       .queuecommand                   = macscsi_queue_command,
-       .eh_abort_handler               = macscsi_abort,
-       .eh_bus_reset_handler           = macscsi_bus_reset,
-       .can_queue                      = 16,
-       .this_id                        = 7,
-       .sg_tablesize                   = SG_ALL,
-       .cmd_per_lun                    = 2,
-       .use_clustering                 = DISABLE_CLUSTERING
+       .module                 = THIS_MODULE,
+       .proc_name              = DRV_MODULE_NAME,
+       .show_info              = macscsi_show_info,
+       .write_info             = macscsi_write_info,
+       .name                   = "Macintosh NCR5380 SCSI",
+       .info                   = macscsi_info,
+       .queuecommand           = macscsi_queue_command,
+       .eh_abort_handler       = macscsi_abort,
+       .eh_bus_reset_handler   = macscsi_bus_reset,
+       .can_queue              = 16,
+       .this_id                = 7,
+       .sg_tablesize           = SG_ALL,
+       .cmd_per_lun            = 2,
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 
 static int __init mac_scsi_probe(struct platform_device *pdev)
@@ -432,15 +379,14 @@ static int __init mac_scsi_probe(struct platform_device *pdev)
        } else
                host_flags |= FLAG_NO_PSEUDO_DMA;
 
-#ifdef RESET_BOOT
-       mac_scsi_reset_boot(instance);
-#endif
-
 #ifdef SUPPORT_TAGS
        host_flags |= setup_use_tagged_queuing > 0 ? FLAG_TAGGED_QUEUING : 0;
 #endif
+       host_flags |= setup_toshiba_delay > 0 ? FLAG_TOSHIBA_DELAY : 0;
 
-       NCR5380_init(instance, host_flags);
+       error = NCR5380_init(instance, host_flags);
+       if (error)
+               goto fail_init;
 
        if (instance->irq != NO_IRQ) {
                error = request_irq(instance->irq, macscsi_intr, IRQF_SHARED,
@@ -449,6 +395,8 @@ static int __init mac_scsi_probe(struct platform_device *pdev)
                        goto fail_irq;
        }
 
+       NCR5380_maybe_reset_bus(instance);
+
        error = scsi_add_host(instance, NULL);
        if (error)
                goto fail_host;
@@ -463,6 +411,7 @@ fail_host:
                free_irq(instance->irq, instance);
 fail_irq:
        NCR5380_exit(instance);
+fail_init:
        scsi_host_put(instance);
        return error;
 }
index a706927..4cf9ed9 100644 (file)
@@ -179,8 +179,12 @@ mraid_mm_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 
        /*
         * The following call will block till a kioc is available
+        * or return NULL if the list head is empty for the pointer
+        * of type mraid_mmapt passed to mraid_mm_alloc_kioc
         */
        kioc = mraid_mm_alloc_kioc(adp);
+       if (!kioc)
+               return -ENXIO;
 
        /*
         * User sent the old mimd_t ioctl packet. Convert it to uioc_t.
index e81eadd..512037e 100644 (file)
@@ -1,6 +1,4 @@
 #define PSEUDO_DMA
-#define UNSAFE  /* Not unsafe for PAS16 -- use it */
-#define PDEBUG 0
 
 /*
  * This driver adapted from Drew Eckhardt's Trantor T128 driver
  
 #include <linux/module.h>
 
-#include <linux/signal.h>
-#include <linux/proc_fs.h>
 #include <asm/io.h>
 #include <asm/dma.h>
 #include <linux/blkdev.h>
-#include <linux/delay.h>
 #include <linux/interrupt.h>
-#include <linux/stat.h>
 #include <linux/init.h>
 
 #include <scsi/scsi_host.h>
@@ -87,8 +81,8 @@
 #include "NCR5380.h"
 
 
-static unsigned short pas16_addr = 0;
-static int pas16_irq = 0;
+static unsigned short pas16_addr;
+static int pas16_irq;
  
 
 static const int scsi_irq_translate[] =
@@ -146,22 +140,6 @@ static const unsigned short  pas16_offset[ 8 ] =
                    * START_DMA_INITIATOR_RECEIVE_REG wo
                    */
     };
-/*----------------------------------------------------------------*/
-/* the following will set the monitor border color (useful to find
- where something crashed or gets stuck at */
-/* 1 = blue
- 2 = green
- 3 = cyan
- 4 = red
- 5 = magenta
- 6 = yellow
- 7 = white
-*/
-#if 1
-#define rtrc(i) {inb(0x3da); outb(0x31, 0x3c0); outb((i), 0x3c0);}
-#else
-#define rtrc(i) {}
-#endif
 
 
 /*
@@ -205,7 +183,7 @@ static void __init
        outb( 0x01, io_port + P_TIMEOUT_STATUS_REG_OFFSET );   /* Reset TC */
        outb( 0x01, io_port + WAIT_STATE );   /* 1 Wait state */
 
-       NCR5380_read( RESET_PARITY_INTERRUPT_REG );
+       inb(io_port + pas16_offset[RESET_PARITY_INTERRUPT_REG]);
 
        /* Set the SCSI interrupt pointer without mucking up the sound
         * interrupt pointer in the same byte.
@@ -280,13 +258,13 @@ static int __init
      * put in an additional test to try to weed them out.
      */
 
-    outb( 0x01, io_port + WAIT_STATE );        /* 1 Wait state */
-    NCR5380_write( MODE_REG, 0x20 );           /* Is it really SCSI? */
-    if( NCR5380_read( MODE_REG ) != 0x20 )     /* Write to a reg.    */
-       return 0;                               /* and try to read    */
-    NCR5380_write( MODE_REG, 0x00 );           /* it back.           */
-    if( NCR5380_read( MODE_REG ) != 0x00 )
-       return 0;
+       outb(0x01, io_port + WAIT_STATE);             /* 1 Wait state */
+       outb(0x20, io_port + pas16_offset[MODE_REG]); /* Is it really SCSI? */
+       if (inb(io_port + pas16_offset[MODE_REG]) != 0x20) /* Write to a reg. */
+               return 0;                                  /* and try to read */
+       outb(0x00, io_port + pas16_offset[MODE_REG]);      /* it back. */
+       if (inb(io_port + pas16_offset[MODE_REG]) != 0x00)
+               return 0;
 
     return 1;
 }
@@ -305,7 +283,7 @@ static int __init
 
 static int __init pas16_setup(char *str)
 {
-    static int commandline_current = 0;
+       static int commandline_current;
     int i;
     int ints[10];
 
@@ -344,8 +322,8 @@ __setup("pas16=", pas16_setup);
 
 static int __init pas16_detect(struct scsi_host_template *tpnt)
 {
-    static int current_override = 0;
-    static unsigned short current_base = 0;
+       static int current_override;
+       static unsigned short current_base;
     struct Scsi_Host *instance;
     unsigned short io_port;
     int  count;
@@ -377,34 +355,32 @@ static int __init pas16_detect(struct scsi_host_template *tpnt)
        }
        else
            for (; !io_port && (current_base < NO_BASES); ++current_base) {
-#if (PDEBUG & PDEBUG_INIT)
-    printk("scsi-pas16 : probing io_port %04x\n", (unsigned int) bases[current_base].io_port);
-#endif
+               dprintk(NDEBUG_INIT, "pas16: probing io_port 0x%04x\n",
+                       (unsigned int)bases[current_base].io_port);
                if ( !bases[current_base].noauto &&
                     pas16_hw_detect( current_base ) ){
                        io_port = bases[current_base].io_port;
                        init_board( io_port, default_irqs[ current_base ], 0 ); 
-#if (PDEBUG & PDEBUG_INIT)
-                       printk("scsi-pas16 : detected board.\n");
-#endif
+                       dprintk(NDEBUG_INIT, "pas16: detected board\n");
                }
     }
 
-
-#if defined(PDEBUG) && (PDEBUG & PDEBUG_INIT)
-       printk("scsi-pas16 : io_port = %04x\n", (unsigned int) io_port);
-#endif
+       dprintk(NDEBUG_INIT, "pas16: io_port = 0x%04x\n",
+               (unsigned int)io_port);
 
        if (!io_port)
            break;
 
        instance = scsi_register (tpnt, sizeof(struct NCR5380_hostdata));
        if(instance == NULL)
-               break;
+               goto out;
                
        instance->io_port = io_port;
 
-       NCR5380_init(instance, 0);
+       if (NCR5380_init(instance, 0))
+               goto out_unregister;
+
+       NCR5380_maybe_reset_bus(instance);
 
        if (overrides[current_override].irq != IRQ_AUTO)
            instance->irq = overrides[current_override].irq;
@@ -431,14 +407,18 @@ static int __init pas16_detect(struct scsi_host_template *tpnt)
            outb( (inb(io_port + IO_CONFIG_3) & 0x0f), io_port + IO_CONFIG_3 );
        }
 
-#if defined(PDEBUG) && (PDEBUG & PDEBUG_INIT)
-       printk("scsi%d : irq = %d\n", instance->host_no, instance->irq);
-#endif
+       dprintk(NDEBUG_INIT, "scsi%d : irq = %d\n",
+               instance->host_no, instance->irq);
 
        ++current_override;
        ++count;
     }
     return count;
+
+out_unregister:
+       scsi_unregister(instance);
+out:
+       return count;
 }
 
 /*
@@ -561,29 +541,29 @@ static int pas16_release(struct Scsi_Host *shost)
        if (shost->irq != NO_IRQ)
                free_irq(shost->irq, shost);
        NCR5380_exit(shost);
-       if (shost->io_port && shost->n_io_port)
-               release_region(shost->io_port, shost->n_io_port);
        scsi_unregister(shost);
        return 0;
 }
 
 static struct scsi_host_template driver_template = {
-       .name           = "Pro Audio Spectrum-16 SCSI",
-       .detect         = pas16_detect,
-       .release        = pas16_release,
-       .proc_name      = "pas16",
-       .show_info      = pas16_show_info,
-       .write_info     = pas16_write_info,
-       .info           = pas16_info,
-       .queuecommand   = pas16_queue_command,
-       .eh_abort_handler = pas16_abort,
-       .eh_bus_reset_handler = pas16_bus_reset,
-       .bios_param     = pas16_biosparam, 
-       .can_queue      = CAN_QUEUE,
-       .this_id        = 7,
-       .sg_tablesize   = SG_ALL,
-       .cmd_per_lun    = CMD_PER_LUN,
-       .use_clustering = DISABLE_CLUSTERING,
+       .name                   = "Pro Audio Spectrum-16 SCSI",
+       .detect                 = pas16_detect,
+       .release                = pas16_release,
+       .proc_name              = "pas16",
+       .show_info              = pas16_show_info,
+       .write_info             = pas16_write_info,
+       .info                   = pas16_info,
+       .queuecommand           = pas16_queue_command,
+       .eh_abort_handler       = pas16_abort,
+       .eh_bus_reset_handler   = pas16_bus_reset,
+       .bios_param             = pas16_biosparam,
+       .can_queue              = 32,
+       .this_id                = 7,
+       .sg_tablesize           = SG_ALL,
+       .cmd_per_lun            = 2,
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 #include "scsi_module.c"
 
index c6109c8..d375277 100644 (file)
@@ -24,9 +24,6 @@
 #ifndef PAS16_H
 #define PAS16_H
 
-#define PDEBUG_INIT    0x1
-#define PDEBUG_TRANSFER 0x2
-
 #define PAS16_DEFAULT_BASE_1  0x388
 #define PAS16_DEFAULT_BASE_2  0x384
 #define PAS16_DEFAULT_BASE_3  0x38c
 #define OPERATION_MODE_1 0xec03
 #define IO_CONFIG_3 0xf002
 
+#define NCR5380_implementation_fields /* none */
 
-#ifndef ASM
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 32 
-#endif
-
-#define NCR5380_implementation_fields \
-    volatile unsigned short io_port
-
-#define NCR5380_local_declare() \
-    volatile unsigned short io_port
+#define PAS16_io_port(reg) (instance->io_port + pas16_offset[(reg)])
 
-#define NCR5380_setup(instance) \
-    io_port = (instance)->io_port
-
-#define PAS16_io_port(reg) ( io_port + pas16_offset[(reg)] )
-
-#if !(PDEBUG & PDEBUG_TRANSFER) 
 #define NCR5380_read(reg) ( inb(PAS16_io_port(reg)) )
 #define NCR5380_write(reg, value) ( outb((value),PAS16_io_port(reg)) )
-#else
-#define NCR5380_read(reg)                                              \
-    (((unsigned char) printk("scsi%d : read register %d at io_port %04x\n"\
-    , instance->hostno, (reg), PAS16_io_port(reg))), inb( PAS16_io_port(reg)) )
-
-#define NCR5380_write(reg, value)                                      \
-    (printk("scsi%d : write %02x to register %d at io_port %04x\n",    \
-           instance->hostno, (value), (reg), PAS16_io_port(reg)),      \
-    outb( (value),PAS16_io_port(reg) ) )
-
-#endif
 
+#define NCR5380_dma_xfer_len(instance, cmd, phase)     (cmd->transfersize)
 
 #define NCR5380_intr pas16_intr
-#define do_NCR5380_intr do_pas16_intr
 #define NCR5380_queue_command pas16_queue_command
 #define NCR5380_abort pas16_abort
 #define NCR5380_bus_reset pas16_bus_reset
    
 #define PAS16_IRQS 0xd4a8 
 
-#endif /* ndef ASM */
 #endif /* PAS16_H */
index 2c1160c..47b9d13 100644 (file)
@@ -227,6 +227,7 @@ static struct {
        {"Promise", "VTrak E610f", NULL, BLIST_SPARSELUN | BLIST_NO_RSOC},
        {"Promise", "", NULL, BLIST_SPARSELUN},
        {"QNAP", "iSCSI Storage", NULL, BLIST_MAX_1024},
+       {"SYNOLOGY", "iSCSI Storage", NULL, BLIST_MAX_1024},
        {"QUANTUM", "XP34301", "1071", BLIST_NOTQ},
        {"REGAL", "CDC-4X", NULL, BLIST_MAX5LUN | BLIST_SINGLELUN},
        {"SanDisk", "ImageMate CF-SD1", NULL, BLIST_FORCELUN},
index 4e08d1c..bb669d3 100644 (file)
@@ -2893,7 +2893,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
            sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
            sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_CACHE_SIZE)
                rw_max = q->limits.io_opt =
-                       logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
+                       sdkp->opt_xfer_blocks * sdp->sector_size;
        else
                rw_max = BLK_DEF_MAX_SECTORS;
 
@@ -3268,8 +3268,8 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
        struct scsi_disk *sdkp = dev_get_drvdata(dev);
        int ret = 0;
 
-       if (!sdkp)
-               return 0;       /* this can happen */
+       if (!sdkp)      /* E.g.: runtime suspend following sd_remove() */
+               return 0;
 
        if (sdkp->WCE && sdkp->media_present) {
                sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
@@ -3308,6 +3308,9 @@ static int sd_resume(struct device *dev)
 {
        struct scsi_disk *sdkp = dev_get_drvdata(dev);
 
+       if (!sdkp)      /* E.g.: runtime resume at the start of sd_probe() */
+               return 0;
+
        if (!sdkp->device->manage_start_stop)
                return 0;
 
index 503ab8b..5e82067 100644 (file)
@@ -1261,7 +1261,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma)
        }
 
        sfp->mmap_called = 1;
-       vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+       vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
        vma->vm_private_data = sfp;
        vma->vm_ops = &sg_mmap_vm_ops;
        return 0;
index 8bd54a6..64c8674 100644 (file)
@@ -144,6 +144,9 @@ static int sr_runtime_suspend(struct device *dev)
 {
        struct scsi_cd *cd = dev_get_drvdata(dev);
 
+       if (!cd)        /* E.g.: runtime suspend following sr_remove() */
+               return 0;
+
        if (cd->media_present)
                return -EBUSY;
        else
@@ -985,6 +988,7 @@ static int sr_remove(struct device *dev)
        scsi_autopm_get_device(cd->device);
 
        del_gendisk(cd->disk);
+       dev_set_drvdata(dev, NULL);
 
        mutex_lock(&sr_ref_mutex);
        kref_put(&cd->kref, sr_kref_release);
index 41c115c..55627d0 100644 (file)
@@ -390,7 +390,7 @@ module_param(storvsc_ringbuffer_size, int, S_IRUGO);
 MODULE_PARM_DESC(storvsc_ringbuffer_size, "Ring buffer size (bytes)");
 
 module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO);
-MODULE_PARM_DESC(vcpus_per_sub_channel, "Ratio of VCPUs to subchannels");
+MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels");
 /*
  * Timeout in seconds for all devices managed by this driver.
  */
index 22a4283..b9de487 100644 (file)
 #define NCR5380_queue_command           sun3scsi_queue_command
 #define NCR5380_bus_reset               sun3scsi_bus_reset
 #define NCR5380_abort                   sun3scsi_abort
-#define NCR5380_show_info               sun3scsi_show_info
 #define NCR5380_info                    sun3scsi_info
 
 #define NCR5380_dma_read_setup(instance, data, count) \
-        sun3scsi_dma_setup(data, count, 0)
+        sun3scsi_dma_setup(instance, data, count, 0)
 #define NCR5380_dma_write_setup(instance, data, count) \
-        sun3scsi_dma_setup(data, count, 1)
+        sun3scsi_dma_setup(instance, data, count, 1)
 #define NCR5380_dma_residual(instance) \
         sun3scsi_dma_residual(instance)
 #define NCR5380_dma_xfer_len(instance, cmd, phase) \
@@ -86,10 +85,6 @@ module_param(setup_use_tagged_queuing, int, 0);
 static int setup_hostid = -1;
 module_param(setup_hostid, int, 0);
 
-/* #define RESET_BOOT */
-
-#define        AFTER_RESET_DELAY       (HZ/2)
-
 /* ms to wait after hitting dma regs */
 #define SUN3_DMA_DELAY 10
 
@@ -100,11 +95,10 @@ static struct scsi_cmnd *sun3_dma_setup_done;
 static unsigned char *sun3_scsi_regp;
 static volatile struct sun3_dma_regs *dregs;
 static struct sun3_udc_regs *udc_regs;
-static unsigned char *sun3_dma_orig_addr = NULL;
-static unsigned long sun3_dma_orig_count = 0;
-static int sun3_dma_active = 0;
-static unsigned long last_residual = 0;
-static struct Scsi_Host *default_instance;
+static unsigned char *sun3_dma_orig_addr;
+static unsigned long sun3_dma_orig_count;
+static int sun3_dma_active;
+static unsigned long last_residual;
 
 /*
  * NCR 5380 register access functions
@@ -144,50 +138,12 @@ static inline void sun3_udc_write(unsigned short val, unsigned char reg)
 }
 #endif
 
-#ifdef RESET_BOOT
-static void sun3_scsi_reset_boot(struct Scsi_Host *instance)
-{
-       unsigned long end;
-       
-       /*
-        * Do a SCSI reset to clean up the bus during initialization. No
-        * messing with the queues, interrupts, or locks necessary here.
-        */
-
-       printk( "Sun3 SCSI: resetting the SCSI bus..." );
-
-       /* switch off SCSI IRQ - catch an interrupt without IRQ bit set else */
-//             sun3_disable_irq( IRQ_SUN3_SCSI );
-
-       /* get in phase */
-       NCR5380_write( TARGET_COMMAND_REG,
-                     PHASE_SR_TO_TCR( NCR5380_read(STATUS_REG) ));
-
-       /* assert RST */
-       NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST );
-
-       /* The min. reset hold time is 25us, so 40us should be enough */
-       udelay( 50 );
-
-       /* reset RST and interrupt */
-       NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE );
-       NCR5380_read( RESET_PARITY_INTERRUPT_REG );
-
-       for( end = jiffies + AFTER_RESET_DELAY; time_before(jiffies, end); )
-               barrier();
-
-       /* switch on SCSI IRQ again */
-//             sun3_enable_irq( IRQ_SUN3_SCSI );
-
-       printk( " done\n" );
-}
-#endif
-
 // safe bits for the CSR
 #define CSR_GOOD 0x060f
 
-static irqreturn_t scsi_sun3_intr(int irq, void *dummy)
+static irqreturn_t scsi_sun3_intr(int irq, void *dev)
 {
+       struct Scsi_Host *instance = dev;
        unsigned short csr = dregs->csr;
        int handled = 0;
 
@@ -196,46 +152,24 @@ static irqreturn_t scsi_sun3_intr(int irq, void *dummy)
 #endif
 
        if(csr & ~CSR_GOOD) {
-               if(csr & CSR_DMA_BUSERR) {
-                       printk("scsi%d: bus error in dma\n", default_instance->host_no);
-               }
-
-               if(csr & CSR_DMA_CONFLICT) {
-                       printk("scsi%d: dma conflict\n", default_instance->host_no);
-               }
+               if (csr & CSR_DMA_BUSERR)
+                       shost_printk(KERN_ERR, instance, "bus error in DMA\n");
+               if (csr & CSR_DMA_CONFLICT)
+                       shost_printk(KERN_ERR, instance, "DMA conflict\n");
                handled = 1;
        }
 
        if(csr & (CSR_SDB_INT | CSR_DMA_INT)) {
-               NCR5380_intr(irq, dummy);
+               NCR5380_intr(irq, dev);
                handled = 1;
        }
 
        return IRQ_RETVAL(handled);
 }
 
-/*
- * Debug stuff - to be called on NMI, or sysrq key. Use at your own risk; 
- * reentering NCR5380_print_status seems to have ugly side effects
- */
-
-/* this doesn't seem to get used at all -- sam */
-#if 0
-void sun3_sun3_debug (void)
-{
-       unsigned long flags;
-
-       if (default_instance) {
-                       local_irq_save(flags);
-                       NCR5380_print_status(default_instance);
-                       local_irq_restore(flags);
-       }
-}
-#endif
-
-
 /* sun3scsi_dma_setup() -- initialize the dma controller for a read/write */
-static unsigned long sun3scsi_dma_setup(void *data, unsigned long count, int write_flag)
+static unsigned long sun3scsi_dma_setup(struct Scsi_Host *instance,
+                                void *data, unsigned long count, int write_flag)
 {
        void *addr;
 
@@ -287,10 +221,9 @@ static unsigned long sun3scsi_dma_setup(void *data, unsigned long count, int wri
        dregs->csr |= CSR_FIFO;
        
        if(dregs->fifo_count != count) { 
-               printk("scsi%d: fifo_mismatch %04x not %04x\n",
-                      default_instance->host_no, dregs->fifo_count,
-                      (unsigned int) count);
-               NCR5380_dprint(NDEBUG_DMA, default_instance);
+               shost_printk(KERN_ERR, instance, "FIFO mismatch %04x not %04x\n",
+                            dregs->fifo_count, (unsigned int) count);
+               NCR5380_dprint(NDEBUG_DMA, instance);
        }
 
        /* setup udc */
@@ -325,21 +258,6 @@ static unsigned long sun3scsi_dma_setup(void *data, unsigned long count, int wri
 
 }
 
-#ifndef SUN3_SCSI_VME
-static inline unsigned long sun3scsi_dma_count(struct Scsi_Host *instance)
-{
-       unsigned short resid;
-
-       dregs->udc_addr = 0x32; 
-       udelay(SUN3_DMA_DELAY);
-       resid = dregs->udc_data;
-       udelay(SUN3_DMA_DELAY);
-       resid *= 2;
-
-       return (unsigned long) resid;
-}
-#endif
-
 static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance)
 {
        return last_residual;
@@ -437,7 +355,10 @@ static int sun3scsi_dma_finish(int write_flag)
                }
        }
 
-       count = sun3scsi_dma_count(default_instance);
+       dregs->udc_addr = 0x32;
+       udelay(SUN3_DMA_DELAY);
+       count = 2 * dregs->udc_data;
+       udelay(SUN3_DMA_DELAY);
 
        fifo = dregs->fifo_count;
        last_residual = fifo;
@@ -502,17 +423,17 @@ static int sun3scsi_dma_finish(int write_flag)
 static struct scsi_host_template sun3_scsi_template = {
        .module                 = THIS_MODULE,
        .proc_name              = DRV_MODULE_NAME,
-       .show_info              = sun3scsi_show_info,
        .name                   = SUN3_SCSI_NAME,
        .info                   = sun3scsi_info,
        .queuecommand           = sun3scsi_queue_command,
-       .eh_abort_handler       = sun3scsi_abort,
-       .eh_bus_reset_handler   = sun3scsi_bus_reset,
+       .eh_abort_handler       = sun3scsi_abort,
+       .eh_bus_reset_handler   = sun3scsi_bus_reset,
        .can_queue              = 16,
        .this_id                = 7,
        .sg_tablesize           = SG_NONE,
        .cmd_per_lun            = 2,
-       .use_clustering         = DISABLE_CLUSTERING
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
 };
 
 static int __init sun3_scsi_probe(struct platform_device *pdev)
@@ -591,7 +512,6 @@ static int __init sun3_scsi_probe(struct platform_device *pdev)
                error = -ENOMEM;
                goto fail_alloc;
        }
-       default_instance = instance;
 
        instance->io_port = (unsigned long)ioaddr;
        instance->irq = irq->start;
@@ -600,7 +520,9 @@ static int __init sun3_scsi_probe(struct platform_device *pdev)
        host_flags |= setup_use_tagged_queuing > 0 ? FLAG_TAGGED_QUEUING : 0;
 #endif
 
-       NCR5380_init(instance, host_flags);
+       error = NCR5380_init(instance, host_flags);
+       if (error)
+               goto fail_init;
 
        error = request_irq(instance->irq, scsi_sun3_intr, 0,
                            "NCR5380", instance);
@@ -631,9 +553,7 @@ static int __init sun3_scsi_probe(struct platform_device *pdev)
        dregs->ivect = VME_DATA24 | (instance->irq & 0xff);
 #endif
 
-#ifdef RESET_BOOT
-       sun3_scsi_reset_boot(instance);
-#endif
+       NCR5380_maybe_reset_bus(instance);
 
        error = scsi_add_host(instance, NULL);
        if (error)
@@ -649,6 +569,7 @@ fail_host:
                free_irq(instance->irq, instance);
 fail_irq:
        NCR5380_exit(instance);
+fail_init:
        scsi_host_put(instance);
 fail_alloc:
        if (udc_regs)
index 87828ac..4615fda 100644 (file)
  * 15 9-11
  */
  
-#include <linux/signal.h>
 #include <linux/io.h>
 #include <linux/blkdev.h>
 #include <linux/interrupt.h>
-#include <linux/stat.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/delay.h>
 
 #include <scsi/scsi_host.h>
 #include "t128.h"
@@ -126,7 +123,7 @@ static struct signature {
 
 static int __init t128_setup(char *str)
 {
-    static int commandline_current = 0;
+       static int commandline_current;
     int i;
     int ints[10];
 
@@ -165,7 +162,7 @@ __setup("t128=", t128_setup);
 
 static int __init t128_detect(struct scsi_host_template *tpnt)
 {
-    static int current_override = 0, current_base = 0;
+       static int current_override, current_base;
     struct Scsi_Host *instance;
     unsigned long base;
     void __iomem *p;
@@ -182,9 +179,8 @@ static int __init t128_detect(struct scsi_host_template *tpnt)
                base = 0;
        } else 
            for (; !base && (current_base < NO_BASES); ++current_base) {
-#if (TDEBUG & TDEBUG_INIT)
-    printk("scsi-t128 : probing address %08x\n", bases[current_base].address);
-#endif
+               dprintk(NDEBUG_INIT, "t128: probing address 0x%08x\n",
+                       bases[current_base].address);
                if (bases[current_base].noauto)
                        continue;
                p = ioremap(bases[current_base].address, 0x2000);
@@ -195,17 +191,13 @@ static int __init t128_detect(struct scsi_host_template *tpnt)
                                        signatures[sig].string,
                                        strlen(signatures[sig].string))) {
                        base = bases[current_base].address;
-#if (TDEBUG & TDEBUG_INIT)
-                       printk("scsi-t128 : detected board.\n");
-#endif
+                       dprintk(NDEBUG_INIT, "t128: detected board\n");
                        goto found;
                    }
                iounmap(p);
            }
 
-#if defined(TDEBUG) && (TDEBUG & TDEBUG_INIT)
-       printk("scsi-t128 : base = %08x\n", (unsigned int) base);
-#endif
+       dprintk(NDEBUG_INIT, "t128: base = 0x%08x\n", (unsigned int)base);
 
        if (!base)
            break;
@@ -213,12 +205,15 @@ static int __init t128_detect(struct scsi_host_template *tpnt)
 found:
        instance = scsi_register (tpnt, sizeof(struct NCR5380_hostdata));
        if(instance == NULL)
-               break;
-               
+               goto out_unmap;
+
        instance->base = base;
        ((struct NCR5380_hostdata *)instance->hostdata)->base = p;
 
-       NCR5380_init(instance, 0);
+       if (NCR5380_init(instance, 0))
+               goto out_unregister;
+
+       NCR5380_maybe_reset_bus(instance);
 
        if (overrides[current_override].irq != IRQ_AUTO)
            instance->irq = overrides[current_override].irq;
@@ -242,27 +237,30 @@ found:
            printk("scsi%d : please jumper the board for a free IRQ.\n", instance->host_no);
        }
 
-#if defined(TDEBUG) && (TDEBUG & TDEBUG_INIT)
-       printk("scsi%d : irq = %d\n", instance->host_no, instance->irq);
-#endif
+       dprintk(NDEBUG_INIT, "scsi%d: irq = %d\n",
+               instance->host_no, instance->irq);
 
        ++current_override;
        ++count;
     }
     return count;
+
+out_unregister:
+       scsi_unregister(instance);
+out_unmap:
+       iounmap(p);
+       return count;
 }
 
 static int t128_release(struct Scsi_Host *shost)
 {
-       NCR5380_local_declare();
-       NCR5380_setup(shost);
+       struct NCR5380_hostdata *hostdata = shost_priv(shost);
+
        if (shost->irq != NO_IRQ)
                free_irq(shost->irq, shost);
        NCR5380_exit(shost);
-       if (shost->io_port && shost->n_io_port)
-               release_region(shost->io_port, shost->n_io_port);
        scsi_unregister(shost);
-       iounmap(base);
+       iounmap(hostdata->base);
        return 0;
 }
 
@@ -308,14 +306,14 @@ static int t128_biosparam(struct scsi_device *sdev, struct block_device *bdev,
  *     timeout.
  */
 
-static inline int NCR5380_pread (struct Scsi_Host *instance, unsigned char *dst,
-    int len) {
-    NCR5380_local_declare();
-    void __iomem *reg;
+static inline int
+NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst, int len)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       void __iomem *reg, *base = hostdata->base;
     unsigned char *d = dst;
     register int i = len;
 
-    NCR5380_setup(instance);
     reg = base + T_DATA_REG_OFFSET;
 
 #if 0
@@ -354,14 +352,14 @@ static inline int NCR5380_pread (struct Scsi_Host *instance, unsigned char *dst,
  *     timeout.
  */
 
-static inline int NCR5380_pwrite (struct Scsi_Host *instance, unsigned char *src,
-    int len) {
-    NCR5380_local_declare();
-    void __iomem *reg;
+static inline int
+NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src, int len)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       void __iomem *reg, *base = hostdata->base;
     unsigned char *s = src;
     register int i = len;
 
-    NCR5380_setup(instance);
     reg = base + T_DATA_REG_OFFSET;
 
 #if 0
@@ -392,21 +390,23 @@ MODULE_LICENSE("GPL");
 #include "NCR5380.c"
 
 static struct scsi_host_template driver_template = {
-       .name           = "Trantor T128/T128F/T228",
-       .detect         = t128_detect,
-       .release        = t128_release,
-       .proc_name      = "t128",
-       .show_info      = t128_show_info,
-       .write_info     = t128_write_info,
-       .info           = t128_info,
-       .queuecommand   = t128_queue_command,
-       .eh_abort_handler = t128_abort,
-       .eh_bus_reset_handler    = t128_bus_reset,
-       .bios_param     = t128_biosparam,
-       .can_queue      = CAN_QUEUE,
-        .this_id        = 7,
-       .sg_tablesize   = SG_ALL,
-       .cmd_per_lun    = CMD_PER_LUN,
-       .use_clustering = DISABLE_CLUSTERING,
+       .name                   = "Trantor T128/T128F/T228",
+       .detect                 = t128_detect,
+       .release                = t128_release,
+       .proc_name              = "t128",
+       .show_info              = t128_show_info,
+       .write_info             = t128_write_info,
+       .info                   = t128_info,
+       .queuecommand           = t128_queue_command,
+       .eh_abort_handler       = t128_abort,
+       .eh_bus_reset_handler   = t128_bus_reset,
+       .bios_param             = t128_biosparam,
+       .can_queue              = 32,
+       .this_id                = 7,
+       .sg_tablesize           = SG_ALL,
+       .cmd_per_lun            = 2,
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 #include "scsi_module.c"
index 2c73714..dd16d85 100644 (file)
 #ifndef T128_H
 #define T128_H
 
-#define TDEBUG         0
-#define TDEBUG_INIT    0x1
-#define TDEBUG_TRANSFER 0x2
-
 /*
  * The trantor boards are memory mapped. They use an NCR5380 or
  * equivalent (my sample board had part second sourced from ZILOG).
 
 #define T_DATA_REG_OFFSET      0x1e00  /* rw 512 bytes long */
 
-#ifndef ASM
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 32
-#endif
-
 #define NCR5380_implementation_fields \
     void __iomem *base
 
-#define NCR5380_local_declare() \
-    void __iomem *base
-
-#define NCR5380_setup(instance) \
-    base = ((struct NCR5380_hostdata *)(instance->hostdata))->base
+#define T128_address(reg) \
+       (((struct NCR5380_hostdata *)shost_priv(instance))->base + T_5380_OFFSET + ((reg) * 0x20))
 
-#define T128_address(reg) (base + T_5380_OFFSET + ((reg) * 0x20))
-
-#if !(TDEBUG & TDEBUG_TRANSFER)
 #define NCR5380_read(reg) readb(T128_address(reg))
 #define NCR5380_write(reg, value) writeb((value),(T128_address(reg)))
-#else
-#define NCR5380_read(reg)                                              \
-    (((unsigned char) printk("scsi%d : read register %d at address %08x\n"\
-    , instance->hostno, (reg), T128_address(reg))), readb(T128_address(reg)))
-
-#define NCR5380_write(reg, value) {                                    \
-    printk("scsi%d : write %02x to register %d at address %08x\n",     \
-           instance->hostno, (value), (reg), T128_address(reg));       \
-    writeb((value), (T128_address(reg)));                              \
-}
-#endif
+
+#define NCR5380_dma_xfer_len(instance, cmd, phase)     (cmd->transfersize)
 
 #define NCR5380_intr t128_intr
-#define do_NCR5380_intr do_t128_intr
 #define NCR5380_queue_command t128_queue_command
 #define NCR5380_abort t128_abort
 #define NCR5380_bus_reset t128_bus_reset
 
 #define T128_IRQS 0xc4a8
 
-#endif /* ndef ASM */
 #endif /* T128_H */
index fb2b393..8826020 100644 (file)
@@ -7,6 +7,7 @@ source "drivers/soc/mediatek/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
 source "drivers/soc/sunxi/Kconfig"
+source "drivers/soc/tegra/Kconfig"
 source "drivers/soc/ti/Kconfig"
 source "drivers/soc/versatile/Kconfig"
 
index 0ad66fa..5548a31 100644 (file)
@@ -288,7 +288,7 @@ static struct spm_driver_data *spm_get_drv(struct platform_device *pdev,
        struct spm_driver_data *drv = NULL;
        struct device_node *cpu_node, *saw_node;
        int cpu;
-       bool found;
+       bool found = 0;
 
        for_each_possible_cpu(cpu) {
                cpu_node = of_cpu_device_node_get(cpu);
diff --git a/drivers/soc/tegra/Kconfig b/drivers/soc/tegra/Kconfig
new file mode 100644 (file)
index 0000000..d0c3c3e
--- /dev/null
@@ -0,0 +1,83 @@
+if ARCH_TEGRA
+
+# 32-bit ARM SoCs
+if ARM
+
+config ARCH_TEGRA_2x_SOC
+       bool "Enable support for Tegra20 family"
+       select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP
+       select ARM_ERRATA_720789
+       select ARM_ERRATA_754327 if SMP
+       select ARM_ERRATA_764369 if SMP
+       select PINCTRL_TEGRA20
+       select PL310_ERRATA_727915 if CACHE_L2X0
+       select PL310_ERRATA_769419 if CACHE_L2X0
+       select TEGRA_TIMER
+       help
+         Support for NVIDIA Tegra AP20 and T20 processors, based on the
+         ARM CortexA9MP CPU and the ARM PL310 L2 cache controller
+
+config ARCH_TEGRA_3x_SOC
+       bool "Enable support for Tegra30 family"
+       select ARM_ERRATA_754322
+       select ARM_ERRATA_764369 if SMP
+       select PINCTRL_TEGRA30
+       select PL310_ERRATA_769419 if CACHE_L2X0
+       select TEGRA_TIMER
+       help
+         Support for NVIDIA Tegra T30 processor family, based on the
+         ARM CortexA9MP CPU and the ARM PL310 L2 cache controller
+
+config ARCH_TEGRA_114_SOC
+       bool "Enable support for Tegra114 family"
+       select ARM_ERRATA_798181 if SMP
+       select ARM_L1_CACHE_SHIFT_6
+       select HAVE_ARM_ARCH_TIMER
+       select PINCTRL_TEGRA114
+       select TEGRA_TIMER
+       help
+         Support for NVIDIA Tegra T114 processor family, based on the
+         ARM CortexA15MP CPU
+
+config ARCH_TEGRA_124_SOC
+       bool "Enable support for Tegra124 family"
+       select ARM_L1_CACHE_SHIFT_6
+       select HAVE_ARM_ARCH_TIMER
+       select PINCTRL_TEGRA124
+       select TEGRA_TIMER
+       help
+         Support for NVIDIA Tegra T124 processor family, based on the
+         ARM CortexA15MP CPU
+
+endif
+
+# 64-bit ARM SoCs
+if ARM64
+
+config ARCH_TEGRA_132_SOC
+       bool "NVIDIA Tegra132 SoC"
+       select PINCTRL_TEGRA124
+       help
+         Enable support for NVIDIA Tegra132 SoC, based on the Denver
+         ARMv8 CPU.  The Tegra132 SoC is similar to the Tegra124 SoC,
+         but contains an NVIDIA Denver CPU complex in place of
+         Tegra124's "4+1" Cortex-A15 CPU complex.
+
+config ARCH_TEGRA_210_SOC
+       bool "NVIDIA Tegra210 SoC"
+       select PINCTRL_TEGRA210
+       help
+         Enable support for the NVIDIA Tegra210 SoC. Also known as Tegra X1,
+         the Tegra210 has four Cortex-A57 cores paired with four Cortex-A53
+         cores in a switched configuration. It features a GPU of the Maxwell
+         architecture with support for DX11, SM4, OpenGL 4.5, OpenGL ES 3.1
+         and providing 256 CUDA cores. It supports hardware-accelerated en-
+         and decoding of various video standards including H.265, H.264 and
+         VP8 at 4K resolution and up to 60 fps.
+
+         Besides the multimedia features it also comes with a variety of I/O
+         controllers, such as GPIO, I2C, SPI, SDHCI, PCIe, SATA and XHCI, to
+         name only a few.
+
+endif
+endif
index cde5ff7..d1a7507 100644 (file)
@@ -613,9 +613,10 @@ out:
        return err;
 }
 
-static int ssb_bus_register(struct ssb_bus *bus,
-                           ssb_invariants_func_t get_invariants,
-                           unsigned long baseaddr)
+static int __maybe_unused
+ssb_bus_register(struct ssb_bus *bus,
+                ssb_invariants_func_t get_invariants,
+                unsigned long baseaddr)
 {
        int err;
 
index 58d4517..b9519be 100644 (file)
@@ -6,6 +6,7 @@ menu "Analog to digital converters"
 config AD7606
        tristate "Analog Devices AD7606 ADC driver"
        depends on GPIOLIB || COMPILE_TEST
+       depends on HAS_IOMEM
        select IIO_BUFFER
        select IIO_TRIGGERED_BUFFER
        help
index f129039..6928710 100644 (file)
@@ -217,8 +217,12 @@ error_ret:
 static int ade7753_reset(struct device *dev)
 {
        u16 val;
+       int ret;
+
+       ret = ade7753_spi_read_reg_16(dev, ADE7753_MODE, &val);
+       if (ret)
+               return ret;
 
-       ade7753_spi_read_reg_16(dev, ADE7753_MODE, &val);
        val |= BIT(6); /* Software Chip Reset */
 
        return ade7753_spi_write_reg_16(dev, ADE7753_MODE, val);
@@ -343,8 +347,12 @@ error_ret:
 static int ade7753_stop_device(struct device *dev)
 {
        u16 val;
+       int ret;
+
+       ret = ade7753_spi_read_reg_16(dev, ADE7753_MODE, &val);
+       if (ret)
+               return ret;
 
-       ade7753_spi_read_reg_16(dev, ADE7753_MODE, &val);
        val |= BIT(4);  /* AD converters can be turned off */
 
        return ade7753_spi_write_reg_16(dev, ADE7753_MODE, val);
index d6273e1..a80d993 100644 (file)
@@ -151,16 +151,12 @@ do {                                                                          \
 
 #define LIBCFS_FREE(ptr, size)                                   \
 do {                                                               \
-       int s = (size);                                          \
        if (unlikely((ptr) == NULL)) {                            \
                CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at "    \
-                      "%s:%d\n", s, __FILE__, __LINE__);              \
+                      "%s:%d\n", (int)(size), __FILE__, __LINE__);     \
                break;                                            \
        }                                                              \
-       if (unlikely(s > LIBCFS_VMALLOC_SIZE))                    \
-               vfree(ptr);                                 \
-       else                                                        \
-               kfree(ptr);                                       \
+       kvfree(ptr);                                      \
 } while (0)
 
 /******************************************************************************/
index 72af486..cb74ae7 100644 (file)
@@ -2070,32 +2070,13 @@ static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
 
 static int kiblnd_hdev_get_attr(kib_hca_dev_t *hdev)
 {
-       struct ib_device_attr *attr;
-       int rc;
-
        /* It's safe to assume a HCA can handle a page size
         * matching that of the native system */
        hdev->ibh_page_shift = PAGE_SHIFT;
        hdev->ibh_page_size  = 1 << PAGE_SHIFT;
        hdev->ibh_page_mask  = ~((__u64)hdev->ibh_page_size - 1);
 
-       LIBCFS_ALLOC(attr, sizeof(*attr));
-       if (attr == NULL) {
-               CERROR("Out of memory\n");
-               return -ENOMEM;
-       }
-
-       rc = ib_query_device(hdev->ibh_ibdev, attr);
-       if (rc == 0)
-               hdev->ibh_mr_size = attr->max_mr_size;
-
-       LIBCFS_FREE(attr, sizeof(*attr));
-
-       if (rc != 0) {
-               CERROR("Failed to query IB device: %d\n", rc);
-               return rc;
-       }
-
+       hdev->ibh_mr_size = hdev->ibh_ibdev->attrs.max_mr_size;
        if (hdev->ibh_mr_size == ~0ULL) {
                hdev->ibh_mr_shift = 64;
                return 0;
index 7b35531..8982f7d 100644 (file)
@@ -1858,7 +1858,7 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
        int api32 = ll_need_32bit_api(sbi);
        loff_t ret = -EINVAL;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        switch (origin) {
        case SEEK_SET:
                break;
@@ -1896,7 +1896,7 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
        goto out;
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index c92d58b..39e2ffd 100644 (file)
@@ -2082,17 +2082,17 @@ putgl:
        /* update time if requested */
        rc = 0;
        if (llss->ia2.ia_valid != 0) {
-               mutex_lock(&llss->inode1->i_mutex);
+               inode_lock(llss->inode1);
                rc = ll_setattr(file1->f_path.dentry, &llss->ia2);
-               mutex_unlock(&llss->inode1->i_mutex);
+               inode_unlock(llss->inode1);
        }
 
        if (llss->ia1.ia_valid != 0) {
                int rc1;
 
-               mutex_lock(&llss->inode2->i_mutex);
+               inode_lock(llss->inode2);
                rc1 = ll_setattr(file2->f_path.dentry, &llss->ia1);
-               mutex_unlock(&llss->inode2->i_mutex);
+               inode_unlock(llss->inode2);
                if (rc == 0)
                        rc = rc1;
        }
@@ -2179,13 +2179,13 @@ static int ll_hsm_import(struct inode *inode, struct file *file,
                         ATTR_MTIME | ATTR_MTIME_SET |
                         ATTR_ATIME | ATTR_ATIME_SET;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        rc = ll_setattr_raw(file->f_path.dentry, attr, true);
        if (rc == -ENODATA)
                rc = 0;
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        kfree(attr);
 free_hss:
@@ -2609,7 +2609,7 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
 
        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* catch async errors that were recorded back when async writeback
         * failed for pages in this mapping. */
@@ -2641,7 +2641,7 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
                        fd->fd_write_failed = false;
        }
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return rc;
 }
 
index ee8a1d6..845e992 100644 (file)
@@ -631,8 +631,6 @@ struct ll_file_data {
 
 struct lov_stripe_md;
 
-extern spinlock_t inode_lock;
-
 extern struct dentry *llite_root;
 extern struct kset *llite_kset;
 
index 1db93af..b2fc5b3 100644 (file)
@@ -1277,7 +1277,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
                return -ENOMEM;
 
        if (!S_ISDIR(inode->i_mode))
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
        memcpy(&op_data->op_attr, attr, sizeof(*attr));
 
@@ -1358,7 +1358,7 @@ out:
        ll_finish_md_op_data(op_data);
 
        if (!S_ISDIR(inode->i_mode)) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
                        inode_dio_wait(inode);
        }
index e578a11..18aab25 100644 (file)
@@ -245,9 +245,9 @@ static int ll_get_name(struct dentry *dentry, char *name,
                goto out;
        }
 
-       mutex_lock(&dir->i_mutex);
+       inode_lock(dir);
        rc = ll_dir_read(dir, &lgd.ctx);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        if (!rc && !lgd.lgd_found)
                rc = -ENOENT;
 out:
index 420d391..871924b 100644 (file)
@@ -257,9 +257,9 @@ static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head)
         *    be asked to write less pages once, this purely depends on
         *    implementation. Anyway, we should be careful to avoid deadlocking.
         */
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        bytes = ll_direct_rw_pages(env, io, rw, inode, pvec);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        cl_io_fini(env, io);
        return (bytes == pvec->ldp_size) ? 0 : (int)bytes;
 }
index 95cdb0c..f355474 100644 (file)
@@ -115,8 +115,8 @@ static struct ll_cl_context *ll_cl_init(struct file *file,
                struct inode *inode = vmpage->mapping->host;
                loff_t pos;
 
-               if (mutex_trylock(&inode->i_mutex)) {
-                       mutex_unlock(&(inode)->i_mutex);
+               if (inode_trylock(inode)) {
+                       inode_unlock((inode));
 
                        /* this is too bad. Someone is trying to write the
                         * page w/o holding inode mutex. This means we can
index 39fa13b..711fda9 100644 (file)
@@ -403,7 +403,7 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
         * 1. Need inode mutex to operate transient pages.
         */
        if (iov_iter_rw(iter) == READ)
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
 
        LASSERT(obj->cob_transient_pages == 0);
        while (iov_iter_count(iter)) {
@@ -454,7 +454,7 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
 out:
        LASSERT(obj->cob_transient_pages == 0);
        if (iov_iter_rw(iter) == READ)
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
        if (tot_bytes > 0) {
                if (iov_iter_rw(iter) == WRITE) {
index f68e972..0920ac6 100644 (file)
@@ -439,7 +439,7 @@ static int vvp_io_setattr_start(const struct lu_env *env,
        struct inode    *inode = ccc_object_inode(io->ci_obj);
        int result = 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        if (cl_io_is_trunc(io))
                result = vvp_io_setattr_trunc(env, ios, inode,
                                        io->u.ci_setattr.sa_attr.lvb_size);
@@ -459,7 +459,7 @@ static void vvp_io_setattr_end(const struct lu_env *env,
                 * because osc has already notified to destroy osc_extents. */
                vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size);
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 }
 
 static void vvp_io_setattr_fini(const struct lu_env *env,
index 99c0d7a..a133475 100644 (file)
@@ -428,7 +428,7 @@ static void vvp_transient_page_verify(const struct cl_page *page)
 {
        struct inode *inode = ccc_object_inode(page->cp_obj);
 
-       LASSERT(!mutex_trylock(&inode->i_mutex));
+       LASSERT(!inode_trylock(inode));
 }
 
 static int vvp_transient_page_own(const struct lu_env *env,
@@ -480,9 +480,9 @@ static int vvp_transient_page_is_vmlocked(const struct lu_env *env,
        struct inode    *inode = ccc_object_inode(slice->cpl_obj);
        int     locked;
 
-       locked = !mutex_trylock(&inode->i_mutex);
+       locked = !inode_trylock(inode);
        if (!locked)
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        return locked ? -EBUSY : -ENODATA;
 }
 
@@ -502,7 +502,7 @@ static void vvp_transient_page_fini(const struct lu_env *env,
        struct ccc_object *clobj = cl2ccc(clp->cp_obj);
 
        vvp_page_fini_common(cp);
-       LASSERT(!mutex_trylock(&clobj->cob_inode->i_mutex));
+       LASSERT(!inode_trylock(clobj->cob_inode));
        clobj->cob_transient_pages--;
 }
 
@@ -548,7 +548,7 @@ int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
        } else {
                struct ccc_object *clobj = cl2ccc(obj);
 
-               LASSERT(!mutex_trylock(&clobj->cob_inode->i_mutex));
+               LASSERT(!inode_trylock(clobj->cob_inode));
                cl_page_slice_add(page, &cpg->cpg_cl, obj,
                                &vvp_transient_page_ops);
                clobj->cob_transient_pages++;
index 79ac192..70b8f4f 100644 (file)
@@ -825,8 +825,7 @@ static void lcd_write_cmd_s(int cmd)
        lcd_send_serial(0x1F);  /* R/W=W, RS=0 */
        lcd_send_serial(cmd & 0x0F);
        lcd_send_serial((cmd >> 4) & 0x0F);
-       /* the shortest command takes at least 40 us */
-       usleep_range(40, 100);
+       udelay(40);             /* the shortest command takes at least 40 us */
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -837,8 +836,7 @@ static void lcd_write_data_s(int data)
        lcd_send_serial(0x5F);  /* R/W=W, RS=1 */
        lcd_send_serial(data & 0x0F);
        lcd_send_serial((data >> 4) & 0x0F);
-       /* the shortest data takes at least 40 us */
-       usleep_range(40, 100);
+       udelay(40);             /* the shortest data takes at least 40 us */
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -848,20 +846,19 @@ static void lcd_write_cmd_p8(int cmd)
        spin_lock_irq(&pprt_lock);
        /* present the data to the data port */
        w_dtr(pprt, cmd);
-       /* maintain the data during 20 us before the strobe */
-       usleep_range(20, 100);
+       udelay(20);     /* maintain the data during 20 us before the strobe */
 
        bits.e = BIT_SET;
        bits.rs = BIT_CLR;
        bits.rw = BIT_CLR;
        set_ctrl_bits();
 
-       usleep_range(40, 100);  /* maintain the strobe during 40 us */
+       udelay(40);     /* maintain the strobe during 40 us */
 
        bits.e = BIT_CLR;
        set_ctrl_bits();
 
-       usleep_range(120, 500); /* the shortest command takes at least 120 us */
+       udelay(120);    /* the shortest command takes at least 120 us */
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -871,20 +868,19 @@ static void lcd_write_data_p8(int data)
        spin_lock_irq(&pprt_lock);
        /* present the data to the data port */
        w_dtr(pprt, data);
-       /* maintain the data during 20 us before the strobe */
-       usleep_range(20, 100);
+       udelay(20);     /* maintain the data during 20 us before the strobe */
 
        bits.e = BIT_SET;
        bits.rs = BIT_SET;
        bits.rw = BIT_CLR;
        set_ctrl_bits();
 
-       usleep_range(40, 100);  /* maintain the strobe during 40 us */
+       udelay(40);     /* maintain the strobe during 40 us */
 
        bits.e = BIT_CLR;
        set_ctrl_bits();
 
-       usleep_range(45, 100);  /* the shortest data takes at least 45 us */
+       udelay(45);     /* the shortest data takes at least 45 us */
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -894,7 +890,7 @@ static void lcd_write_cmd_tilcd(int cmd)
        spin_lock_irq(&pprt_lock);
        /* present the data to the control port */
        w_ctr(pprt, cmd);
-       usleep_range(60, 120);
+       udelay(60);
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -904,7 +900,7 @@ static void lcd_write_data_tilcd(int data)
        spin_lock_irq(&pprt_lock);
        /* present the data to the data port */
        w_dtr(pprt, data);
-       usleep_range(60, 120);
+       udelay(60);
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -947,7 +943,7 @@ static void lcd_clear_fast_s(void)
                lcd_send_serial(0x5F);  /* R/W=W, RS=1 */
                lcd_send_serial(' ' & 0x0F);
                lcd_send_serial((' ' >> 4) & 0x0F);
-               usleep_range(40, 100);  /* the shortest data takes at least 40 us */
+               udelay(40);     /* the shortest data takes at least 40 us */
        }
        spin_unlock_irq(&pprt_lock);
 
@@ -971,7 +967,7 @@ static void lcd_clear_fast_p8(void)
                w_dtr(pprt, ' ');
 
                /* maintain the data during 20 us before the strobe */
-               usleep_range(20, 100);
+               udelay(20);
 
                bits.e = BIT_SET;
                bits.rs = BIT_SET;
@@ -979,13 +975,13 @@ static void lcd_clear_fast_p8(void)
                set_ctrl_bits();
 
                /* maintain the strobe during 40 us */
-               usleep_range(40, 100);
+               udelay(40);
 
                bits.e = BIT_CLR;
                set_ctrl_bits();
 
                /* the shortest data takes at least 45 us */
-               usleep_range(45, 100);
+               udelay(45);
        }
        spin_unlock_irq(&pprt_lock);
 
@@ -1007,7 +1003,7 @@ static void lcd_clear_fast_tilcd(void)
        for (pos = 0; pos < lcd.height * lcd.hwidth; pos++) {
                /* present the data to the data port */
                w_dtr(pprt, ' ');
-               usleep_range(60, 120);
+               udelay(60);
        }
 
        spin_unlock_irq(&pprt_lock);
index ba87650..f1f3eca 100644 (file)
@@ -22,12 +22,6 @@ menuconfig STAGING_RDMA
 # Please keep entries in alphabetic order
 if STAGING_RDMA
 
-source "drivers/staging/rdma/amso1100/Kconfig"
-
-source "drivers/staging/rdma/ehca/Kconfig"
-
 source "drivers/staging/rdma/hfi1/Kconfig"
 
-source "drivers/staging/rdma/ipath/Kconfig"
-
 endif
index 139d78e..8c7fc1d 100644 (file)
@@ -1,5 +1,2 @@
 # Entries for RDMA_STAGING tree
-obj-$(CONFIG_INFINIBAND_AMSO1100)      += amso1100/
-obj-$(CONFIG_INFINIBAND_EHCA)  += ehca/
 obj-$(CONFIG_INFINIBAND_HFI1)  += hfi1/
-obj-$(CONFIG_INFINIBAND_IPATH) += ipath/
diff --git a/drivers/staging/rdma/amso1100/Kbuild b/drivers/staging/rdma/amso1100/Kbuild
deleted file mode 100644 (file)
index 950dfab..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-ccflags-$(CONFIG_INFINIBAND_AMSO1100_DEBUG) := -DDEBUG
-
-obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o
-
-iw_c2-y := c2.o c2_provider.o c2_rnic.o c2_alloc.o c2_mq.o c2_ae.o c2_vq.o \
-       c2_intr.o c2_cq.o c2_qp.o c2_cm.o c2_mm.o c2_pd.o
diff --git a/drivers/staging/rdma/amso1100/Kconfig b/drivers/staging/rdma/amso1100/Kconfig
deleted file mode 100644 (file)
index e6ce5f2..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-config INFINIBAND_AMSO1100
-       tristate "Ammasso 1100 HCA support"
-       depends on PCI && INET
-       ---help---
-         This is a low-level driver for the Ammasso 1100 host
-         channel adapter (HCA).
-
-config INFINIBAND_AMSO1100_DEBUG
-       bool "Verbose debugging output"
-       depends on INFINIBAND_AMSO1100
-       default n
-       ---help---
-         This option causes the amso1100 driver to produce a bunch of
-         debug messages.  Select this if you are developing the driver
-         or trying to diagnose a problem.
diff --git a/drivers/staging/rdma/amso1100/TODO b/drivers/staging/rdma/amso1100/TODO
deleted file mode 100644 (file)
index 18b00a5..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-7/2015
-
-The amso1100 driver has been deprecated and moved to drivers/staging.
-It will be removed in the 4.6 merge window.
diff --git a/drivers/staging/rdma/amso1100/c2.c b/drivers/staging/rdma/amso1100/c2.c
deleted file mode 100644 (file)
index b46ebd1..0000000
+++ /dev/null
@@ -1,1240 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/slab.h>
-#include <linux/prefetch.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-
-#include <rdma/ib_smi.h>
-#include "c2.h"
-#include "c2_provider.h"
-
-MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
-MODULE_DESCRIPTION("Ammasso AMSO1100 Low-level iWARP Driver");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_VERSION(DRV_VERSION);
-
-static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
-    | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN;
-
-static int debug = -1;         /* defaults above */
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
-
-static int c2_up(struct net_device *netdev);
-static int c2_down(struct net_device *netdev);
-static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
-static void c2_tx_interrupt(struct net_device *netdev);
-static void c2_rx_interrupt(struct net_device *netdev);
-static irqreturn_t c2_interrupt(int irq, void *dev_id);
-static void c2_tx_timeout(struct net_device *netdev);
-static int c2_change_mtu(struct net_device *netdev, int new_mtu);
-static void c2_reset(struct c2_port *c2_port);
-
-static struct pci_device_id c2_pci_table[] = {
-       { PCI_DEVICE(0x18b8, 0xb001) },
-       { 0 }
-};
-
-MODULE_DEVICE_TABLE(pci, c2_pci_table);
-
-static void c2_set_rxbufsize(struct c2_port *c2_port)
-{
-       struct net_device *netdev = c2_port->netdev;
-
-       if (netdev->mtu > RX_BUF_SIZE)
-               c2_port->rx_buf_size =
-                   netdev->mtu + ETH_HLEN + sizeof(struct c2_rxp_hdr) +
-                   NET_IP_ALIGN;
-       else
-               c2_port->rx_buf_size = sizeof(struct c2_rxp_hdr) + RX_BUF_SIZE;
-}
-
-/*
- * Allocate TX ring elements and chain them together.
- * One-to-one association of adapter descriptors with ring elements.
- */
-static int c2_tx_ring_alloc(struct c2_ring *tx_ring, void *vaddr,
-                           dma_addr_t base, void __iomem * mmio_txp_ring)
-{
-       struct c2_tx_desc *tx_desc;
-       struct c2_txp_desc __iomem *txp_desc;
-       struct c2_element *elem;
-       int i;
-
-       tx_ring->start = kmalloc_array(tx_ring->count, sizeof(*elem),
-                                      GFP_KERNEL);
-       if (!tx_ring->start)
-               return -ENOMEM;
-
-       elem = tx_ring->start;
-       tx_desc = vaddr;
-       txp_desc = mmio_txp_ring;
-       for (i = 0; i < tx_ring->count; i++, elem++, tx_desc++, txp_desc++) {
-               tx_desc->len = 0;
-               tx_desc->status = 0;
-
-               /* Set TXP_HTXD_UNINIT */
-               __raw_writeq((__force u64) cpu_to_be64(0x1122334455667788ULL),
-                            (void __iomem *) txp_desc + C2_TXP_ADDR);
-               __raw_writew(0, (void __iomem *) txp_desc + C2_TXP_LEN);
-               __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_UNINIT),
-                            (void __iomem *) txp_desc + C2_TXP_FLAGS);
-
-               elem->skb = NULL;
-               elem->ht_desc = tx_desc;
-               elem->hw_desc = txp_desc;
-
-               if (i == tx_ring->count - 1) {
-                       elem->next = tx_ring->start;
-                       tx_desc->next_offset = base;
-               } else {
-                       elem->next = elem + 1;
-                       tx_desc->next_offset =
-                           base + (i + 1) * sizeof(*tx_desc);
-               }
-       }
-
-       tx_ring->to_use = tx_ring->to_clean = tx_ring->start;
-
-       return 0;
-}
-
-/*
- * Allocate RX ring elements and chain them together.
- * One-to-one association of adapter descriptors with ring elements.
- */
-static int c2_rx_ring_alloc(struct c2_ring *rx_ring, void *vaddr,
-                           dma_addr_t base, void __iomem * mmio_rxp_ring)
-{
-       struct c2_rx_desc *rx_desc;
-       struct c2_rxp_desc __iomem *rxp_desc;
-       struct c2_element *elem;
-       int i;
-
-       rx_ring->start = kmalloc_array(rx_ring->count, sizeof(*elem),
-                                      GFP_KERNEL);
-       if (!rx_ring->start)
-               return -ENOMEM;
-
-       elem = rx_ring->start;
-       rx_desc = vaddr;
-       rxp_desc = mmio_rxp_ring;
-       for (i = 0; i < rx_ring->count; i++, elem++, rx_desc++, rxp_desc++) {
-               rx_desc->len = 0;
-               rx_desc->status = 0;
-
-               /* Set RXP_HRXD_UNINIT */
-               __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_OK),
-                      (void __iomem *) rxp_desc + C2_RXP_STATUS);
-               __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_COUNT);
-               __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_LEN);
-               __raw_writeq((__force u64) cpu_to_be64(0x99aabbccddeeffULL),
-                            (void __iomem *) rxp_desc + C2_RXP_ADDR);
-               __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_UNINIT),
-                            (void __iomem *) rxp_desc + C2_RXP_FLAGS);
-
-               elem->skb = NULL;
-               elem->ht_desc = rx_desc;
-               elem->hw_desc = rxp_desc;
-
-               if (i == rx_ring->count - 1) {
-                       elem->next = rx_ring->start;
-                       rx_desc->next_offset = base;
-               } else {
-                       elem->next = elem + 1;
-                       rx_desc->next_offset =
-                           base + (i + 1) * sizeof(*rx_desc);
-               }
-       }
-
-       rx_ring->to_use = rx_ring->to_clean = rx_ring->start;
-
-       return 0;
-}
-
-/* Setup buffer for receiving */
-static inline int c2_rx_alloc(struct c2_port *c2_port, struct c2_element *elem)
-{
-       struct c2_dev *c2dev = c2_port->c2dev;
-       struct c2_rx_desc *rx_desc = elem->ht_desc;
-       struct sk_buff *skb;
-       dma_addr_t mapaddr;
-       u32 maplen;
-       struct c2_rxp_hdr *rxp_hdr;
-
-       skb = dev_alloc_skb(c2_port->rx_buf_size);
-       if (unlikely(!skb)) {
-               pr_debug("%s: out of memory for receive\n",
-                       c2_port->netdev->name);
-               return -ENOMEM;
-       }
-
-       /* Zero out the rxp hdr in the sk_buff */
-       memset(skb->data, 0, sizeof(*rxp_hdr));
-
-       skb->dev = c2_port->netdev;
-
-       maplen = c2_port->rx_buf_size;
-       mapaddr =
-           pci_map_single(c2dev->pcidev, skb->data, maplen,
-                          PCI_DMA_FROMDEVICE);
-
-       /* Set the sk_buff RXP_header to RXP_HRXD_READY */
-       rxp_hdr = (struct c2_rxp_hdr *) skb->data;
-       rxp_hdr->flags = RXP_HRXD_READY;
-
-       __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
-       __raw_writew((__force u16) cpu_to_be16((u16) maplen - sizeof(*rxp_hdr)),
-                    elem->hw_desc + C2_RXP_LEN);
-       __raw_writeq((__force u64) cpu_to_be64(mapaddr), elem->hw_desc + C2_RXP_ADDR);
-       __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
-                    elem->hw_desc + C2_RXP_FLAGS);
-
-       elem->skb = skb;
-       elem->mapaddr = mapaddr;
-       elem->maplen = maplen;
-       rx_desc->len = maplen;
-
-       return 0;
-}
-
-/*
- * Allocate buffers for the Rx ring
- * For receive:  rx_ring.to_clean is next received frame
- */
-static int c2_rx_fill(struct c2_port *c2_port)
-{
-       struct c2_ring *rx_ring = &c2_port->rx_ring;
-       struct c2_element *elem;
-       int ret = 0;
-
-       elem = rx_ring->start;
-       do {
-               if (c2_rx_alloc(c2_port, elem)) {
-                       ret = 1;
-                       break;
-               }
-       } while ((elem = elem->next) != rx_ring->start);
-
-       rx_ring->to_clean = rx_ring->start;
-       return ret;
-}
-
-/* Free all buffers in RX ring, assumes receiver stopped */
-static void c2_rx_clean(struct c2_port *c2_port)
-{
-       struct c2_dev *c2dev = c2_port->c2dev;
-       struct c2_ring *rx_ring = &c2_port->rx_ring;
-       struct c2_element *elem;
-       struct c2_rx_desc *rx_desc;
-
-       elem = rx_ring->start;
-       do {
-               rx_desc = elem->ht_desc;
-               rx_desc->len = 0;
-
-               __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
-               __raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
-               __raw_writew(0, elem->hw_desc + C2_RXP_LEN);
-               __raw_writeq((__force u64) cpu_to_be64(0x99aabbccddeeffULL),
-                            elem->hw_desc + C2_RXP_ADDR);
-               __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_UNINIT),
-                            elem->hw_desc + C2_RXP_FLAGS);
-
-               if (elem->skb) {
-                       pci_unmap_single(c2dev->pcidev, elem->mapaddr,
-                                        elem->maplen, PCI_DMA_FROMDEVICE);
-                       dev_kfree_skb(elem->skb);
-                       elem->skb = NULL;
-               }
-       } while ((elem = elem->next) != rx_ring->start);
-}
-
-static inline int c2_tx_free(struct c2_dev *c2dev, struct c2_element *elem)
-{
-       struct c2_tx_desc *tx_desc = elem->ht_desc;
-
-       tx_desc->len = 0;
-
-       pci_unmap_single(c2dev->pcidev, elem->mapaddr, elem->maplen,
-                        PCI_DMA_TODEVICE);
-
-       if (elem->skb) {
-               dev_kfree_skb_any(elem->skb);
-               elem->skb = NULL;
-       }
-
-       return 0;
-}
-
-/* Free all buffers in TX ring, assumes transmitter stopped */
-static void c2_tx_clean(struct c2_port *c2_port)
-{
-       struct c2_ring *tx_ring = &c2_port->tx_ring;
-       struct c2_element *elem;
-       struct c2_txp_desc txp_htxd;
-       int retry;
-       unsigned long flags;
-
-       spin_lock_irqsave(&c2_port->tx_lock, flags);
-
-       elem = tx_ring->start;
-
-       do {
-               retry = 0;
-               do {
-                       txp_htxd.flags =
-                           readw(elem->hw_desc + C2_TXP_FLAGS);
-
-                       if (txp_htxd.flags == TXP_HTXD_READY) {
-                               retry = 1;
-                               __raw_writew(0,
-                                            elem->hw_desc + C2_TXP_LEN);
-                               __raw_writeq(0,
-                                            elem->hw_desc + C2_TXP_ADDR);
-                               __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_DONE),
-                                            elem->hw_desc + C2_TXP_FLAGS);
-                               c2_port->netdev->stats.tx_dropped++;
-                               break;
-                       } else {
-                               __raw_writew(0,
-                                            elem->hw_desc + C2_TXP_LEN);
-                               __raw_writeq((__force u64) cpu_to_be64(0x1122334455667788ULL),
-                                            elem->hw_desc + C2_TXP_ADDR);
-                               __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_UNINIT),
-                                            elem->hw_desc + C2_TXP_FLAGS);
-                       }
-
-                       c2_tx_free(c2_port->c2dev, elem);
-
-               } while ((elem = elem->next) != tx_ring->start);
-       } while (retry);
-
-       c2_port->tx_avail = c2_port->tx_ring.count - 1;
-       c2_port->c2dev->cur_tx = tx_ring->to_use - tx_ring->start;
-
-       if (c2_port->tx_avail > MAX_SKB_FRAGS + 1)
-               netif_wake_queue(c2_port->netdev);
-
-       spin_unlock_irqrestore(&c2_port->tx_lock, flags);
-}
-
-/*
- * Process transmit descriptors marked 'DONE' by the firmware,
- * freeing up their unneeded sk_buffs.
- */
-static void c2_tx_interrupt(struct net_device *netdev)
-{
-       struct c2_port *c2_port = netdev_priv(netdev);
-       struct c2_dev *c2dev = c2_port->c2dev;
-       struct c2_ring *tx_ring = &c2_port->tx_ring;
-       struct c2_element *elem;
-       struct c2_txp_desc txp_htxd;
-
-       spin_lock(&c2_port->tx_lock);
-
-       for (elem = tx_ring->to_clean; elem != tx_ring->to_use;
-            elem = elem->next) {
-               txp_htxd.flags =
-                   be16_to_cpu((__force __be16) readw(elem->hw_desc + C2_TXP_FLAGS));
-
-               if (txp_htxd.flags != TXP_HTXD_DONE)
-                       break;
-
-               if (netif_msg_tx_done(c2_port)) {
-                       /* PCI reads are expensive in fast path */
-                       txp_htxd.len =
-                           be16_to_cpu((__force __be16) readw(elem->hw_desc + C2_TXP_LEN));
-                       pr_debug("%s: tx done slot %3Zu status 0x%x len "
-                               "%5u bytes\n",
-                               netdev->name, elem - tx_ring->start,
-                               txp_htxd.flags, txp_htxd.len);
-               }
-
-               c2_tx_free(c2dev, elem);
-               ++(c2_port->tx_avail);
-       }
-
-       tx_ring->to_clean = elem;
-
-       if (netif_queue_stopped(netdev)
-           && c2_port->tx_avail > MAX_SKB_FRAGS + 1)
-               netif_wake_queue(netdev);
-
-       spin_unlock(&c2_port->tx_lock);
-}
-
-static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
-{
-       struct c2_rx_desc *rx_desc = elem->ht_desc;
-       struct c2_rxp_hdr *rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
-
-       if (rxp_hdr->status != RXP_HRXD_OK ||
-           rxp_hdr->len > (rx_desc->len - sizeof(*rxp_hdr))) {
-               pr_debug("BAD RXP_HRXD\n");
-               pr_debug("  rx_desc : %p\n", rx_desc);
-               pr_debug("    index : %Zu\n",
-                       elem - c2_port->rx_ring.start);
-               pr_debug("    len   : %u\n", rx_desc->len);
-               pr_debug("  rxp_hdr : %p [PA %p]\n", rxp_hdr,
-                       (void *) __pa((unsigned long) rxp_hdr));
-               pr_debug("    flags : 0x%x\n", rxp_hdr->flags);
-               pr_debug("    status: 0x%x\n", rxp_hdr->status);
-               pr_debug("    len   : %u\n", rxp_hdr->len);
-               pr_debug("    rsvd  : 0x%x\n", rxp_hdr->rsvd);
-       }
-
-       /* Setup the skb for reuse since we're dropping this pkt */
-       elem->skb->data = elem->skb->head;
-       skb_reset_tail_pointer(elem->skb);
-
-       /* Zero out the rxp hdr in the sk_buff */
-       memset(elem->skb->data, 0, sizeof(*rxp_hdr));
-
-       /* Write the descriptor to the adapter's rx ring */
-       __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
-       __raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
-       __raw_writew((__force u16) cpu_to_be16((u16) elem->maplen - sizeof(*rxp_hdr)),
-                    elem->hw_desc + C2_RXP_LEN);
-       __raw_writeq((__force u64) cpu_to_be64(elem->mapaddr),
-                    elem->hw_desc + C2_RXP_ADDR);
-       __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
-                    elem->hw_desc + C2_RXP_FLAGS);
-
-       pr_debug("packet dropped\n");
-       c2_port->netdev->stats.rx_dropped++;
-}
-
-static void c2_rx_interrupt(struct net_device *netdev)
-{
-       struct c2_port *c2_port = netdev_priv(netdev);
-       struct c2_dev *c2dev = c2_port->c2dev;
-       struct c2_ring *rx_ring = &c2_port->rx_ring;
-       struct c2_element *elem;
-       struct c2_rx_desc *rx_desc;
-       struct c2_rxp_hdr *rxp_hdr;
-       struct sk_buff *skb;
-       dma_addr_t mapaddr;
-       u32 maplen, buflen;
-       unsigned long flags;
-
-       spin_lock_irqsave(&c2dev->lock, flags);
-
-       /* Begin where we left off */
-       rx_ring->to_clean = rx_ring->start + c2dev->cur_rx;
-
-       for (elem = rx_ring->to_clean; elem->next != rx_ring->to_clean;
-            elem = elem->next) {
-               rx_desc = elem->ht_desc;
-               mapaddr = elem->mapaddr;
-               maplen = elem->maplen;
-               skb = elem->skb;
-               rxp_hdr = (struct c2_rxp_hdr *) skb->data;
-
-               if (rxp_hdr->flags != RXP_HRXD_DONE)
-                       break;
-               buflen = rxp_hdr->len;
-
-               /* Sanity check the RXP header */
-               if (rxp_hdr->status != RXP_HRXD_OK ||
-                   buflen > (rx_desc->len - sizeof(*rxp_hdr))) {
-                       c2_rx_error(c2_port, elem);
-                       continue;
-               }
-
-               /*
-                * Allocate and map a new skb for replenishing the host
-                * RX desc
-                */
-               if (c2_rx_alloc(c2_port, elem)) {
-                       c2_rx_error(c2_port, elem);
-                       continue;
-               }
-
-               /* Unmap the old skb */
-               pci_unmap_single(c2dev->pcidev, mapaddr, maplen,
-                                PCI_DMA_FROMDEVICE);
-
-               prefetch(skb->data);
-
-               /*
-                * Skip past the leading 8 bytes comprising of the
-                * "struct c2_rxp_hdr", prepended by the adapter
-                * to the usual Ethernet header ("struct ethhdr"),
-                * to the start of the raw Ethernet packet.
-                *
-                * Fix up the various fields in the sk_buff before
-                * passing it up to netif_rx(). The transfer size
-                * (in bytes) specified by the adapter len field of
-                * the "struct rxp_hdr_t" does NOT include the
-                * "sizeof(struct c2_rxp_hdr)".
-                */
-               skb->data += sizeof(*rxp_hdr);
-               skb_set_tail_pointer(skb, buflen);
-               skb->len = buflen;
-               skb->protocol = eth_type_trans(skb, netdev);
-
-               netif_rx(skb);
-
-               netdev->stats.rx_packets++;
-               netdev->stats.rx_bytes += buflen;
-       }
-
-       /* Save where we left off */
-       rx_ring->to_clean = elem;
-       c2dev->cur_rx = elem - rx_ring->start;
-       C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
-
-       spin_unlock_irqrestore(&c2dev->lock, flags);
-}
-
-/*
- * Handle netisr0 TX & RX interrupts.
- */
-static irqreturn_t c2_interrupt(int irq, void *dev_id)
-{
-       unsigned int netisr0, dmaisr;
-       int handled = 0;
-       struct c2_dev *c2dev = dev_id;
-
-       /* Process CCILNET interrupts */
-       netisr0 = readl(c2dev->regs + C2_NISR0);
-       if (netisr0) {
-
-               /*
-                * There is an issue with the firmware that always
-                * provides the status of RX for both TX & RX
-                * interrupts.  So process both queues here.
-                */
-               c2_rx_interrupt(c2dev->netdev);
-               c2_tx_interrupt(c2dev->netdev);
-
-               /* Clear the interrupt */
-               writel(netisr0, c2dev->regs + C2_NISR0);
-               handled++;
-       }
-
-       /* Process RNIC interrupts */
-       dmaisr = readl(c2dev->regs + C2_DISR);
-       if (dmaisr) {
-               writel(dmaisr, c2dev->regs + C2_DISR);
-               c2_rnic_interrupt(c2dev);
-               handled++;
-       }
-
-       if (handled) {
-               return IRQ_HANDLED;
-       } else {
-               return IRQ_NONE;
-       }
-}
-
-static int c2_up(struct net_device *netdev)
-{
-       struct c2_port *c2_port = netdev_priv(netdev);
-       struct c2_dev *c2dev = c2_port->c2dev;
-       struct c2_element *elem;
-       struct c2_rxp_hdr *rxp_hdr;
-       struct in_device *in_dev;
-       size_t rx_size, tx_size;
-       int ret, i;
-       unsigned int netimr0;
-
-       if (netif_msg_ifup(c2_port))
-               pr_debug("%s: enabling interface\n", netdev->name);
-
-       /* Set the Rx buffer size based on MTU */
-       c2_set_rxbufsize(c2_port);
-
-       /* Allocate DMA'able memory for Tx/Rx host descriptor rings */
-       rx_size = c2_port->rx_ring.count * sizeof(struct c2_rx_desc);
-       tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc);
-
-       c2_port->mem_size = tx_size + rx_size;
-       c2_port->mem = pci_zalloc_consistent(c2dev->pcidev, c2_port->mem_size,
-                                            &c2_port->dma);
-       if (c2_port->mem == NULL) {
-               pr_debug("Unable to allocate memory for "
-                       "host descriptor rings\n");
-               return -ENOMEM;
-       }
-
-       /* Create the Rx host descriptor ring */
-       if ((ret =
-            c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma,
-                             c2dev->mmio_rxp_ring))) {
-               pr_debug("Unable to create RX ring\n");
-               goto bail0;
-       }
-
-       /* Allocate Rx buffers for the host descriptor ring */
-       if (c2_rx_fill(c2_port)) {
-               pr_debug("Unable to fill RX ring\n");
-               goto bail1;
-       }
-
-       /* Create the Tx host descriptor ring */
-       if ((ret = c2_tx_ring_alloc(&c2_port->tx_ring, c2_port->mem + rx_size,
-                                   c2_port->dma + rx_size,
-                                   c2dev->mmio_txp_ring))) {
-               pr_debug("Unable to create TX ring\n");
-               goto bail1;
-       }
-
-       /* Set the TX pointer to where we left off */
-       c2_port->tx_avail = c2_port->tx_ring.count - 1;
-       c2_port->tx_ring.to_use = c2_port->tx_ring.to_clean =
-           c2_port->tx_ring.start + c2dev->cur_tx;
-
-       /* missing: Initialize MAC */
-
-       BUG_ON(c2_port->tx_ring.to_use != c2_port->tx_ring.to_clean);
-
-       /* Reset the adapter, ensures the driver is in sync with the RXP */
-       c2_reset(c2_port);
-
-       /* Reset the READY bit in the sk_buff RXP headers & adapter HRXDQ */
-       for (i = 0, elem = c2_port->rx_ring.start; i < c2_port->rx_ring.count;
-            i++, elem++) {
-               rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
-               rxp_hdr->flags = 0;
-               __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
-                            elem->hw_desc + C2_RXP_FLAGS);
-       }
-
-       /* Enable network packets */
-       netif_start_queue(netdev);
-
-       /* Enable IRQ */
-       writel(0, c2dev->regs + C2_IDIS);
-       netimr0 = readl(c2dev->regs + C2_NIMR0);
-       netimr0 &= ~(C2_PCI_HTX_INT | C2_PCI_HRX_INT);
-       writel(netimr0, c2dev->regs + C2_NIMR0);
-
-       /* Tell the stack to ignore arp requests for ipaddrs bound to
-        * other interfaces.  This is needed to prevent the host stack
-        * from responding to arp requests to the ipaddr bound on the
-        * rdma interface.
-        */
-       in_dev = in_dev_get(netdev);
-       IN_DEV_CONF_SET(in_dev, ARP_IGNORE, 1);
-       in_dev_put(in_dev);
-
-       return 0;
-
-bail1:
-       c2_rx_clean(c2_port);
-       kfree(c2_port->rx_ring.start);
-
-bail0:
-       pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
-                           c2_port->dma);
-
-       return ret;
-}
-
-static int c2_down(struct net_device *netdev)
-{
-       struct c2_port *c2_port = netdev_priv(netdev);
-       struct c2_dev *c2dev = c2_port->c2dev;
-
-       if (netif_msg_ifdown(c2_port))
-               pr_debug("%s: disabling interface\n",
-                       netdev->name);
-
-       /* Wait for all the queued packets to get sent */
-       c2_tx_interrupt(netdev);
-
-       /* Disable network packets */
-       netif_stop_queue(netdev);
-
-       /* Disable IRQs by clearing the interrupt mask */
-       writel(1, c2dev->regs + C2_IDIS);
-       writel(0, c2dev->regs + C2_NIMR0);
-
-       /* missing: Stop transmitter */
-
-       /* missing: Stop receiver */
-
-       /* Reset the adapter, ensures the driver is in sync with the RXP */
-       c2_reset(c2_port);
-
-       /* missing: Turn off LEDs here */
-
-       /* Free all buffers in the host descriptor rings */
-       c2_tx_clean(c2_port);
-       c2_rx_clean(c2_port);
-
-       /* Free the host descriptor rings */
-       kfree(c2_port->rx_ring.start);
-       kfree(c2_port->tx_ring.start);
-       pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
-                           c2_port->dma);
-
-       return 0;
-}
-
-static void c2_reset(struct c2_port *c2_port)
-{
-       struct c2_dev *c2dev = c2_port->c2dev;
-       unsigned int cur_rx = c2dev->cur_rx;
-
-       /* Tell the hardware to quiesce */
-       C2_SET_CUR_RX(c2dev, cur_rx | C2_PCI_HRX_QUI);
-
-       /*
-        * The hardware will reset the C2_PCI_HRX_QUI bit once
-        * the RXP is quiesced.  Wait 2 seconds for this.
-        */
-       ssleep(2);
-
-       cur_rx = C2_GET_CUR_RX(c2dev);
-
-       if (cur_rx & C2_PCI_HRX_QUI)
-               pr_debug("c2_reset: failed to quiesce the hardware!\n");
-
-       cur_rx &= ~C2_PCI_HRX_QUI;
-
-       c2dev->cur_rx = cur_rx;
-
-       pr_debug("Current RX: %u\n", c2dev->cur_rx);
-}
-
-static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
-{
-       struct c2_port *c2_port = netdev_priv(netdev);
-       struct c2_dev *c2dev = c2_port->c2dev;
-       struct c2_ring *tx_ring = &c2_port->tx_ring;
-       struct c2_element *elem;
-       dma_addr_t mapaddr;
-       u32 maplen;
-       unsigned long flags;
-       unsigned int i;
-
-       spin_lock_irqsave(&c2_port->tx_lock, flags);
-
-       if (unlikely(c2_port->tx_avail < (skb_shinfo(skb)->nr_frags + 1))) {
-               netif_stop_queue(netdev);
-               spin_unlock_irqrestore(&c2_port->tx_lock, flags);
-
-               pr_debug("%s: Tx ring full when queue awake!\n",
-                       netdev->name);
-               return NETDEV_TX_BUSY;
-       }
-
-       maplen = skb_headlen(skb);
-       mapaddr =
-           pci_map_single(c2dev->pcidev, skb->data, maplen, PCI_DMA_TODEVICE);
-
-       elem = tx_ring->to_use;
-       elem->skb = skb;
-       elem->mapaddr = mapaddr;
-       elem->maplen = maplen;
-
-       /* Tell HW to xmit */
-       __raw_writeq((__force u64) cpu_to_be64(mapaddr),
-                    elem->hw_desc + C2_TXP_ADDR);
-       __raw_writew((__force u16) cpu_to_be16(maplen),
-                    elem->hw_desc + C2_TXP_LEN);
-       __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY),
-                    elem->hw_desc + C2_TXP_FLAGS);
-
-       netdev->stats.tx_packets++;
-       netdev->stats.tx_bytes += maplen;
-
-       /* Loop thru additional data fragments and queue them */
-       if (skb_shinfo(skb)->nr_frags) {
-               for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-                       const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-                       maplen = skb_frag_size(frag);
-                       mapaddr = skb_frag_dma_map(&c2dev->pcidev->dev, frag,
-                                                  0, maplen, DMA_TO_DEVICE);
-                       elem = elem->next;
-                       elem->skb = NULL;
-                       elem->mapaddr = mapaddr;
-                       elem->maplen = maplen;
-
-                       /* Tell HW to xmit */
-                       __raw_writeq((__force u64) cpu_to_be64(mapaddr),
-                                    elem->hw_desc + C2_TXP_ADDR);
-                       __raw_writew((__force u16) cpu_to_be16(maplen),
-                                    elem->hw_desc + C2_TXP_LEN);
-                       __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY),
-                                    elem->hw_desc + C2_TXP_FLAGS);
-
-                       netdev->stats.tx_packets++;
-                       netdev->stats.tx_bytes += maplen;
-               }
-       }
-
-       tx_ring->to_use = elem->next;
-       c2_port->tx_avail -= (skb_shinfo(skb)->nr_frags + 1);
-
-       if (c2_port->tx_avail <= MAX_SKB_FRAGS + 1) {
-               netif_stop_queue(netdev);
-               if (netif_msg_tx_queued(c2_port))
-                       pr_debug("%s: transmit queue full\n",
-                               netdev->name);
-       }
-
-       spin_unlock_irqrestore(&c2_port->tx_lock, flags);
-
-       netdev->trans_start = jiffies;
-
-       return NETDEV_TX_OK;
-}
-
-static void c2_tx_timeout(struct net_device *netdev)
-{
-       struct c2_port *c2_port = netdev_priv(netdev);
-
-       if (netif_msg_timer(c2_port))
-               pr_debug("%s: tx timeout\n", netdev->name);
-
-       c2_tx_clean(c2_port);
-}
-
-static int c2_change_mtu(struct net_device *netdev, int new_mtu)
-{
-       int ret = 0;
-
-       if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
-               return -EINVAL;
-
-       netdev->mtu = new_mtu;
-
-       if (netif_running(netdev)) {
-               c2_down(netdev);
-
-               c2_up(netdev);
-       }
-
-       return ret;
-}
-
-static const struct net_device_ops c2_netdev = {
-       .ndo_open               = c2_up,
-       .ndo_stop               = c2_down,
-       .ndo_start_xmit         = c2_xmit_frame,
-       .ndo_tx_timeout         = c2_tx_timeout,
-       .ndo_change_mtu         = c2_change_mtu,
-       .ndo_set_mac_address    = eth_mac_addr,
-       .ndo_validate_addr      = eth_validate_addr,
-};
-
-/* Initialize network device */
-static struct net_device *c2_devinit(struct c2_dev *c2dev,
-                                    void __iomem * mmio_addr)
-{
-       struct c2_port *c2_port = NULL;
-       struct net_device *netdev = alloc_etherdev(sizeof(*c2_port));
-
-       if (!netdev) {
-               pr_debug("c2_port etherdev alloc failed");
-               return NULL;
-       }
-
-       SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
-
-       netdev->netdev_ops = &c2_netdev;
-       netdev->watchdog_timeo = C2_TX_TIMEOUT;
-       netdev->irq = c2dev->pcidev->irq;
-
-       c2_port = netdev_priv(netdev);
-       c2_port->netdev = netdev;
-       c2_port->c2dev = c2dev;
-       c2_port->msg_enable = netif_msg_init(debug, default_msg);
-       c2_port->tx_ring.count = C2_NUM_TX_DESC;
-       c2_port->rx_ring.count = C2_NUM_RX_DESC;
-
-       spin_lock_init(&c2_port->tx_lock);
-
-       /* Copy our 48-bit ethernet hardware address */
-       memcpy_fromio(netdev->dev_addr, mmio_addr + C2_REGS_ENADDR, 6);
-
-       /* Validate the MAC address */
-       if (!is_valid_ether_addr(netdev->dev_addr)) {
-               pr_debug("Invalid MAC Address\n");
-               pr_debug("%s: MAC %pM, IRQ %u\n", netdev->name,
-                        netdev->dev_addr, netdev->irq);
-               free_netdev(netdev);
-               return NULL;
-       }
-
-       c2dev->netdev = netdev;
-
-       return netdev;
-}
-
-static int c2_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
-{
-       int ret = 0, i;
-       unsigned long reg0_start, reg0_flags, reg0_len;
-       unsigned long reg2_start, reg2_flags, reg2_len;
-       unsigned long reg4_start, reg4_flags, reg4_len;
-       unsigned kva_map_size;
-       struct net_device *netdev = NULL;
-       struct c2_dev *c2dev = NULL;
-       void __iomem *mmio_regs = NULL;
-
-       printk(KERN_INFO PFX "AMSO1100 Gigabit Ethernet driver v%s loaded\n",
-               DRV_VERSION);
-
-       /* Enable PCI device */
-       ret = pci_enable_device(pcidev);
-       if (ret) {
-               printk(KERN_ERR PFX "%s: Unable to enable PCI device\n",
-                       pci_name(pcidev));
-               goto bail0;
-       }
-
-       reg0_start = pci_resource_start(pcidev, BAR_0);
-       reg0_len = pci_resource_len(pcidev, BAR_0);
-       reg0_flags = pci_resource_flags(pcidev, BAR_0);
-
-       reg2_start = pci_resource_start(pcidev, BAR_2);
-       reg2_len = pci_resource_len(pcidev, BAR_2);
-       reg2_flags = pci_resource_flags(pcidev, BAR_2);
-
-       reg4_start = pci_resource_start(pcidev, BAR_4);
-       reg4_len = pci_resource_len(pcidev, BAR_4);
-       reg4_flags = pci_resource_flags(pcidev, BAR_4);
-
-       pr_debug("BAR0 size = 0x%lX bytes\n", reg0_len);
-       pr_debug("BAR2 size = 0x%lX bytes\n", reg2_len);
-       pr_debug("BAR4 size = 0x%lX bytes\n", reg4_len);
-
-       /* Make sure PCI base addr are MMIO */
-       if (!(reg0_flags & IORESOURCE_MEM) ||
-           !(reg2_flags & IORESOURCE_MEM) || !(reg4_flags & IORESOURCE_MEM)) {
-               printk(KERN_ERR PFX "PCI regions not an MMIO resource\n");
-               ret = -ENODEV;
-               goto bail1;
-       }
-
-       /* Check for weird/broken PCI region reporting */
-       if ((reg0_len < C2_REG0_SIZE) ||
-           (reg2_len < C2_REG2_SIZE) || (reg4_len < C2_REG4_SIZE)) {
-               printk(KERN_ERR PFX "Invalid PCI region sizes\n");
-               ret = -ENODEV;
-               goto bail1;
-       }
-
-       /* Reserve PCI I/O and memory resources */
-       ret = pci_request_regions(pcidev, DRV_NAME);
-       if (ret) {
-               printk(KERN_ERR PFX "%s: Unable to request regions\n",
-                       pci_name(pcidev));
-               goto bail1;
-       }
-
-       if ((sizeof(dma_addr_t) > 4)) {
-               ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(64));
-               if (ret < 0) {
-                       printk(KERN_ERR PFX "64b DMA configuration failed\n");
-                       goto bail2;
-               }
-       } else {
-               ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32));
-               if (ret < 0) {
-                       printk(KERN_ERR PFX "32b DMA configuration failed\n");
-                       goto bail2;
-               }
-       }
-
-       /* Enables bus-mastering on the device */
-       pci_set_master(pcidev);
-
-       /* Remap the adapter PCI registers in BAR4 */
-       mmio_regs = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
-                                   sizeof(struct c2_adapter_pci_regs));
-       if (!mmio_regs) {
-               printk(KERN_ERR PFX
-                       "Unable to remap adapter PCI registers in BAR4\n");
-               ret = -EIO;
-               goto bail2;
-       }
-
-       /* Validate PCI regs magic */
-       for (i = 0; i < sizeof(c2_magic); i++) {
-               if (c2_magic[i] != readb(mmio_regs + C2_REGS_MAGIC + i)) {
-                       printk(KERN_ERR PFX "Downlevel Firmware boot loader "
-                               "[%d/%Zd: got 0x%x, exp 0x%x]. Use the cc_flash "
-                              "utility to update your boot loader\n",
-                               i + 1, sizeof(c2_magic),
-                               readb(mmio_regs + C2_REGS_MAGIC + i),
-                               c2_magic[i]);
-                       printk(KERN_ERR PFX "Adapter not claimed\n");
-                       iounmap(mmio_regs);
-                       ret = -EIO;
-                       goto bail2;
-               }
-       }
-
-       /* Validate the adapter version */
-       if (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_VERS)) != C2_VERSION) {
-               printk(KERN_ERR PFX "Version mismatch "
-                       "[fw=%u, c2=%u], Adapter not claimed\n",
-                       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_VERS)),
-                       C2_VERSION);
-               ret = -EINVAL;
-               iounmap(mmio_regs);
-               goto bail2;
-       }
-
-       /* Validate the adapter IVN */
-       if (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_IVN)) != C2_IVN) {
-               printk(KERN_ERR PFX "Downlevel FIrmware level. You should be using "
-                      "the OpenIB device support kit. "
-                      "[fw=0x%x, c2=0x%x], Adapter not claimed\n",
-                      be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_IVN)),
-                      C2_IVN);
-               ret = -EINVAL;
-               iounmap(mmio_regs);
-               goto bail2;
-       }
-
-       /* Allocate hardware structure */
-       c2dev = (struct c2_dev *) ib_alloc_device(sizeof(*c2dev));
-       if (!c2dev) {
-               printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n",
-                       pci_name(pcidev));
-               ret = -ENOMEM;
-               iounmap(mmio_regs);
-               goto bail2;
-       }
-
-       memset(c2dev, 0, sizeof(*c2dev));
-       spin_lock_init(&c2dev->lock);
-       c2dev->pcidev = pcidev;
-       c2dev->cur_tx = 0;
-
-       /* Get the last RX index */
-       c2dev->cur_rx =
-           (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_HRX_CUR)) -
-            0xffffc000) / sizeof(struct c2_rxp_desc);
-
-       /* Request an interrupt line for the driver */
-       ret = request_irq(pcidev->irq, c2_interrupt, IRQF_SHARED, DRV_NAME, c2dev);
-       if (ret) {
-               printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n",
-                       pci_name(pcidev), pcidev->irq);
-               iounmap(mmio_regs);
-               goto bail3;
-       }
-
-       /* Set driver specific data */
-       pci_set_drvdata(pcidev, c2dev);
-
-       /* Initialize network device */
-       if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) {
-               ret = -ENOMEM;
-               iounmap(mmio_regs);
-               goto bail4;
-       }
-
-       /* Save off the actual size prior to unmapping mmio_regs */
-       kva_map_size = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_PCI_WINSIZE));
-
-       /* Unmap the adapter PCI registers in BAR4 */
-       iounmap(mmio_regs);
-
-       /* Register network device */
-       ret = register_netdev(netdev);
-       if (ret) {
-               printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n",
-                       ret);
-               goto bail5;
-       }
-
-       /* Disable network packets */
-       netif_stop_queue(netdev);
-
-       /* Remap the adapter HRXDQ PA space to kernel VA space */
-       c2dev->mmio_rxp_ring = ioremap_nocache(reg4_start + C2_RXP_HRXDQ_OFFSET,
-                                              C2_RXP_HRXDQ_SIZE);
-       if (!c2dev->mmio_rxp_ring) {
-               printk(KERN_ERR PFX "Unable to remap MMIO HRXDQ region\n");
-               ret = -EIO;
-               goto bail6;
-       }
-
-       /* Remap the adapter HTXDQ PA space to kernel VA space */
-       c2dev->mmio_txp_ring = ioremap_nocache(reg4_start + C2_TXP_HTXDQ_OFFSET,
-                                              C2_TXP_HTXDQ_SIZE);
-       if (!c2dev->mmio_txp_ring) {
-               printk(KERN_ERR PFX "Unable to remap MMIO HTXDQ region\n");
-               ret = -EIO;
-               goto bail7;
-       }
-
-       /* Save off the current RX index in the last 4 bytes of the TXP Ring */
-       C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
-
-       /* Remap the PCI registers in adapter BAR0 to kernel VA space */
-       c2dev->regs = ioremap_nocache(reg0_start, reg0_len);
-       if (!c2dev->regs) {
-               printk(KERN_ERR PFX "Unable to remap BAR0\n");
-               ret = -EIO;
-               goto bail8;
-       }
-
-       /* Remap the PCI registers in adapter BAR4 to kernel VA space */
-       c2dev->pa = reg4_start + C2_PCI_REGS_OFFSET;
-       c2dev->kva = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
-                                    kva_map_size);
-       if (!c2dev->kva) {
-               printk(KERN_ERR PFX "Unable to remap BAR4\n");
-               ret = -EIO;
-               goto bail9;
-       }
-
-       /* Print out the MAC address */
-       pr_debug("%s: MAC %pM, IRQ %u\n", netdev->name, netdev->dev_addr,
-                netdev->irq);
-
-       ret = c2_rnic_init(c2dev);
-       if (ret) {
-               printk(KERN_ERR PFX "c2_rnic_init failed: %d\n", ret);
-               goto bail10;
-       }
-
-       ret = c2_register_device(c2dev);
-       if (ret)
-               goto bail10;
-
-       return 0;
-
- bail10:
-       iounmap(c2dev->kva);
-
- bail9:
-       iounmap(c2dev->regs);
-
- bail8:
-       iounmap(c2dev->mmio_txp_ring);
-
- bail7:
-       iounmap(c2dev->mmio_rxp_ring);
-
- bail6:
-       unregister_netdev(netdev);
-
- bail5:
-       free_netdev(netdev);
-
- bail4:
-       free_irq(pcidev->irq, c2dev);
-
- bail3:
-       ib_dealloc_device(&c2dev->ibdev);
-
- bail2:
-       pci_release_regions(pcidev);
-
- bail1:
-       pci_disable_device(pcidev);
-
- bail0:
-       return ret;
-}
-
-static void c2_remove(struct pci_dev *pcidev)
-{
-       struct c2_dev *c2dev = pci_get_drvdata(pcidev);
-       struct net_device *netdev = c2dev->netdev;
-
-       /* Unregister with OpenIB */
-       c2_unregister_device(c2dev);
-
-       /* Clean up the RNIC resources */
-       c2_rnic_term(c2dev);
-
-       /* Remove network device from the kernel */
-       unregister_netdev(netdev);
-
-       /* Free network device */
-       free_netdev(netdev);
-
-       /* Free the interrupt line */
-       free_irq(pcidev->irq, c2dev);
-
-       /* missing: Turn LEDs off here */
-
-       /* Unmap adapter PA space */
-       iounmap(c2dev->kva);
-       iounmap(c2dev->regs);
-       iounmap(c2dev->mmio_txp_ring);
-       iounmap(c2dev->mmio_rxp_ring);
-
-       /* Free the hardware structure */
-       ib_dealloc_device(&c2dev->ibdev);
-
-       /* Release reserved PCI I/O and memory resources */
-       pci_release_regions(pcidev);
-
-       /* Disable PCI device */
-       pci_disable_device(pcidev);
-
-       /* Clear driver specific data */
-       pci_set_drvdata(pcidev, NULL);
-}
-
-static struct pci_driver c2_pci_driver = {
-       .name = DRV_NAME,
-       .id_table = c2_pci_table,
-       .probe = c2_probe,
-       .remove = c2_remove,
-};
-
-module_pci_driver(c2_pci_driver);
diff --git a/drivers/staging/rdma/amso1100/c2.h b/drivers/staging/rdma/amso1100/c2.h
deleted file mode 100644 (file)
index 21b565a..0000000
+++ /dev/null
@@ -1,547 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __C2_H
-#define __C2_H
-
-#include <linux/netdevice.h>
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/idr.h>
-
-#include "c2_provider.h"
-#include "c2_mq.h"
-#include "c2_status.h"
-
-#define DRV_NAME     "c2"
-#define DRV_VERSION  "1.1"
-#define PFX          DRV_NAME ": "
-
-#define BAR_0                0
-#define BAR_2                2
-#define BAR_4                4
-
-#define RX_BUF_SIZE         (1536 + 8)
-#define ETH_JUMBO_MTU        9000
-#define C2_MAGIC            "CEPHEUS"
-#define C2_VERSION           4
-#define C2_IVN              (18 & 0x7fffffff)
-
-#define C2_REG0_SIZE        (16 * 1024)
-#define C2_REG2_SIZE        (2 * 1024 * 1024)
-#define C2_REG4_SIZE        (256 * 1024 * 1024)
-#define C2_NUM_TX_DESC       341
-#define C2_NUM_RX_DESC       256
-#define C2_PCI_REGS_OFFSET  (0x10000)
-#define C2_RXP_HRXDQ_OFFSET (((C2_REG4_SIZE)/2))
-#define C2_RXP_HRXDQ_SIZE   (4096)
-#define C2_TXP_HTXDQ_OFFSET (((C2_REG4_SIZE)/2) + C2_RXP_HRXDQ_SIZE)
-#define C2_TXP_HTXDQ_SIZE   (4096)
-#define C2_TX_TIMEOUT      (6*HZ)
-
-/* CEPHEUS */
-static const u8 c2_magic[] = {
-       0x43, 0x45, 0x50, 0x48, 0x45, 0x55, 0x53
-};
-
-enum adapter_pci_regs {
-       C2_REGS_MAGIC = 0x0000,
-       C2_REGS_VERS = 0x0008,
-       C2_REGS_IVN = 0x000C,
-       C2_REGS_PCI_WINSIZE = 0x0010,
-       C2_REGS_Q0_QSIZE = 0x0014,
-       C2_REGS_Q0_MSGSIZE = 0x0018,
-       C2_REGS_Q0_POOLSTART = 0x001C,
-       C2_REGS_Q0_SHARED = 0x0020,
-       C2_REGS_Q1_QSIZE = 0x0024,
-       C2_REGS_Q1_MSGSIZE = 0x0028,
-       C2_REGS_Q1_SHARED = 0x0030,
-       C2_REGS_Q2_QSIZE = 0x0034,
-       C2_REGS_Q2_MSGSIZE = 0x0038,
-       C2_REGS_Q2_SHARED = 0x0040,
-       C2_REGS_ENADDR = 0x004C,
-       C2_REGS_RDMA_ENADDR = 0x0054,
-       C2_REGS_HRX_CUR = 0x006C,
-};
-
-struct c2_adapter_pci_regs {
-       char reg_magic[8];
-       u32 version;
-       u32 ivn;
-       u32 pci_window_size;
-       u32 q0_q_size;
-       u32 q0_msg_size;
-       u32 q0_pool_start;
-       u32 q0_shared;
-       u32 q1_q_size;
-       u32 q1_msg_size;
-       u32 q1_pool_start;
-       u32 q1_shared;
-       u32 q2_q_size;
-       u32 q2_msg_size;
-       u32 q2_pool_start;
-       u32 q2_shared;
-       u32 log_start;
-       u32 log_size;
-       u8 host_enaddr[8];
-       u8 rdma_enaddr[8];
-       u32 crash_entry;
-       u32 crash_ready[2];
-       u32 fw_txd_cur;
-       u32 fw_hrxd_cur;
-       u32 fw_rxd_cur;
-};
-
-enum pci_regs {
-       C2_HISR = 0x0000,
-       C2_DISR = 0x0004,
-       C2_HIMR = 0x0008,
-       C2_DIMR = 0x000C,
-       C2_NISR0 = 0x0010,
-       C2_NISR1 = 0x0014,
-       C2_NIMR0 = 0x0018,
-       C2_NIMR1 = 0x001C,
-       C2_IDIS = 0x0020,
-};
-
-enum {
-       C2_PCI_HRX_INT = 1 << 8,
-       C2_PCI_HTX_INT = 1 << 17,
-       C2_PCI_HRX_QUI = 1 << 31,
-};
-
-/*
- * Cepheus registers in BAR0.
- */
-struct c2_pci_regs {
-       u32 hostisr;
-       u32 dmaisr;
-       u32 hostimr;
-       u32 dmaimr;
-       u32 netisr0;
-       u32 netisr1;
-       u32 netimr0;
-       u32 netimr1;
-       u32 int_disable;
-};
-
-/* TXP flags */
-enum c2_txp_flags {
-       TXP_HTXD_DONE = 0,
-       TXP_HTXD_READY = 1 << 0,
-       TXP_HTXD_UNINIT = 1 << 1,
-};
-
-/* RXP flags */
-enum c2_rxp_flags {
-       RXP_HRXD_UNINIT = 0,
-       RXP_HRXD_READY = 1 << 0,
-       RXP_HRXD_DONE = 1 << 1,
-};
-
-/* RXP status */
-enum c2_rxp_status {
-       RXP_HRXD_ZERO = 0,
-       RXP_HRXD_OK = 1 << 0,
-       RXP_HRXD_BUF_OV = 1 << 1,
-};
-
-/* TXP descriptor fields */
-enum txp_desc {
-       C2_TXP_FLAGS = 0x0000,
-       C2_TXP_LEN = 0x0002,
-       C2_TXP_ADDR = 0x0004,
-};
-
-/* RXP descriptor fields */
-enum rxp_desc {
-       C2_RXP_FLAGS = 0x0000,
-       C2_RXP_STATUS = 0x0002,
-       C2_RXP_COUNT = 0x0004,
-       C2_RXP_LEN = 0x0006,
-       C2_RXP_ADDR = 0x0008,
-};
-
-struct c2_txp_desc {
-       u16 flags;
-       u16 len;
-       u64 addr;
-} __attribute__ ((packed));
-
-struct c2_rxp_desc {
-       u16 flags;
-       u16 status;
-       u16 count;
-       u16 len;
-       u64 addr;
-} __attribute__ ((packed));
-
-struct c2_rxp_hdr {
-       u16 flags;
-       u16 status;
-       u16 len;
-       u16 rsvd;
-} __attribute__ ((packed));
-
-struct c2_tx_desc {
-       u32 len;
-       u32 status;
-       dma_addr_t next_offset;
-};
-
-struct c2_rx_desc {
-       u32 len;
-       u32 status;
-       dma_addr_t next_offset;
-};
-
-struct c2_alloc {
-       u32 last;
-       u32 max;
-       spinlock_t lock;
-       unsigned long *table;
-};
-
-struct c2_array {
-       struct {
-               void **page;
-               int used;
-       } *page_list;
-};
-
-/*
- * The MQ shared pointer pool is organized as a linked list of
- * chunks. Each chunk contains a linked list of free shared pointers
- * that can be allocated to a given user mode client.
- *
- */
-struct sp_chunk {
-       struct sp_chunk *next;
-       dma_addr_t dma_addr;
-       DEFINE_DMA_UNMAP_ADDR(mapping);
-       u16 head;
-       u16 shared_ptr[0];
-};
-
-struct c2_pd_table {
-       u32 last;
-       u32 max;
-       spinlock_t lock;
-       unsigned long *table;
-};
-
-struct c2_qp_table {
-       struct idr idr;
-       spinlock_t lock;
-};
-
-struct c2_element {
-       struct c2_element *next;
-       void *ht_desc;          /* host     descriptor */
-       void __iomem *hw_desc;  /* hardware descriptor */
-       struct sk_buff *skb;
-       dma_addr_t mapaddr;
-       u32 maplen;
-};
-
-struct c2_ring {
-       struct c2_element *to_clean;
-       struct c2_element *to_use;
-       struct c2_element *start;
-       unsigned long count;
-};
-
-struct c2_dev {
-       struct ib_device ibdev;
-       void __iomem *regs;
-       void __iomem *mmio_txp_ring; /* remapped adapter memory for hw rings */
-       void __iomem *mmio_rxp_ring;
-       spinlock_t lock;
-       struct pci_dev *pcidev;
-       struct net_device *netdev;
-       struct net_device *pseudo_netdev;
-       unsigned int cur_tx;
-       unsigned int cur_rx;
-       u32 adapter_handle;
-       int device_cap_flags;
-       void __iomem *kva;      /* KVA device memory */
-       unsigned long pa;       /* PA device memory */
-       void **qptr_array;
-
-       struct kmem_cache *host_msg_cache;
-
-       struct list_head cca_link;              /* adapter list */
-       struct list_head eh_wakeup_list;        /* event wakeup list */
-       wait_queue_head_t req_vq_wo;
-
-       /* Cached RNIC properties */
-       struct ib_device_attr props;
-
-       struct c2_pd_table pd_table;
-       struct c2_qp_table qp_table;
-       int ports;              /* num of GigE ports */
-       int devnum;
-       spinlock_t vqlock;      /* sync vbs req MQ */
-
-       /* Verbs Queues */
-       struct c2_mq req_vq;    /* Verbs Request MQ */
-       struct c2_mq rep_vq;    /* Verbs Reply MQ */
-       struct c2_mq aeq;       /* Async Events MQ */
-
-       /* Kernel client MQs */
-       struct sp_chunk *kern_mqsp_pool;
-
-       /* Device updates these values when posting messages to a host
-        * target queue */
-       u16 req_vq_shared;
-       u16 rep_vq_shared;
-       u16 aeq_shared;
-       u16 irq_claimed;
-
-       /*
-        * Shared host target pages for user-accessible MQs.
-        */
-       int hthead;             /* index of first free entry */
-       void *htpages;          /* kernel vaddr */
-       int htlen;              /* length of htpages memory */
-       void *htuva;            /* user mapped vaddr */
-       spinlock_t htlock;      /* serialize allocation */
-
-       u64 adapter_hint_uva;   /* access to the activity FIFO */
-
-       //      spinlock_t aeq_lock;
-       //      spinlock_t rnic_lock;
-
-       __be16 *hint_count;
-       dma_addr_t hint_count_dma;
-       u16 hints_read;
-
-       int init;               /* TRUE if it's ready */
-       char ae_cache_name[16];
-       char vq_cache_name[16];
-};
-
-struct c2_port {
-       u32 msg_enable;
-       struct c2_dev *c2dev;
-       struct net_device *netdev;
-
-       spinlock_t tx_lock;
-       u32 tx_avail;
-       struct c2_ring tx_ring;
-       struct c2_ring rx_ring;
-
-       void *mem;              /* PCI memory for host rings */
-       dma_addr_t dma;
-       unsigned long mem_size;
-
-       u32 rx_buf_size;
-};
-
-/*
- * Activity FIFO registers in BAR0.
- */
-#define PCI_BAR0_HOST_HINT     0x100
-#define PCI_BAR0_ADAPTER_HINT  0x2000
-
-/*
- * Ammasso PCI vendor id and Cepheus PCI device id.
- */
-#define CQ_ARMED       0x01
-#define CQ_WAIT_FOR_DMA        0x80
-
-/*
- * The format of a hint is as follows:
- * Lower 16 bits are the count of hints for the queue.
- * Next 15 bits are the qp_index
- * Upper most bit depends on who reads it:
- *    If read by producer, then it means Full (1) or Not-Full (0)
- *    If read by consumer, then it means Empty (1) or Not-Empty (0)
- */
-#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
-#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
-#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
-
-
-/*
- * The following defines the offset in SDRAM for the c2_adapter_pci_regs_t
- * struct.
- */
-#define C2_ADAPTER_PCI_REGS_OFFSET 0x10000
-
-#ifndef readq
-static inline u64 readq(const void __iomem * addr)
-{
-       u64 ret = readl(addr + 4);
-       ret <<= 32;
-       ret |= readl(addr);
-
-       return ret;
-}
-#endif
-
-#ifndef writeq
-static inline void __raw_writeq(u64 val, void __iomem * addr)
-{
-       __raw_writel((u32) (val), addr);
-       __raw_writel((u32) (val >> 32), (addr + 4));
-}
-#endif
-
-#define C2_SET_CUR_RX(c2dev, cur_rx) \
-       __raw_writel((__force u32) cpu_to_be32(cur_rx), c2dev->mmio_txp_ring + 4092)
-
-#define C2_GET_CUR_RX(c2dev) \
-       be32_to_cpu((__force __be32) readl(c2dev->mmio_txp_ring + 4092))
-
-static inline struct c2_dev *to_c2dev(struct ib_device *ibdev)
-{
-       return container_of(ibdev, struct c2_dev, ibdev);
-}
-
-static inline int c2_errno(void *reply)
-{
-       switch (c2_wr_get_result(reply)) {
-       case C2_OK:
-               return 0;
-       case CCERR_NO_BUFS:
-       case CCERR_INSUFFICIENT_RESOURCES:
-       case CCERR_ZERO_RDMA_READ_RESOURCES:
-               return -ENOMEM;
-       case CCERR_MR_IN_USE:
-       case CCERR_QP_IN_USE:
-               return -EBUSY;
-       case CCERR_ADDR_IN_USE:
-               return -EADDRINUSE;
-       case CCERR_ADDR_NOT_AVAIL:
-               return -EADDRNOTAVAIL;
-       case CCERR_CONN_RESET:
-               return -ECONNRESET;
-       case CCERR_NOT_IMPLEMENTED:
-       case CCERR_INVALID_WQE:
-               return -ENOSYS;
-       case CCERR_QP_NOT_PRIVILEGED:
-               return -EPERM;
-       case CCERR_STACK_ERROR:
-               return -EPROTO;
-       case CCERR_ACCESS_VIOLATION:
-       case CCERR_BASE_AND_BOUNDS_VIOLATION:
-               return -EFAULT;
-       case CCERR_STAG_STATE_NOT_INVALID:
-       case CCERR_INVALID_ADDRESS:
-       case CCERR_INVALID_CQ:
-       case CCERR_INVALID_EP:
-       case CCERR_INVALID_MODIFIER:
-       case CCERR_INVALID_MTU:
-       case CCERR_INVALID_PD_ID:
-       case CCERR_INVALID_QP:
-       case CCERR_INVALID_RNIC:
-       case CCERR_INVALID_STAG:
-               return -EINVAL;
-       default:
-               return -EAGAIN;
-       }
-}
-
-/* Device */
-int c2_register_device(struct c2_dev *c2dev);
-void c2_unregister_device(struct c2_dev *c2dev);
-int c2_rnic_init(struct c2_dev *c2dev);
-void c2_rnic_term(struct c2_dev *c2dev);
-void c2_rnic_interrupt(struct c2_dev *c2dev);
-int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask);
-int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask);
-
-/* QPs */
-int c2_alloc_qp(struct c2_dev *c2dev, struct c2_pd *pd,
-                      struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp);
-void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp);
-struct ib_qp *c2_get_qp(struct ib_device *device, int qpn);
-int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
-                       struct ib_qp_attr *attr, int attr_mask);
-int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
-                                int ord, int ird);
-int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
-                       struct ib_send_wr **bad_wr);
-int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
-                          struct ib_recv_wr **bad_wr);
-void c2_init_qp_table(struct c2_dev *c2dev);
-void c2_cleanup_qp_table(struct c2_dev *c2dev);
-void c2_set_qp_state(struct c2_qp *, int);
-struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn);
-
-/* PDs */
-int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd);
-void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd);
-int c2_init_pd_table(struct c2_dev *c2dev);
-void c2_cleanup_pd_table(struct c2_dev *c2dev);
-
-/* CQs */
-int c2_init_cq(struct c2_dev *c2dev, int entries,
-                     struct c2_ucontext *ctx, struct c2_cq *cq);
-void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq);
-void c2_cq_event(struct c2_dev *c2dev, u32 mq_index);
-void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index);
-int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
-
-/* CM */
-int c2_llp_connect(struct iw_cm_id *cm_id,
-                         struct iw_cm_conn_param *iw_param);
-int c2_llp_accept(struct iw_cm_id *cm_id,
-                        struct iw_cm_conn_param *iw_param);
-int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata,
-                        u8 pdata_len);
-int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog);
-int c2_llp_service_destroy(struct iw_cm_id *cm_id);
-
-/* MM */
-int c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
-                                     int page_size, int pbl_depth, u32 length,
-                                     u32 off, u64 *va, enum c2_acf acf,
-                                     struct c2_mr *mr);
-int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index);
-
-/* AE */
-void c2_ae_event(struct c2_dev *c2dev, u32 mq_index);
-
-/* MQSP Allocator */
-int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
-                            struct sp_chunk **root);
-void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root);
-__be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
-                            dma_addr_t *dma_addr, gfp_t gfp_mask);
-void c2_free_mqsp(__be16* mqsp);
-#endif
diff --git a/drivers/staging/rdma/amso1100/c2_ae.c b/drivers/staging/rdma/amso1100/c2_ae.c
deleted file mode 100644 (file)
index eb7a92b..0000000
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "c2.h"
-#include <rdma/iw_cm.h>
-#include "c2_status.h"
-#include "c2_ae.h"
-
-static int c2_convert_cm_status(u32 c2_status)
-{
-       switch (c2_status) {
-       case C2_CONN_STATUS_SUCCESS:
-               return 0;
-       case C2_CONN_STATUS_REJECTED:
-               return -ENETRESET;
-       case C2_CONN_STATUS_REFUSED:
-               return -ECONNREFUSED;
-       case C2_CONN_STATUS_TIMEDOUT:
-               return -ETIMEDOUT;
-       case C2_CONN_STATUS_NETUNREACH:
-               return -ENETUNREACH;
-       case C2_CONN_STATUS_HOSTUNREACH:
-               return -EHOSTUNREACH;
-       case C2_CONN_STATUS_INVALID_RNIC:
-               return -EINVAL;
-       case C2_CONN_STATUS_INVALID_QP:
-               return -EINVAL;
-       case C2_CONN_STATUS_INVALID_QP_STATE:
-               return -EINVAL;
-       case C2_CONN_STATUS_ADDR_NOT_AVAIL:
-               return -EADDRNOTAVAIL;
-       default:
-               printk(KERN_ERR PFX
-                      "%s - Unable to convert CM status: %d\n",
-                      __func__, c2_status);
-               return -EIO;
-       }
-}
-
-static const char* to_event_str(int event)
-{
-       static const char* event_str[] = {
-               "CCAE_REMOTE_SHUTDOWN",
-               "CCAE_ACTIVE_CONNECT_RESULTS",
-               "CCAE_CONNECTION_REQUEST",
-               "CCAE_LLP_CLOSE_COMPLETE",
-               "CCAE_TERMINATE_MESSAGE_RECEIVED",
-               "CCAE_LLP_CONNECTION_RESET",
-               "CCAE_LLP_CONNECTION_LOST",
-               "CCAE_LLP_SEGMENT_SIZE_INVALID",
-               "CCAE_LLP_INVALID_CRC",
-               "CCAE_LLP_BAD_FPDU",
-               "CCAE_INVALID_DDP_VERSION",
-               "CCAE_INVALID_RDMA_VERSION",
-               "CCAE_UNEXPECTED_OPCODE",
-               "CCAE_INVALID_DDP_QUEUE_NUMBER",
-               "CCAE_RDMA_READ_NOT_ENABLED",
-               "CCAE_RDMA_WRITE_NOT_ENABLED",
-               "CCAE_RDMA_READ_TOO_SMALL",
-               "CCAE_NO_L_BIT",
-               "CCAE_TAGGED_INVALID_STAG",
-               "CCAE_TAGGED_BASE_BOUNDS_VIOLATION",
-               "CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION",
-               "CCAE_TAGGED_INVALID_PD",
-               "CCAE_WRAP_ERROR",
-               "CCAE_BAD_CLOSE",
-               "CCAE_BAD_LLP_CLOSE",
-               "CCAE_INVALID_MSN_RANGE",
-               "CCAE_INVALID_MSN_GAP",
-               "CCAE_IRRQ_OVERFLOW",
-               "CCAE_IRRQ_MSN_GAP",
-               "CCAE_IRRQ_MSN_RANGE",
-               "CCAE_IRRQ_INVALID_STAG",
-               "CCAE_IRRQ_BASE_BOUNDS_VIOLATION",
-               "CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION",
-               "CCAE_IRRQ_INVALID_PD",
-               "CCAE_IRRQ_WRAP_ERROR",
-               "CCAE_CQ_SQ_COMPLETION_OVERFLOW",
-               "CCAE_CQ_RQ_COMPLETION_ERROR",
-               "CCAE_QP_SRQ_WQE_ERROR",
-               "CCAE_QP_LOCAL_CATASTROPHIC_ERROR",
-               "CCAE_CQ_OVERFLOW",
-               "CCAE_CQ_OPERATION_ERROR",
-               "CCAE_SRQ_LIMIT_REACHED",
-               "CCAE_QP_RQ_LIMIT_REACHED",
-               "CCAE_SRQ_CATASTROPHIC_ERROR",
-               "CCAE_RNIC_CATASTROPHIC_ERROR"
-       };
-
-       if (event < CCAE_REMOTE_SHUTDOWN ||
-           event > CCAE_RNIC_CATASTROPHIC_ERROR)
-               return "<invalid event>";
-
-       event -= CCAE_REMOTE_SHUTDOWN;
-       return event_str[event];
-}
-
-static const char *to_qp_state_str(int state)
-{
-       switch (state) {
-       case C2_QP_STATE_IDLE:
-               return "C2_QP_STATE_IDLE";
-       case C2_QP_STATE_CONNECTING:
-               return "C2_QP_STATE_CONNECTING";
-       case C2_QP_STATE_RTS:
-               return "C2_QP_STATE_RTS";
-       case C2_QP_STATE_CLOSING:
-               return "C2_QP_STATE_CLOSING";
-       case C2_QP_STATE_TERMINATE:
-               return "C2_QP_STATE_TERMINATE";
-       case C2_QP_STATE_ERROR:
-               return "C2_QP_STATE_ERROR";
-       default:
-               return "<invalid QP state>";
-       }
-}
-
-void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
-{
-       struct c2_mq *mq = c2dev->qptr_array[mq_index];
-       union c2wr *wr;
-       void *resource_user_context;
-       struct iw_cm_event cm_event;
-       struct ib_event ib_event;
-       enum c2_resource_indicator resource_indicator;
-       enum c2_event_id event_id;
-       unsigned long flags;
-       int status;
-       struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_event.local_addr;
-       struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_event.remote_addr;
-
-       /*
-        * retrieve the message
-        */
-       wr = c2_mq_consume(mq);
-       if (!wr)
-               return;
-
-       memset(&ib_event, 0, sizeof(ib_event));
-       memset(&cm_event, 0, sizeof(cm_event));
-
-       event_id = c2_wr_get_id(wr);
-       resource_indicator = be32_to_cpu(wr->ae.ae_generic.resource_type);
-       resource_user_context =
-           (void *) (unsigned long) wr->ae.ae_generic.user_context;
-
-       status = cm_event.status = c2_convert_cm_status(c2_wr_get_result(wr));
-
-       pr_debug("event received c2_dev=%p, event_id=%d, "
-               "resource_indicator=%d, user_context=%p, status = %d\n",
-               c2dev, event_id, resource_indicator, resource_user_context,
-               status);
-
-       switch (resource_indicator) {
-       case C2_RES_IND_QP:{
-
-               struct c2_qp *qp = resource_user_context;
-               struct iw_cm_id *cm_id = qp->cm_id;
-               struct c2wr_ae_active_connect_results *res;
-
-               if (!cm_id) {
-                       pr_debug("event received, but cm_id is <nul>, qp=%p!\n",
-                               qp);
-                       goto ignore_it;
-               }
-               pr_debug("%s: event = %s, user_context=%llx, "
-                       "resource_type=%x, "
-                       "resource=%x, qp_state=%s\n",
-                       __func__,
-                       to_event_str(event_id),
-                       (unsigned long long) wr->ae.ae_generic.user_context,
-                       be32_to_cpu(wr->ae.ae_generic.resource_type),
-                       be32_to_cpu(wr->ae.ae_generic.resource),
-                       to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state)));
-
-               c2_set_qp_state(qp, be32_to_cpu(wr->ae.ae_generic.qp_state));
-
-               switch (event_id) {
-               case CCAE_ACTIVE_CONNECT_RESULTS:
-                       res = &wr->ae.ae_active_connect_results;
-                       cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
-                       laddr->sin_addr.s_addr = res->laddr;
-                       raddr->sin_addr.s_addr = res->raddr;
-                       laddr->sin_port = res->lport;
-                       raddr->sin_port = res->rport;
-                       if (status == 0) {
-                               cm_event.private_data_len =
-                                       be32_to_cpu(res->private_data_length);
-                               cm_event.private_data = res->private_data;
-                       } else {
-                               spin_lock_irqsave(&qp->lock, flags);
-                               if (qp->cm_id) {
-                                       qp->cm_id->rem_ref(qp->cm_id);
-                                       qp->cm_id = NULL;
-                               }
-                               spin_unlock_irqrestore(&qp->lock, flags);
-                               cm_event.private_data_len = 0;
-                               cm_event.private_data = NULL;
-                       }
-                       if (cm_id->event_handler)
-                               cm_id->event_handler(cm_id, &cm_event);
-                       break;
-               case CCAE_TERMINATE_MESSAGE_RECEIVED:
-               case CCAE_CQ_SQ_COMPLETION_OVERFLOW:
-                       ib_event.device = &c2dev->ibdev;
-                       ib_event.element.qp = &qp->ibqp;
-                       ib_event.event = IB_EVENT_QP_REQ_ERR;
-
-                       if (qp->ibqp.event_handler)
-                               qp->ibqp.event_handler(&ib_event,
-                                                      qp->ibqp.
-                                                      qp_context);
-                       break;
-               case CCAE_BAD_CLOSE:
-               case CCAE_LLP_CLOSE_COMPLETE:
-               case CCAE_LLP_CONNECTION_RESET:
-               case CCAE_LLP_CONNECTION_LOST:
-                       BUG_ON(cm_id->event_handler==(void*)0x6b6b6b6b);
-
-                       spin_lock_irqsave(&qp->lock, flags);
-                       if (qp->cm_id) {
-                               qp->cm_id->rem_ref(qp->cm_id);
-                               qp->cm_id = NULL;
-                       }
-                       spin_unlock_irqrestore(&qp->lock, flags);
-                       cm_event.event = IW_CM_EVENT_CLOSE;
-                       cm_event.status = 0;
-                       if (cm_id->event_handler)
-                               cm_id->event_handler(cm_id, &cm_event);
-                       break;
-               default:
-                       BUG_ON(1);
-                       pr_debug("%s:%d Unexpected event_id=%d on QP=%p, "
-                               "CM_ID=%p\n",
-                               __func__, __LINE__,
-                               event_id, qp, cm_id);
-                       break;
-               }
-               break;
-       }
-
-       case C2_RES_IND_EP:{
-
-               struct c2wr_ae_connection_request *req =
-                       &wr->ae.ae_connection_request;
-               struct iw_cm_id *cm_id =
-                       resource_user_context;
-
-               pr_debug("C2_RES_IND_EP event_id=%d\n", event_id);
-               if (event_id != CCAE_CONNECTION_REQUEST) {
-                       pr_debug("%s: Invalid event_id: %d\n",
-                               __func__, event_id);
-                       break;
-               }
-               cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
-               cm_event.provider_data = (void*)(unsigned long)req->cr_handle;
-               laddr->sin_addr.s_addr = req->laddr;
-               raddr->sin_addr.s_addr = req->raddr;
-               laddr->sin_port = req->lport;
-               raddr->sin_port = req->rport;
-               cm_event.private_data_len =
-                       be32_to_cpu(req->private_data_length);
-               cm_event.private_data = req->private_data;
-               /*
-                * Until ird/ord negotiation via MPAv2 support is added, send
-                * max supported values
-                */
-               cm_event.ird = cm_event.ord = 128;
-
-               if (cm_id->event_handler)
-                       cm_id->event_handler(cm_id, &cm_event);
-               break;
-       }
-
-       case C2_RES_IND_CQ:{
-               struct c2_cq *cq =
-                   resource_user_context;
-
-               pr_debug("IB_EVENT_CQ_ERR\n");
-               ib_event.device = &c2dev->ibdev;
-               ib_event.element.cq = &cq->ibcq;
-               ib_event.event = IB_EVENT_CQ_ERR;
-
-               if (cq->ibcq.event_handler)
-                       cq->ibcq.event_handler(&ib_event,
-                                              cq->ibcq.cq_context);
-               break;
-       }
-
-       default:
-               printk("Bad resource indicator = %d\n",
-                      resource_indicator);
-               break;
-       }
-
- ignore_it:
-       c2_mq_free(mq);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_ae.h b/drivers/staging/rdma/amso1100/c2_ae.h
deleted file mode 100644 (file)
index 3a065c3..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _C2_AE_H_
-#define _C2_AE_H_
-
-/*
- * WARNING: If you change this file, also bump C2_IVN_BASE
- * in common/include/clustercore/c2_ivn.h.
- */
-
-/*
- * Asynchronous Event Identifiers
- *
- * These start at 0x80 only so it's obvious from inspection that
- * they are not work-request statuses.  This isn't critical.
- *
- * NOTE: these event id's must fit in eight bits.
- */
-enum c2_event_id {
-       CCAE_REMOTE_SHUTDOWN = 0x80,
-       CCAE_ACTIVE_CONNECT_RESULTS,
-       CCAE_CONNECTION_REQUEST,
-       CCAE_LLP_CLOSE_COMPLETE,
-       CCAE_TERMINATE_MESSAGE_RECEIVED,
-       CCAE_LLP_CONNECTION_RESET,
-       CCAE_LLP_CONNECTION_LOST,
-       CCAE_LLP_SEGMENT_SIZE_INVALID,
-       CCAE_LLP_INVALID_CRC,
-       CCAE_LLP_BAD_FPDU,
-       CCAE_INVALID_DDP_VERSION,
-       CCAE_INVALID_RDMA_VERSION,
-       CCAE_UNEXPECTED_OPCODE,
-       CCAE_INVALID_DDP_QUEUE_NUMBER,
-       CCAE_RDMA_READ_NOT_ENABLED,
-       CCAE_RDMA_WRITE_NOT_ENABLED,
-       CCAE_RDMA_READ_TOO_SMALL,
-       CCAE_NO_L_BIT,
-       CCAE_TAGGED_INVALID_STAG,
-       CCAE_TAGGED_BASE_BOUNDS_VIOLATION,
-       CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION,
-       CCAE_TAGGED_INVALID_PD,
-       CCAE_WRAP_ERROR,
-       CCAE_BAD_CLOSE,
-       CCAE_BAD_LLP_CLOSE,
-       CCAE_INVALID_MSN_RANGE,
-       CCAE_INVALID_MSN_GAP,
-       CCAE_IRRQ_OVERFLOW,
-       CCAE_IRRQ_MSN_GAP,
-       CCAE_IRRQ_MSN_RANGE,
-       CCAE_IRRQ_INVALID_STAG,
-       CCAE_IRRQ_BASE_BOUNDS_VIOLATION,
-       CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION,
-       CCAE_IRRQ_INVALID_PD,
-       CCAE_IRRQ_WRAP_ERROR,
-       CCAE_CQ_SQ_COMPLETION_OVERFLOW,
-       CCAE_CQ_RQ_COMPLETION_ERROR,
-       CCAE_QP_SRQ_WQE_ERROR,
-       CCAE_QP_LOCAL_CATASTROPHIC_ERROR,
-       CCAE_CQ_OVERFLOW,
-       CCAE_CQ_OPERATION_ERROR,
-       CCAE_SRQ_LIMIT_REACHED,
-       CCAE_QP_RQ_LIMIT_REACHED,
-       CCAE_SRQ_CATASTROPHIC_ERROR,
-       CCAE_RNIC_CATASTROPHIC_ERROR
-/* WARNING If you add more id's, make sure their values fit in eight bits. */
-};
-
-/*
- * Resource Indicators and Identifiers
- */
-enum c2_resource_indicator {
-       C2_RES_IND_QP = 1,
-       C2_RES_IND_EP,
-       C2_RES_IND_CQ,
-       C2_RES_IND_SRQ,
-};
-
-#endif /* _C2_AE_H_ */
diff --git a/drivers/staging/rdma/amso1100/c2_alloc.c b/drivers/staging/rdma/amso1100/c2_alloc.c
deleted file mode 100644 (file)
index 039872d..0000000
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/errno.h>
-#include <linux/bitmap.h>
-
-#include "c2.h"
-
-static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask,
-                              struct sp_chunk **head)
-{
-       int i;
-       struct sp_chunk *new_head;
-       dma_addr_t dma_addr;
-
-       new_head = dma_alloc_coherent(&c2dev->pcidev->dev, PAGE_SIZE,
-                                     &dma_addr, gfp_mask);
-       if (new_head == NULL)
-               return -ENOMEM;
-
-       new_head->dma_addr = dma_addr;
-       dma_unmap_addr_set(new_head, mapping, new_head->dma_addr);
-
-       new_head->next = NULL;
-       new_head->head = 0;
-
-       /* build list where each index is the next free slot */
-       for (i = 0;
-            i < (PAGE_SIZE - sizeof(struct sp_chunk) -
-                 sizeof(u16)) / sizeof(u16) - 1;
-            i++) {
-               new_head->shared_ptr[i] = i + 1;
-       }
-       /* terminate list */
-       new_head->shared_ptr[i] = 0xFFFF;
-
-       *head = new_head;
-       return 0;
-}
-
-int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
-                     struct sp_chunk **root)
-{
-       return c2_alloc_mqsp_chunk(c2dev, gfp_mask, root);
-}
-
-void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root)
-{
-       struct sp_chunk *next;
-
-       while (root) {
-               next = root->next;
-               dma_free_coherent(&c2dev->pcidev->dev, PAGE_SIZE, root,
-                                 dma_unmap_addr(root, mapping));
-               root = next;
-       }
-}
-
-__be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
-                     dma_addr_t *dma_addr, gfp_t gfp_mask)
-{
-       u16 mqsp;
-
-       while (head) {
-               mqsp = head->head;
-               if (mqsp != 0xFFFF) {
-                       head->head = head->shared_ptr[mqsp];
-                       break;
-               } else if (head->next == NULL) {
-                       if (c2_alloc_mqsp_chunk(c2dev, gfp_mask, &head->next) ==
-                           0) {
-                               head = head->next;
-                               mqsp = head->head;
-                               head->head = head->shared_ptr[mqsp];
-                               break;
-                       } else
-                               return NULL;
-               } else
-                       head = head->next;
-       }
-       if (head) {
-               *dma_addr = head->dma_addr +
-                           ((unsigned long) &(head->shared_ptr[mqsp]) -
-                            (unsigned long) head);
-               pr_debug("%s addr %p dma_addr %llx\n", __func__,
-                        &(head->shared_ptr[mqsp]), (unsigned long long) *dma_addr);
-               return (__force __be16 *) &(head->shared_ptr[mqsp]);
-       }
-       return NULL;
-}
-
-void c2_free_mqsp(__be16 *mqsp)
-{
-       struct sp_chunk *head;
-       u16 idx;
-
-       /* The chunk containing this ptr begins at the page boundary */
-       head = (struct sp_chunk *) ((unsigned long) mqsp & PAGE_MASK);
-
-       /* Link head to new mqsp */
-       *mqsp = (__force __be16) head->head;
-
-       /* Compute the shared_ptr index */
-       idx = (offset_in_page(mqsp)) >> 1;
-       idx -= (unsigned long) &(((struct sp_chunk *) 0)->shared_ptr[0]) >> 1;
-
-       /* Point this index at the head */
-       head->shared_ptr[idx] = head->head;
-
-       /* Point head at this index */
-       head->head = idx;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_cm.c b/drivers/staging/rdma/amso1100/c2_cm.c
deleted file mode 100644 (file)
index f8dbdb9..0000000
+++ /dev/null
@@ -1,458 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc.  All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-#include <linux/slab.h>
-
-#include "c2.h"
-#include "c2_wr.h"
-#include "c2_vq.h"
-#include <rdma/iw_cm.h>
-
-int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-       struct c2_dev *c2dev = to_c2dev(cm_id->device);
-       struct ib_qp *ibqp;
-       struct c2_qp *qp;
-       struct c2wr_qp_connect_req *wr; /* variable size needs a malloc. */
-       struct c2_vq_req *vq_req;
-       int err;
-       struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
-
-       if (cm_id->remote_addr.ss_family != AF_INET)
-               return -ENOSYS;
-
-       ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
-       if (!ibqp)
-               return -EINVAL;
-       qp = to_c2qp(ibqp);
-
-       /* Associate QP <--> CM_ID */
-       cm_id->provider_data = qp;
-       cm_id->add_ref(cm_id);
-       qp->cm_id = cm_id;
-
-       /*
-        * only support the max private_data length
-        */
-       if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
-               err = -EINVAL;
-               goto bail0;
-       }
-       /*
-        * Set the rdma read limits
-        */
-       err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
-       if (err)
-               goto bail0;
-
-       /*
-        * Create and send a WR_QP_CONNECT...
-        */
-       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-       if (!wr) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       c2_wr_set_id(wr, CCWR_QP_CONNECT);
-       wr->hdr.context = 0;
-       wr->rnic_handle = c2dev->adapter_handle;
-       wr->qp_handle = qp->adapter_handle;
-
-       wr->remote_addr = raddr->sin_addr.s_addr;
-       wr->remote_port = raddr->sin_port;
-
-       /*
-        * Move any private data from the callers's buf into
-        * the WR.
-        */
-       if (iw_param->private_data) {
-               wr->private_data_length =
-                       cpu_to_be32(iw_param->private_data_len);
-               memcpy(&wr->private_data[0], iw_param->private_data,
-                      iw_param->private_data_len);
-       } else
-               wr->private_data_length = 0;
-
-       /*
-        * Send WR to adapter.  NOTE: There is no synch reply from
-        * the adapter.
-        */
-       err = vq_send_wr(c2dev, (union c2wr *) wr);
-       vq_req_free(c2dev, vq_req);
-
- bail1:
-       kfree(wr);
- bail0:
-       if (err) {
-               /*
-                * If we fail, release reference on QP and
-                * disassociate QP from CM_ID
-                */
-               cm_id->provider_data = NULL;
-               qp->cm_id = NULL;
-               cm_id->rem_ref(cm_id);
-       }
-       return err;
-}
-
-int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
-{
-       struct c2_dev *c2dev;
-       struct c2wr_ep_listen_create_req wr;
-       struct c2wr_ep_listen_create_rep *reply;
-       struct c2_vq_req *vq_req;
-       int err;
-       struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
-
-       if (cm_id->local_addr.ss_family != AF_INET)
-               return -ENOSYS;
-
-       c2dev = to_c2dev(cm_id->device);
-       if (c2dev == NULL)
-               return -EINVAL;
-
-       /*
-        * Allocate verbs request.
-        */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       /*
-        * Build the WR
-        */
-       c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE);
-       wr.hdr.context = (u64) (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.local_addr = laddr->sin_addr.s_addr;
-       wr.local_port = laddr->sin_port;
-       wr.backlog = cpu_to_be32(backlog);
-       wr.user_context = (u64) (unsigned long) cm_id;
-
-       /*
-        * Reference the request struct.  Dereferenced in the int handler.
-        */
-       vq_req_get(c2dev, vq_req);
-
-       /*
-        * Send WR to adapter
-        */
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       /*
-        * Wait for reply from adapter
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail0;
-
-       /*
-        * Process reply
-        */
-       reply =
-           (struct c2wr_ep_listen_create_rep *) (unsigned long) vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       if ((err = c2_errno(reply)) != 0)
-               goto bail1;
-
-       /*
-        * Keep the adapter handle. Used in subsequent destroy
-        */
-       cm_id->provider_data = (void*)(unsigned long) reply->ep_handle;
-
-       /*
-        * free vq stuff
-        */
-       vq_repbuf_free(c2dev, reply);
-       vq_req_free(c2dev, vq_req);
-
-       return 0;
-
- bail1:
-       vq_repbuf_free(c2dev, reply);
- bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-
-int c2_llp_service_destroy(struct iw_cm_id *cm_id)
-{
-
-       struct c2_dev *c2dev;
-       struct c2wr_ep_listen_destroy_req wr;
-       struct c2wr_ep_listen_destroy_rep *reply;
-       struct c2_vq_req *vq_req;
-       int err;
-
-       c2dev = to_c2dev(cm_id->device);
-       if (c2dev == NULL)
-               return -EINVAL;
-
-       /*
-        * Allocate verbs request.
-        */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       /*
-        * Build the WR
-        */
-       c2_wr_set_id(&wr, CCWR_EP_LISTEN_DESTROY);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.ep_handle = (u32)(unsigned long)cm_id->provider_data;
-
-       /*
-        * reference the request struct.  dereferenced in the int handler.
-        */
-       vq_req_get(c2dev, vq_req);
-
-       /*
-        * Send WR to adapter
-        */
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       /*
-        * Wait for reply from adapter
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail0;
-
-       /*
-        * Process reply
-        */
-       reply=(struct c2wr_ep_listen_destroy_rep *)(unsigned long)vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       vq_repbuf_free(c2dev, reply);
- bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-       struct c2_dev *c2dev = to_c2dev(cm_id->device);
-       struct c2_qp *qp;
-       struct ib_qp *ibqp;
-       struct c2wr_cr_accept_req *wr;  /* variable length WR */
-       struct c2_vq_req *vq_req;
-       struct c2wr_cr_accept_rep *reply;       /* VQ Reply msg ptr. */
-       int err;
-
-       ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
-       if (!ibqp)
-               return -EINVAL;
-       qp = to_c2qp(ibqp);
-
-       /* Set the RDMA read limits */
-       err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
-       if (err)
-               goto bail0;
-
-       /* Allocate verbs request. */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-       vq_req->qp = qp;
-       vq_req->cm_id = cm_id;
-       vq_req->event = IW_CM_EVENT_ESTABLISHED;
-
-       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-       if (!wr) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       /* Build the WR */
-       c2_wr_set_id(wr, CCWR_CR_ACCEPT);
-       wr->hdr.context = (unsigned long) vq_req;
-       wr->rnic_handle = c2dev->adapter_handle;
-       wr->ep_handle = (u32) (unsigned long) cm_id->provider_data;
-       wr->qp_handle = qp->adapter_handle;
-
-       /* Replace the cr_handle with the QP after accept */
-       cm_id->provider_data = qp;
-       cm_id->add_ref(cm_id);
-       qp->cm_id = cm_id;
-
-       cm_id->provider_data = qp;
-
-       /* Validate private_data length */
-       if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
-               err = -EINVAL;
-               goto bail1;
-       }
-
-       if (iw_param->private_data) {
-               wr->private_data_length = cpu_to_be32(iw_param->private_data_len);
-               memcpy(&wr->private_data[0],
-                      iw_param->private_data, iw_param->private_data_len);
-       } else
-               wr->private_data_length = 0;
-
-       /* Reference the request struct.  Dereferenced in the int handler. */
-       vq_req_get(c2dev, vq_req);
-
-       /* Send WR to adapter */
-       err = vq_send_wr(c2dev, (union c2wr *) wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail1;
-       }
-
-       /* Wait for reply from adapter */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail1;
-
-       /* Check that reply is present */
-       reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       err = c2_errno(reply);
-       vq_repbuf_free(c2dev, reply);
-
-       if (!err)
-               c2_set_qp_state(qp, C2_QP_STATE_RTS);
- bail1:
-       kfree(wr);
-       vq_req_free(c2dev, vq_req);
- bail0:
-       if (err) {
-               /*
-                * If we fail, release reference on QP and
-                * disassociate QP from CM_ID
-                */
-               cm_id->provider_data = NULL;
-               qp->cm_id = NULL;
-               cm_id->rem_ref(cm_id);
-       }
-       return err;
-}
-
-int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
-{
-       struct c2_dev *c2dev;
-       struct c2wr_cr_reject_req wr;
-       struct c2_vq_req *vq_req;
-       struct c2wr_cr_reject_rep *reply;
-       int err;
-
-       c2dev = to_c2dev(cm_id->device);
-
-       /*
-        * Allocate verbs request.
-        */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       /*
-        * Build the WR
-        */
-       c2_wr_set_id(&wr, CCWR_CR_REJECT);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.ep_handle = (u32) (unsigned long) cm_id->provider_data;
-
-       /*
-        * reference the request struct.  dereferenced in the int handler.
-        */
-       vq_req_get(c2dev, vq_req);
-
-       /*
-        * Send WR to adapter
-        */
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       /*
-        * Wait for reply from adapter
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail0;
-
-       /*
-        * Process reply
-        */
-       reply = (struct c2wr_cr_reject_rep *) (unsigned long)
-               vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-       err = c2_errno(reply);
-       /*
-        * free vq stuff
-        */
-       vq_repbuf_free(c2dev, reply);
-
- bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_cq.c b/drivers/staging/rdma/amso1100/c2_cq.c
deleted file mode 100644 (file)
index 3ef881f..0000000
+++ /dev/null
@@ -1,440 +0,0 @@
-/*
- * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-#include <linux/gfp.h>
-
-#include "c2.h"
-#include "c2_vq.h"
-#include "c2_status.h"
-
-#define C2_CQ_MSG_SIZE ((sizeof(struct c2wr_ce) + 32-1) & ~(32-1))
-
-static struct c2_cq *c2_cq_get(struct c2_dev *c2dev, int cqn)
-{
-       struct c2_cq *cq;
-       unsigned long flags;
-
-       spin_lock_irqsave(&c2dev->lock, flags);
-       cq = c2dev->qptr_array[cqn];
-       if (!cq) {
-               spin_unlock_irqrestore(&c2dev->lock, flags);
-               return NULL;
-       }
-       atomic_inc(&cq->refcount);
-       spin_unlock_irqrestore(&c2dev->lock, flags);
-       return cq;
-}
-
-static void c2_cq_put(struct c2_cq *cq)
-{
-       if (atomic_dec_and_test(&cq->refcount))
-               wake_up(&cq->wait);
-}
-
-void c2_cq_event(struct c2_dev *c2dev, u32 mq_index)
-{
-       struct c2_cq *cq;
-
-       cq = c2_cq_get(c2dev, mq_index);
-       if (!cq) {
-               printk("discarding events on destroyed CQN=%d\n", mq_index);
-               return;
-       }
-
-       (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
-       c2_cq_put(cq);
-}
-
-void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index)
-{
-       struct c2_cq *cq;
-       struct c2_mq *q;
-
-       cq = c2_cq_get(c2dev, mq_index);
-       if (!cq)
-               return;
-
-       spin_lock_irq(&cq->lock);
-       q = &cq->mq;
-       if (q && !c2_mq_empty(q)) {
-               u16 priv = q->priv;
-               struct c2wr_ce *msg;
-
-               while (priv != be16_to_cpu(*q->shared)) {
-                       msg = (struct c2wr_ce *)
-                               (q->msg_pool.host + priv * q->msg_size);
-                       if (msg->qp_user_context == (u64) (unsigned long) qp) {
-                               msg->qp_user_context = (u64) 0;
-                       }
-                       priv = (priv + 1) % q->q_size;
-               }
-       }
-       spin_unlock_irq(&cq->lock);
-       c2_cq_put(cq);
-}
-
-static inline enum ib_wc_status c2_cqe_status_to_openib(u8 status)
-{
-       switch (status) {
-       case C2_OK:
-               return IB_WC_SUCCESS;
-       case CCERR_FLUSHED:
-               return IB_WC_WR_FLUSH_ERR;
-       case CCERR_BASE_AND_BOUNDS_VIOLATION:
-               return IB_WC_LOC_PROT_ERR;
-       case CCERR_ACCESS_VIOLATION:
-               return IB_WC_LOC_ACCESS_ERR;
-       case CCERR_TOTAL_LENGTH_TOO_BIG:
-               return IB_WC_LOC_LEN_ERR;
-       case CCERR_INVALID_WINDOW:
-               return IB_WC_MW_BIND_ERR;
-       default:
-               return IB_WC_GENERAL_ERR;
-       }
-}
-
-
-static inline int c2_poll_one(struct c2_dev *c2dev,
-                             struct c2_cq *cq, struct ib_wc *entry)
-{
-       struct c2wr_ce *ce;
-       struct c2_qp *qp;
-       int is_recv = 0;
-
-       ce = c2_mq_consume(&cq->mq);
-       if (!ce) {
-               return -EAGAIN;
-       }
-
-       /*
-        * if the qp returned is null then this qp has already
-        * been freed and we are unable process the completion.
-        * try pulling the next message
-        */
-       while ((qp =
-               (struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) {
-               c2_mq_free(&cq->mq);
-               ce = c2_mq_consume(&cq->mq);
-               if (!ce)
-                       return -EAGAIN;
-       }
-
-       entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce));
-       entry->wr_id = ce->hdr.context;
-       entry->qp = &qp->ibqp;
-       entry->wc_flags = 0;
-       entry->slid = 0;
-       entry->sl = 0;
-       entry->src_qp = 0;
-       entry->dlid_path_bits = 0;
-       entry->pkey_index = 0;
-
-       switch (c2_wr_get_id(ce)) {
-       case C2_WR_TYPE_SEND:
-               entry->opcode = IB_WC_SEND;
-               break;
-       case C2_WR_TYPE_RDMA_WRITE:
-               entry->opcode = IB_WC_RDMA_WRITE;
-               break;
-       case C2_WR_TYPE_RDMA_READ:
-               entry->opcode = IB_WC_RDMA_READ;
-               break;
-       case C2_WR_TYPE_BIND_MW:
-               entry->opcode = IB_WC_BIND_MW;
-               break;
-       case C2_WR_TYPE_RECV:
-               entry->byte_len = be32_to_cpu(ce->bytes_rcvd);
-               entry->opcode = IB_WC_RECV;
-               is_recv = 1;
-               break;
-       default:
-               break;
-       }
-
-       /* consume the WQEs */
-       if (is_recv)
-               c2_mq_lconsume(&qp->rq_mq, 1);
-       else
-               c2_mq_lconsume(&qp->sq_mq,
-                              be32_to_cpu(c2_wr_get_wqe_count(ce)) + 1);
-
-       /* free the message */
-       c2_mq_free(&cq->mq);
-
-       return 0;
-}
-
-int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
-       struct c2_dev *c2dev = to_c2dev(ibcq->device);
-       struct c2_cq *cq = to_c2cq(ibcq);
-       unsigned long flags;
-       int npolled, err;
-
-       spin_lock_irqsave(&cq->lock, flags);
-
-       for (npolled = 0; npolled < num_entries; ++npolled) {
-
-               err = c2_poll_one(c2dev, cq, entry + npolled);
-               if (err)
-                       break;
-       }
-
-       spin_unlock_irqrestore(&cq->lock, flags);
-
-       return npolled;
-}
-
-int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
-{
-       struct c2_mq_shared __iomem *shared;
-       struct c2_cq *cq;
-       unsigned long flags;
-       int ret = 0;
-
-       cq = to_c2cq(ibcq);
-       shared = cq->mq.peer;
-
-       if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP)
-               writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type);
-       else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
-               writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type);
-       else
-               return -EINVAL;
-
-       writeb(CQ_WAIT_FOR_DMA | CQ_ARMED, &shared->armed);
-
-       /*
-        * Now read back shared->armed to make the PCI
-        * write synchronous.  This is necessary for
-        * correct cq notification semantics.
-        */
-       readb(&shared->armed);
-
-       if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
-               spin_lock_irqsave(&cq->lock, flags);
-               ret = !c2_mq_empty(&cq->mq);
-               spin_unlock_irqrestore(&cq->lock, flags);
-       }
-
-       return ret;
-}
-
-static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
-{
-       dma_free_coherent(&c2dev->pcidev->dev, mq->q_size * mq->msg_size,
-                         mq->msg_pool.host, dma_unmap_addr(mq, mapping));
-}
-
-static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq,
-                          size_t q_size, size_t msg_size)
-{
-       u8 *pool_start;
-
-       if (q_size > SIZE_MAX / msg_size)
-               return -EINVAL;
-
-       pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size,
-                                       &mq->host_dma, GFP_KERNEL);
-       if (!pool_start)
-               return -ENOMEM;
-
-       c2_mq_rep_init(mq,
-                      0,               /* index (currently unknown) */
-                      q_size,
-                      msg_size,
-                      pool_start,
-                      NULL,    /* peer (currently unknown) */
-                      C2_MQ_HOST_TARGET);
-
-       dma_unmap_addr_set(mq, mapping, mq->host_dma);
-
-       return 0;
-}
-
-int c2_init_cq(struct c2_dev *c2dev, int entries,
-              struct c2_ucontext *ctx, struct c2_cq *cq)
-{
-       struct c2wr_cq_create_req wr;
-       struct c2wr_cq_create_rep *reply;
-       unsigned long peer_pa;
-       struct c2_vq_req *vq_req;
-       int err;
-
-       might_sleep();
-
-       cq->ibcq.cqe = entries - 1;
-       cq->is_kernel = !ctx;
-
-       /* Allocate a shared pointer */
-       cq->mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                     &cq->mq.shared_dma, GFP_KERNEL);
-       if (!cq->mq.shared)
-               return -ENOMEM;
-
-       /* Allocate pages for the message pool */
-       err = c2_alloc_cq_buf(c2dev, &cq->mq, entries + 1, C2_CQ_MSG_SIZE);
-       if (err)
-               goto bail0;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_CQ_CREATE);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.msg_size = cpu_to_be32(cq->mq.msg_size);
-       wr.depth = cpu_to_be32(cq->mq.q_size);
-       wr.shared_ht = cpu_to_be64(cq->mq.shared_dma);
-       wr.msg_pool = cpu_to_be64(cq->mq.host_dma);
-       wr.user_context = (u64) (unsigned long) (cq);
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail2;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail2;
-
-       reply = (struct c2wr_cq_create_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail2;
-       }
-
-       if ((err = c2_errno(reply)) != 0)
-               goto bail3;
-
-       cq->adapter_handle = reply->cq_handle;
-       cq->mq.index = be32_to_cpu(reply->mq_index);
-
-       peer_pa = c2dev->pa + be32_to_cpu(reply->adapter_shared);
-       cq->mq.peer = ioremap_nocache(peer_pa, PAGE_SIZE);
-       if (!cq->mq.peer) {
-               err = -ENOMEM;
-               goto bail3;
-       }
-
-       vq_repbuf_free(c2dev, reply);
-       vq_req_free(c2dev, vq_req);
-
-       spin_lock_init(&cq->lock);
-       atomic_set(&cq->refcount, 1);
-       init_waitqueue_head(&cq->wait);
-
-       /*
-        * Use the MQ index allocated by the adapter to
-        * store the CQ in the qptr_array
-        */
-       cq->cqn = cq->mq.index;
-       c2dev->qptr_array[cq->cqn] = cq;
-
-       return 0;
-
-bail3:
-       vq_repbuf_free(c2dev, reply);
-bail2:
-       vq_req_free(c2dev, vq_req);
-bail1:
-       c2_free_cq_buf(c2dev, &cq->mq);
-bail0:
-       c2_free_mqsp(cq->mq.shared);
-
-       return err;
-}
-
-void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq)
-{
-       int err;
-       struct c2_vq_req *vq_req;
-       struct c2wr_cq_destroy_req wr;
-       struct c2wr_cq_destroy_rep *reply;
-
-       might_sleep();
-
-       /* Clear CQ from the qptr array */
-       spin_lock_irq(&c2dev->lock);
-       c2dev->qptr_array[cq->mq.index] = NULL;
-       atomic_dec(&cq->refcount);
-       spin_unlock_irq(&c2dev->lock);
-
-       wait_event(cq->wait, !atomic_read(&cq->refcount));
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req) {
-               goto bail0;
-       }
-
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_CQ_DESTROY);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.cq_handle = cq->adapter_handle;
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail1;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail1;
-
-       reply = (struct c2wr_cq_destroy_rep *) (unsigned long) (vq_req->reply_msg);
-       if (reply)
-               vq_repbuf_free(c2dev, reply);
-bail1:
-       vq_req_free(c2dev, vq_req);
-bail0:
-       if (cq->is_kernel) {
-               c2_free_cq_buf(c2dev, &cq->mq);
-       }
-
-       return;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_intr.c b/drivers/staging/rdma/amso1100/c2_intr.c
deleted file mode 100644 (file)
index 74b32a9..0000000
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "c2.h"
-#include <rdma/iw_cm.h>
-#include "c2_vq.h"
-
-static void handle_mq(struct c2_dev *c2dev, u32 index);
-static void handle_vq(struct c2_dev *c2dev, u32 mq_index);
-
-/*
- * Handle RNIC interrupts
- */
-void c2_rnic_interrupt(struct c2_dev *c2dev)
-{
-       unsigned int mq_index;
-
-       while (c2dev->hints_read != be16_to_cpu(*c2dev->hint_count)) {
-               mq_index = readl(c2dev->regs + PCI_BAR0_HOST_HINT);
-               if (mq_index & 0x80000000) {
-                       break;
-               }
-
-               c2dev->hints_read++;
-               handle_mq(c2dev, mq_index);
-       }
-
-}
-
-/*
- * Top level MQ handler
- */
-static void handle_mq(struct c2_dev *c2dev, u32 mq_index)
-{
-       if (c2dev->qptr_array[mq_index] == NULL) {
-               pr_debug("handle_mq: stray activity for mq_index=%d\n",
-                        mq_index);
-               return;
-       }
-
-       switch (mq_index) {
-       case (0):
-               /*
-                * An index of 0 in the activity queue
-                * indicates the req vq now has messages
-                * available...
-                *
-                * Wake up any waiters waiting on req VQ
-                * message availability.
-                */
-               wake_up(&c2dev->req_vq_wo);
-               break;
-       case (1):
-               handle_vq(c2dev, mq_index);
-               break;
-       case (2):
-               /* We have to purge the VQ in case there are pending
-                * accept reply requests that would result in the
-                * generation of an ESTABLISHED event. If we don't
-                * generate these first, a CLOSE event could end up
-                * being delivered before the ESTABLISHED event.
-                */
-               handle_vq(c2dev, 1);
-
-               c2_ae_event(c2dev, mq_index);
-               break;
-       default:
-               /* There is no event synchronization between CQ events
-                * and AE or CM events. In fact, CQE could be
-                * delivered for all of the I/O up to and including the
-                * FLUSH for a peer disconenct prior to the ESTABLISHED
-                * event being delivered to the app. The reason for this
-                * is that CM events are delivered on a thread, while AE
-                * and CM events are delivered on interrupt context.
-                */
-               c2_cq_event(c2dev, mq_index);
-               break;
-       }
-
-       return;
-}
-
-/*
- * Handles verbs WR replies.
- */
-static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
-{
-       void *adapter_msg, *reply_msg;
-       struct c2wr_hdr *host_msg;
-       struct c2wr_hdr tmp;
-       struct c2_mq *reply_vq;
-       struct c2_vq_req *req;
-       struct iw_cm_event cm_event;
-       int err;
-
-       reply_vq = c2dev->qptr_array[mq_index];
-
-       /*
-        * get next msg from mq_index into adapter_msg.
-        * don't free it yet.
-        */
-       adapter_msg = c2_mq_consume(reply_vq);
-       if (adapter_msg == NULL) {
-               return;
-       }
-
-       host_msg = vq_repbuf_alloc(c2dev);
-
-       /*
-        * If we can't get a host buffer, then we'll still
-        * wakeup the waiter, we just won't give him the msg.
-        * It is assumed the waiter will deal with this...
-        */
-       if (!host_msg) {
-               pr_debug("handle_vq: no repbufs!\n");
-
-               /*
-                * just copy the WR header into a local variable.
-                * this allows us to still demux on the context
-                */
-               host_msg = &tmp;
-               memcpy(host_msg, adapter_msg, sizeof(tmp));
-               reply_msg = NULL;
-       } else {
-               memcpy(host_msg, adapter_msg, reply_vq->msg_size);
-               reply_msg = host_msg;
-       }
-
-       /*
-        * consume the msg from the MQ
-        */
-       c2_mq_free(reply_vq);
-
-       /*
-        * wakeup the waiter.
-        */
-       req = (struct c2_vq_req *) (unsigned long) host_msg->context;
-       if (req == NULL) {
-               /*
-                * We should never get here, as the adapter should
-                * never send us a reply that we're not expecting.
-                */
-               if (reply_msg != NULL)
-                       vq_repbuf_free(c2dev, host_msg);
-               pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
-               return;
-       }
-
-       if (reply_msg)
-               err = c2_errno(reply_msg);
-       else
-               err = -ENOMEM;
-
-       if (!err) switch (req->event) {
-       case IW_CM_EVENT_ESTABLISHED:
-               c2_set_qp_state(req->qp,
-                               C2_QP_STATE_RTS);
-               /*
-                * Until ird/ord negotiation via MPAv2 support is added, send
-                * max supported values
-                */
-               cm_event.ird = cm_event.ord = 128;
-       case IW_CM_EVENT_CLOSE:
-
-               /*
-                * Move the QP to RTS if this is
-                * the established event
-                */
-               cm_event.event = req->event;
-               cm_event.status = 0;
-               cm_event.local_addr = req->cm_id->local_addr;
-               cm_event.remote_addr = req->cm_id->remote_addr;
-               cm_event.private_data = NULL;
-               cm_event.private_data_len = 0;
-               req->cm_id->event_handler(req->cm_id, &cm_event);
-               break;
-       default:
-               break;
-       }
-
-       req->reply_msg = (u64) (unsigned long) (reply_msg);
-       atomic_set(&req->reply_ready, 1);
-       wake_up(&req->wait_object);
-
-       /*
-        * If the request was cancelled, then this put will
-        * free the vq_req memory...and reply_msg!!!
-        */
-       vq_req_put(c2dev, req);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_mm.c b/drivers/staging/rdma/amso1100/c2_mm.c
deleted file mode 100644 (file)
index 25081e2..0000000
+++ /dev/null
@@ -1,377 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/slab.h>
-
-#include "c2.h"
-#include "c2_vq.h"
-
-#define PBL_VIRT 1
-#define PBL_PHYS 2
-
-/*
- * Send all the PBL messages to convey the remainder of the PBL
- * Wait for the adapter's reply on the last one.
- * This is indicated by setting the MEM_PBL_COMPLETE in the flags.
- *
- * NOTE:  vq_req is _not_ freed by this function.  The VQ Host
- *       Reply buffer _is_ freed by this function.
- */
-static int
-send_pbl_messages(struct c2_dev *c2dev, __be32 stag_index,
-                 unsigned long va, u32 pbl_depth,
-                 struct c2_vq_req *vq_req, int pbl_type)
-{
-       u32 pbe_count;          /* amt that fits in a PBL msg */
-       u32 count;              /* amt in this PBL MSG. */
-       struct c2wr_nsmr_pbl_req *wr;   /* PBL WR ptr */
-       struct c2wr_nsmr_pbl_rep *reply;        /* reply ptr */
-       int err, pbl_virt, pbl_index, i;
-
-       switch (pbl_type) {
-       case PBL_VIRT:
-               pbl_virt = 1;
-               break;
-       case PBL_PHYS:
-               pbl_virt = 0;
-               break;
-       default:
-               return -EINVAL;
-               break;
-       }
-
-       pbe_count = (c2dev->req_vq.msg_size -
-                    sizeof(struct c2wr_nsmr_pbl_req)) / sizeof(u64);
-       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-       if (!wr) {
-               return -ENOMEM;
-       }
-       c2_wr_set_id(wr, CCWR_NSMR_PBL);
-
-       /*
-        * Only the last PBL message will generate a reply from the verbs,
-        * so we set the context to 0 indicating there is no kernel verbs
-        * handler blocked awaiting this reply.
-        */
-       wr->hdr.context = 0;
-       wr->rnic_handle = c2dev->adapter_handle;
-       wr->stag_index = stag_index;    /* already swapped */
-       wr->flags = 0;
-       pbl_index = 0;
-       while (pbl_depth) {
-               count = min(pbe_count, pbl_depth);
-               wr->addrs_length = cpu_to_be32(count);
-
-               /*
-                *  If this is the last message, then reference the
-                *  vq request struct cuz we're gonna wait for a reply.
-                *  also make this PBL msg as the last one.
-                */
-               if (count == pbl_depth) {
-                       /*
-                        * reference the request struct.  dereferenced in the
-                        * int handler.
-                        */
-                       vq_req_get(c2dev, vq_req);
-                       wr->flags = cpu_to_be32(MEM_PBL_COMPLETE);
-
-                       /*
-                        * This is the last PBL message.
-                        * Set the context to our VQ Request Object so we can
-                        * wait for the reply.
-                        */
-                       wr->hdr.context = (unsigned long) vq_req;
-               }
-
-               /*
-                * If pbl_virt is set then va is a virtual address
-                * that describes a virtually contiguous memory
-                * allocation. The wr needs the start of each virtual page
-                * to be converted to the corresponding physical address
-                * of the page. If pbl_virt is not set then va is an array
-                * of physical addresses and there is no conversion to do.
-                * Just fill in the wr with what is in the array.
-                */
-               for (i = 0; i < count; i++) {
-                       if (pbl_virt) {
-                               va += PAGE_SIZE;
-                       } else {
-                               wr->paddrs[i] =
-                                   cpu_to_be64(((u64 *)va)[pbl_index + i]);
-                       }
-               }
-
-               /*
-                * Send WR to adapter
-                */
-               err = vq_send_wr(c2dev, (union c2wr *) wr);
-               if (err) {
-                       if (count <= pbe_count) {
-                               vq_req_put(c2dev, vq_req);
-                       }
-                       goto bail0;
-               }
-               pbl_depth -= count;
-               pbl_index += count;
-       }
-
-       /*
-        *  Now wait for the reply...
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err) {
-               goto bail0;
-       }
-
-       /*
-        * Process reply
-        */
-       reply = (struct c2wr_nsmr_pbl_rep *) (unsigned long) vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       err = c2_errno(reply);
-
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       kfree(wr);
-       return err;
-}
-
-#define C2_PBL_MAX_DEPTH 131072
-int
-c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
-                          int page_size, int pbl_depth, u32 length,
-                          u32 offset, u64 *va, enum c2_acf acf,
-                          struct c2_mr *mr)
-{
-       struct c2_vq_req *vq_req;
-       struct c2wr_nsmr_register_req *wr;
-       struct c2wr_nsmr_register_rep *reply;
-       u16 flags;
-       int i, pbe_count, count;
-       int err;
-
-       if (!va || !length || !addr_list || !pbl_depth)
-               return -EINTR;
-
-       /*
-        * Verify PBL depth is within rnic max
-        */
-       if (pbl_depth > C2_PBL_MAX_DEPTH) {
-               return -EINTR;
-       }
-
-       /*
-        * allocate verbs request object
-        */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-       if (!wr) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       /*
-        * build the WR
-        */
-       c2_wr_set_id(wr, CCWR_NSMR_REGISTER);
-       wr->hdr.context = (unsigned long) vq_req;
-       wr->rnic_handle = c2dev->adapter_handle;
-
-       flags = (acf | MEM_VA_BASED | MEM_REMOTE);
-
-       /*
-        * compute how many pbes can fit in the message
-        */
-       pbe_count = (c2dev->req_vq.msg_size -
-                    sizeof(struct c2wr_nsmr_register_req)) / sizeof(u64);
-
-       if (pbl_depth <= pbe_count) {
-               flags |= MEM_PBL_COMPLETE;
-       }
-       wr->flags = cpu_to_be16(flags);
-       wr->stag_key = 0;       //stag_key;
-       wr->va = cpu_to_be64(*va);
-       wr->pd_id = mr->pd->pd_id;
-       wr->pbe_size = cpu_to_be32(page_size);
-       wr->length = cpu_to_be32(length);
-       wr->pbl_depth = cpu_to_be32(pbl_depth);
-       wr->fbo = cpu_to_be32(offset);
-       count = min(pbl_depth, pbe_count);
-       wr->addrs_length = cpu_to_be32(count);
-
-       /*
-        * fill out the PBL for this message
-        */
-       for (i = 0; i < count; i++) {
-               wr->paddrs[i] = cpu_to_be64(addr_list[i]);
-       }
-
-       /*
-        * regerence the request struct
-        */
-       vq_req_get(c2dev, vq_req);
-
-       /*
-        * send the WR to the adapter
-        */
-       err = vq_send_wr(c2dev, (union c2wr *) wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail1;
-       }
-
-       /*
-        * wait for reply from adapter
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err) {
-               goto bail1;
-       }
-
-       /*
-        * process reply
-        */
-       reply =
-           (struct c2wr_nsmr_register_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-       if ((err = c2_errno(reply))) {
-               goto bail2;
-       }
-       //*p_pb_entries = be32_to_cpu(reply->pbl_depth);
-       mr->ibmr.lkey = mr->ibmr.rkey = be32_to_cpu(reply->stag_index);
-       vq_repbuf_free(c2dev, reply);
-
-       /*
-        * if there are still more PBEs we need to send them to
-        * the adapter and wait for a reply on the final one.
-        * reuse vq_req for this purpose.
-        */
-       pbl_depth -= count;
-       if (pbl_depth) {
-
-               vq_req->reply_msg = (unsigned long) NULL;
-               atomic_set(&vq_req->reply_ready, 0);
-               err = send_pbl_messages(c2dev,
-                                       cpu_to_be32(mr->ibmr.lkey),
-                                       (unsigned long) &addr_list[i],
-                                       pbl_depth, vq_req, PBL_PHYS);
-               if (err) {
-                       goto bail1;
-               }
-       }
-
-       vq_req_free(c2dev, vq_req);
-       kfree(wr);
-
-       return err;
-
-bail2:
-       vq_repbuf_free(c2dev, reply);
-bail1:
-       kfree(wr);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index)
-{
-       struct c2_vq_req *vq_req;       /* verbs request object */
-       struct c2wr_stag_dealloc_req wr;        /* work request */
-       struct c2wr_stag_dealloc_rep *reply;    /* WR reply  */
-       int err;
-
-
-       /*
-        * allocate verbs request object
-        */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req) {
-               return -ENOMEM;
-       }
-
-       /*
-        * Build the WR
-        */
-       c2_wr_set_id(&wr, CCWR_STAG_DEALLOC);
-       wr.hdr.context = (u64) (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.stag_index = cpu_to_be32(stag_index);
-
-       /*
-        * reference the request struct.  dereferenced in the int handler.
-        */
-       vq_req_get(c2dev, vq_req);
-
-       /*
-        * Send WR to adapter
-        */
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       /*
-        * Wait for reply from adapter
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err) {
-               goto bail0;
-       }
-
-       /*
-        * Process reply
-        */
-       reply = (struct c2wr_stag_dealloc_rep *) (unsigned long) vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       err = c2_errno(reply);
-
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_mq.c b/drivers/staging/rdma/amso1100/c2_mq.c
deleted file mode 100644 (file)
index 7827fb8..0000000
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "c2.h"
-#include "c2_mq.h"
-
-void *c2_mq_alloc(struct c2_mq *q)
-{
-       BUG_ON(q->magic != C2_MQ_MAGIC);
-       BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
-
-       if (c2_mq_full(q)) {
-               return NULL;
-       } else {
-#ifdef DEBUG
-               struct c2wr_hdr *m =
-                   (struct c2wr_hdr *) (q->msg_pool.host + q->priv * q->msg_size);
-#ifdef CCMSGMAGIC
-               BUG_ON(m->magic != be32_to_cpu(~CCWR_MAGIC));
-               m->magic = cpu_to_be32(CCWR_MAGIC);
-#endif
-               return m;
-#else
-               return q->msg_pool.host + q->priv * q->msg_size;
-#endif
-       }
-}
-
-void c2_mq_produce(struct c2_mq *q)
-{
-       BUG_ON(q->magic != C2_MQ_MAGIC);
-       BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
-
-       if (!c2_mq_full(q)) {
-               q->priv = (q->priv + 1) % q->q_size;
-               q->hint_count++;
-               /* Update peer's offset. */
-               __raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared);
-       }
-}
-
-void *c2_mq_consume(struct c2_mq *q)
-{
-       BUG_ON(q->magic != C2_MQ_MAGIC);
-       BUG_ON(q->type != C2_MQ_HOST_TARGET);
-
-       if (c2_mq_empty(q)) {
-               return NULL;
-       } else {
-#ifdef DEBUG
-               struct c2wr_hdr *m = (struct c2wr_hdr *)
-                   (q->msg_pool.host + q->priv * q->msg_size);
-#ifdef CCMSGMAGIC
-               BUG_ON(m->magic != be32_to_cpu(CCWR_MAGIC));
-#endif
-               return m;
-#else
-               return q->msg_pool.host + q->priv * q->msg_size;
-#endif
-       }
-}
-
-void c2_mq_free(struct c2_mq *q)
-{
-       BUG_ON(q->magic != C2_MQ_MAGIC);
-       BUG_ON(q->type != C2_MQ_HOST_TARGET);
-
-       if (!c2_mq_empty(q)) {
-
-#ifdef CCMSGMAGIC
-               {
-                       struct c2wr_hdr __iomem *m = (struct c2wr_hdr __iomem *)
-                           (q->msg_pool.adapter + q->priv * q->msg_size);
-                       __raw_writel(cpu_to_be32(~CCWR_MAGIC), &m->magic);
-               }
-#endif
-               q->priv = (q->priv + 1) % q->q_size;
-               /* Update peer's offset. */
-               __raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared);
-       }
-}
-
-
-void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count)
-{
-       BUG_ON(q->magic != C2_MQ_MAGIC);
-       BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
-
-       while (wqe_count--) {
-               BUG_ON(c2_mq_empty(q));
-               *q->shared = cpu_to_be16((be16_to_cpu(*q->shared)+1) % q->q_size);
-       }
-}
-
-#if 0
-u32 c2_mq_count(struct c2_mq *q)
-{
-       s32 count;
-
-       if (q->type == C2_MQ_HOST_TARGET)
-               count = be16_to_cpu(*q->shared) - q->priv;
-       else
-               count = q->priv - be16_to_cpu(*q->shared);
-
-       if (count < 0)
-               count += q->q_size;
-
-       return (u32) count;
-}
-#endif  /*  0  */
-
-void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-                   u8 __iomem *pool_start, u16 __iomem *peer, u32 type)
-{
-       BUG_ON(!q->shared);
-
-       /* This code assumes the byte swapping has already been done! */
-       q->index = index;
-       q->q_size = q_size;
-       q->msg_size = msg_size;
-       q->msg_pool.adapter = pool_start;
-       q->peer = (struct c2_mq_shared __iomem *) peer;
-       q->magic = C2_MQ_MAGIC;
-       q->type = type;
-       q->priv = 0;
-       q->hint_count = 0;
-       return;
-}
-
-void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-                   u8 *pool_start, u16 __iomem *peer, u32 type)
-{
-       BUG_ON(!q->shared);
-
-       /* This code assumes the byte swapping has already been done! */
-       q->index = index;
-       q->q_size = q_size;
-       q->msg_size = msg_size;
-       q->msg_pool.host = pool_start;
-       q->peer = (struct c2_mq_shared __iomem *) peer;
-       q->magic = C2_MQ_MAGIC;
-       q->type = type;
-       q->priv = 0;
-       q->hint_count = 0;
-       return;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_mq.h b/drivers/staging/rdma/amso1100/c2_mq.h
deleted file mode 100644 (file)
index 8e1b4d1..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _C2_MQ_H_
-#define _C2_MQ_H_
-#include <linux/kernel.h>
-#include <linux/dma-mapping.h>
-#include "c2_wr.h"
-
-enum c2_shared_regs {
-
-       C2_SHARED_ARMED = 0x10,
-       C2_SHARED_NOTIFY = 0x18,
-       C2_SHARED_SHARED = 0x40,
-};
-
-struct c2_mq_shared {
-       u16 unused1;
-       u8 armed;
-       u8 notification_type;
-       u32 unused2;
-       u16 shared;
-       /* Pad to 64 bytes. */
-       u8 pad[64 - sizeof(u16) - 2 * sizeof(u8) - sizeof(u32) - sizeof(u16)];
-};
-
-enum c2_mq_type {
-       C2_MQ_HOST_TARGET = 1,
-       C2_MQ_ADAPTER_TARGET = 2,
-};
-
-/*
- * c2_mq_t is for kernel-mode MQs like the VQs Cand the AEQ.
- * c2_user_mq_t (which is the same format) is for user-mode MQs...
- */
-#define C2_MQ_MAGIC 0x4d512020 /* 'MQ  ' */
-struct c2_mq {
-       u32 magic;
-       union {
-               u8 *host;
-               u8 __iomem *adapter;
-       } msg_pool;
-       dma_addr_t host_dma;
-       DEFINE_DMA_UNMAP_ADDR(mapping);
-       u16 hint_count;
-       u16 priv;
-       struct c2_mq_shared __iomem *peer;
-       __be16 *shared;
-       dma_addr_t shared_dma;
-       u32 q_size;
-       u32 msg_size;
-       u32 index;
-       enum c2_mq_type type;
-};
-
-static __inline__ int c2_mq_empty(struct c2_mq *q)
-{
-       return q->priv == be16_to_cpu(*q->shared);
-}
-
-static __inline__ int c2_mq_full(struct c2_mq *q)
-{
-       return q->priv == (be16_to_cpu(*q->shared) + q->q_size - 1) % q->q_size;
-}
-
-void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count);
-void *c2_mq_alloc(struct c2_mq *q);
-void c2_mq_produce(struct c2_mq *q);
-void *c2_mq_consume(struct c2_mq *q);
-void c2_mq_free(struct c2_mq *q);
-void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-                      u8 __iomem *pool_start, u16 __iomem *peer, u32 type);
-void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-                          u8 *pool_start, u16 __iomem *peer, u32 type);
-
-#endif                         /* _C2_MQ_H_ */
diff --git a/drivers/staging/rdma/amso1100/c2_pd.c b/drivers/staging/rdma/amso1100/c2_pd.c
deleted file mode 100644 (file)
index f3e81dc..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/errno.h>
-
-#include "c2.h"
-#include "c2_provider.h"
-
-int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd)
-{
-       u32 obj;
-       int ret = 0;
-
-       spin_lock(&c2dev->pd_table.lock);
-       obj = find_next_zero_bit(c2dev->pd_table.table, c2dev->pd_table.max,
-                                c2dev->pd_table.last);
-       if (obj >= c2dev->pd_table.max)
-               obj = find_first_zero_bit(c2dev->pd_table.table,
-                                         c2dev->pd_table.max);
-       if (obj < c2dev->pd_table.max) {
-               pd->pd_id = obj;
-               __set_bit(obj, c2dev->pd_table.table);
-               c2dev->pd_table.last = obj+1;
-               if (c2dev->pd_table.last >= c2dev->pd_table.max)
-                       c2dev->pd_table.last = 0;
-       } else
-               ret = -ENOMEM;
-       spin_unlock(&c2dev->pd_table.lock);
-       return ret;
-}
-
-void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd)
-{
-       spin_lock(&c2dev->pd_table.lock);
-       __clear_bit(pd->pd_id, c2dev->pd_table.table);
-       spin_unlock(&c2dev->pd_table.lock);
-}
-
-int c2_init_pd_table(struct c2_dev *c2dev)
-{
-
-       c2dev->pd_table.last = 0;
-       c2dev->pd_table.max = c2dev->props.max_pd;
-       spin_lock_init(&c2dev->pd_table.lock);
-       c2dev->pd_table.table = kmalloc(BITS_TO_LONGS(c2dev->props.max_pd) *
-                                       sizeof(long), GFP_KERNEL);
-       if (!c2dev->pd_table.table)
-               return -ENOMEM;
-       bitmap_zero(c2dev->pd_table.table, c2dev->props.max_pd);
-       return 0;
-}
-
-void c2_cleanup_pd_table(struct c2_dev *c2dev)
-{
-       kfree(c2dev->pd_table.table);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_provider.c b/drivers/staging/rdma/amso1100/c2_provider.c
deleted file mode 100644 (file)
index a092ac7..0000000
+++ /dev/null
@@ -1,906 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/if_arp.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-
-#include <rdma/ib_smi.h>
-#include <rdma/ib_umem.h>
-#include <rdma/ib_user_verbs.h>
-#include "c2.h"
-#include "c2_provider.h"
-#include "c2_user.h"
-
-static int c2_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-                          struct ib_udata *uhw)
-{
-       struct c2_dev *c2dev = to_c2dev(ibdev);
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       if (uhw->inlen || uhw->outlen)
-               return -EINVAL;
-
-       *props = c2dev->props;
-       return 0;
-}
-
-static int c2_query_port(struct ib_device *ibdev,
-                        u8 port, struct ib_port_attr *props)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       props->max_mtu = IB_MTU_4096;
-       props->lid = 0;
-       props->lmc = 0;
-       props->sm_lid = 0;
-       props->sm_sl = 0;
-       props->state = IB_PORT_ACTIVE;
-       props->phys_state = 0;
-       props->port_cap_flags =
-           IB_PORT_CM_SUP |
-           IB_PORT_REINIT_SUP |
-           IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
-       props->gid_tbl_len = 1;
-       props->pkey_tbl_len = 1;
-       props->qkey_viol_cntr = 0;
-       props->active_width = 1;
-       props->active_speed = IB_SPEED_SDR;
-
-       return 0;
-}
-
-static int c2_query_pkey(struct ib_device *ibdev,
-                        u8 port, u16 index, u16 * pkey)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       *pkey = 0;
-       return 0;
-}
-
-static int c2_query_gid(struct ib_device *ibdev, u8 port,
-                       int index, union ib_gid *gid)
-{
-       struct c2_dev *c2dev = to_c2dev(ibdev);
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       memset(&(gid->raw[0]), 0, sizeof(gid->raw));
-       memcpy(&(gid->raw[0]), c2dev->pseudo_netdev->dev_addr, 6);
-
-       return 0;
-}
-
-/* Allocate the user context data structure. This keeps track
- * of all objects associated with a particular user-mode client.
- */
-static struct ib_ucontext *c2_alloc_ucontext(struct ib_device *ibdev,
-                                            struct ib_udata *udata)
-{
-       struct c2_ucontext *context;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       context = kmalloc(sizeof(*context), GFP_KERNEL);
-       if (!context)
-               return ERR_PTR(-ENOMEM);
-
-       return &context->ibucontext;
-}
-
-static int c2_dealloc_ucontext(struct ib_ucontext *context)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       kfree(context);
-       return 0;
-}
-
-static int c2_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return -ENOSYS;
-}
-
-static struct ib_pd *c2_alloc_pd(struct ib_device *ibdev,
-                                struct ib_ucontext *context,
-                                struct ib_udata *udata)
-{
-       struct c2_pd *pd;
-       int err;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       pd = kmalloc(sizeof(*pd), GFP_KERNEL);
-       if (!pd)
-               return ERR_PTR(-ENOMEM);
-
-       err = c2_pd_alloc(to_c2dev(ibdev), !context, pd);
-       if (err) {
-               kfree(pd);
-               return ERR_PTR(err);
-       }
-
-       if (context) {
-               if (ib_copy_to_udata(udata, &pd->pd_id, sizeof(__u32))) {
-                       c2_pd_free(to_c2dev(ibdev), pd);
-                       kfree(pd);
-                       return ERR_PTR(-EFAULT);
-               }
-       }
-
-       return &pd->ibpd;
-}
-
-static int c2_dealloc_pd(struct ib_pd *pd)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       c2_pd_free(to_c2dev(pd->device), to_c2pd(pd));
-       kfree(pd);
-
-       return 0;
-}
-
-static struct ib_ah *c2_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return ERR_PTR(-ENOSYS);
-}
-
-static int c2_ah_destroy(struct ib_ah *ah)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return -ENOSYS;
-}
-
-static void c2_add_ref(struct ib_qp *ibqp)
-{
-       struct c2_qp *qp;
-       BUG_ON(!ibqp);
-       qp = to_c2qp(ibqp);
-       atomic_inc(&qp->refcount);
-}
-
-static void c2_rem_ref(struct ib_qp *ibqp)
-{
-       struct c2_qp *qp;
-       BUG_ON(!ibqp);
-       qp = to_c2qp(ibqp);
-       if (atomic_dec_and_test(&qp->refcount))
-               wake_up(&qp->wait);
-}
-
-struct ib_qp *c2_get_qp(struct ib_device *device, int qpn)
-{
-       struct c2_dev* c2dev = to_c2dev(device);
-       struct c2_qp *qp;
-
-       qp = c2_find_qpn(c2dev, qpn);
-       pr_debug("%s Returning QP=%p for QPN=%d, device=%p, refcount=%d\n",
-               __func__, qp, qpn, device,
-               (qp?atomic_read(&qp->refcount):0));
-
-       return (qp?&qp->ibqp:NULL);
-}
-
-static struct ib_qp *c2_create_qp(struct ib_pd *pd,
-                                 struct ib_qp_init_attr *init_attr,
-                                 struct ib_udata *udata)
-{
-       struct c2_qp *qp;
-       int err;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       if (init_attr->create_flags)
-               return ERR_PTR(-EINVAL);
-
-       switch (init_attr->qp_type) {
-       case IB_QPT_RC:
-               qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-               if (!qp) {
-                       pr_debug("%s: Unable to allocate QP\n", __func__);
-                       return ERR_PTR(-ENOMEM);
-               }
-               spin_lock_init(&qp->lock);
-               if (pd->uobject) {
-                       /* userspace specific */
-               }
-
-               err = c2_alloc_qp(to_c2dev(pd->device),
-                                 to_c2pd(pd), init_attr, qp);
-
-               if (err && pd->uobject) {
-                       /* userspace specific */
-               }
-
-               break;
-       default:
-               pr_debug("%s: Invalid QP type: %d\n", __func__,
-                       init_attr->qp_type);
-               return ERR_PTR(-EINVAL);
-       }
-
-       if (err) {
-               kfree(qp);
-               return ERR_PTR(err);
-       }
-
-       return &qp->ibqp;
-}
-
-static int c2_destroy_qp(struct ib_qp *ib_qp)
-{
-       struct c2_qp *qp = to_c2qp(ib_qp);
-
-       pr_debug("%s:%u qp=%p,qp->state=%d\n",
-               __func__, __LINE__, ib_qp, qp->state);
-       c2_free_qp(to_c2dev(ib_qp->device), qp);
-       kfree(qp);
-       return 0;
-}
-
-static struct ib_cq *c2_create_cq(struct ib_device *ibdev,
-                                 const struct ib_cq_init_attr *attr,
-                                 struct ib_ucontext *context,
-                                 struct ib_udata *udata)
-{
-       int entries = attr->cqe;
-       struct c2_cq *cq;
-       int err;
-
-       if (attr->flags)
-               return ERR_PTR(-EINVAL);
-
-       cq = kmalloc(sizeof(*cq), GFP_KERNEL);
-       if (!cq) {
-               pr_debug("%s: Unable to allocate CQ\n", __func__);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       err = c2_init_cq(to_c2dev(ibdev), entries, NULL, cq);
-       if (err) {
-               pr_debug("%s: error initializing CQ\n", __func__);
-               kfree(cq);
-               return ERR_PTR(err);
-       }
-
-       return &cq->ibcq;
-}
-
-static int c2_destroy_cq(struct ib_cq *ib_cq)
-{
-       struct c2_cq *cq = to_c2cq(ib_cq);
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       c2_free_cq(to_c2dev(ib_cq->device), cq);
-       kfree(cq);
-
-       return 0;
-}
-
-static inline u32 c2_convert_access(int acc)
-{
-       return (acc & IB_ACCESS_REMOTE_WRITE ? C2_ACF_REMOTE_WRITE : 0) |
-           (acc & IB_ACCESS_REMOTE_READ ? C2_ACF_REMOTE_READ : 0) |
-           (acc & IB_ACCESS_LOCAL_WRITE ? C2_ACF_LOCAL_WRITE : 0) |
-           C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND;
-}
-
-static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
-                                   struct ib_phys_buf *buffer_list,
-                                   int num_phys_buf, int acc, u64 * iova_start)
-{
-       struct c2_mr *mr;
-       u64 *page_list;
-       u32 total_len;
-       int err, i, j, k, page_shift, pbl_depth;
-
-       pbl_depth = 0;
-       total_len = 0;
-
-       page_shift = PAGE_SHIFT;
-       /*
-        * If there is only 1 buffer we assume this could
-        * be a map of all phy mem...use a 32k page_shift.
-        */
-       if (num_phys_buf == 1)
-               page_shift += 3;
-
-       for (i = 0; i < num_phys_buf; i++) {
-
-               if (offset_in_page(buffer_list[i].addr)) {
-                       pr_debug("Unaligned Memory Buffer: 0x%x\n",
-                               (unsigned int) buffer_list[i].addr);
-                       return ERR_PTR(-EINVAL);
-               }
-
-               if (!buffer_list[i].size) {
-                       pr_debug("Invalid Buffer Size\n");
-                       return ERR_PTR(-EINVAL);
-               }
-
-               total_len += buffer_list[i].size;
-               pbl_depth += ALIGN(buffer_list[i].size,
-                                  BIT(page_shift)) >> page_shift;
-       }
-
-       page_list = vmalloc(sizeof(u64) * pbl_depth);
-       if (!page_list) {
-               pr_debug("couldn't vmalloc page_list of size %zd\n",
-                       (sizeof(u64) * pbl_depth));
-               return ERR_PTR(-ENOMEM);
-       }
-
-       for (i = 0, j = 0; i < num_phys_buf; i++) {
-
-               int naddrs;
-
-               naddrs = ALIGN(buffer_list[i].size,
-                              BIT(page_shift)) >> page_shift;
-               for (k = 0; k < naddrs; k++)
-                       page_list[j++] = (buffer_list[i].addr +
-                                                    (k << page_shift));
-       }
-
-       mr = kmalloc(sizeof(*mr), GFP_KERNEL);
-       if (!mr) {
-               vfree(page_list);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       mr->pd = to_c2pd(ib_pd);
-       mr->umem = NULL;
-       pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
-               "*iova_start %llx, first pa %llx, last pa %llx\n",
-               __func__, page_shift, pbl_depth, total_len,
-               (unsigned long long) *iova_start,
-               (unsigned long long) page_list[0],
-               (unsigned long long) page_list[pbl_depth-1]);
-       err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list,
-                                        BIT(page_shift), pbl_depth,
-                                        total_len, 0, iova_start,
-                                        c2_convert_access(acc), mr);
-       vfree(page_list);
-       if (err) {
-               kfree(mr);
-               return ERR_PTR(err);
-       }
-
-       return &mr->ibmr;
-}
-
-static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
-{
-       struct ib_phys_buf bl;
-       u64 kva = 0;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       /* AMSO1100 limit */
-       bl.size = 0xffffffff;
-       bl.addr = 0;
-       return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
-}
-
-static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-                                   u64 virt, int acc, struct ib_udata *udata)
-{
-       u64 *pages;
-       u64 kva = 0;
-       int shift, n, len;
-       int i, k, entry;
-       int err = 0;
-       struct scatterlist *sg;
-       struct c2_pd *c2pd = to_c2pd(pd);
-       struct c2_mr *c2mr;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
-       if (!c2mr)
-               return ERR_PTR(-ENOMEM);
-       c2mr->pd = c2pd;
-
-       c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
-       if (IS_ERR(c2mr->umem)) {
-               err = PTR_ERR(c2mr->umem);
-               kfree(c2mr);
-               return ERR_PTR(err);
-       }
-
-       shift = ffs(c2mr->umem->page_size) - 1;
-       n = c2mr->umem->nmap;
-
-       pages = kmalloc_array(n, sizeof(u64), GFP_KERNEL);
-       if (!pages) {
-               err = -ENOMEM;
-               goto err;
-       }
-
-       i = 0;
-       for_each_sg(c2mr->umem->sg_head.sgl, sg, c2mr->umem->nmap, entry) {
-               len = sg_dma_len(sg) >> shift;
-               for (k = 0; k < len; ++k) {
-                       pages[i++] =
-                               sg_dma_address(sg) +
-                               (c2mr->umem->page_size * k);
-               }
-       }
-
-       kva = virt;
-       err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
-                                        pages,
-                                        c2mr->umem->page_size,
-                                        i,
-                                        length,
-                                        ib_umem_offset(c2mr->umem),
-                                        &kva,
-                                        c2_convert_access(acc),
-                                        c2mr);
-       kfree(pages);
-       if (err)
-               goto err;
-       return &c2mr->ibmr;
-
-err:
-       ib_umem_release(c2mr->umem);
-       kfree(c2mr);
-       return ERR_PTR(err);
-}
-
-static int c2_dereg_mr(struct ib_mr *ib_mr)
-{
-       struct c2_mr *mr = to_c2mr(ib_mr);
-       int err;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
-       if (err)
-               pr_debug("c2_stag_dealloc failed: %d\n", err);
-       else {
-               if (mr->umem)
-                       ib_umem_release(mr->umem);
-               kfree(mr);
-       }
-
-       return err;
-}
-
-static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
-                       char *buf)
-{
-       struct c2_dev *c2dev = container_of(dev, struct c2_dev, ibdev.dev);
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return sprintf(buf, "%x\n", c2dev->props.hw_ver);
-}
-
-static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
-                          char *buf)
-{
-       struct c2_dev *c2dev = container_of(dev, struct c2_dev, ibdev.dev);
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return sprintf(buf, "%x.%x.%x\n",
-                      (int) (c2dev->props.fw_ver >> 32),
-                      (int) (c2dev->props.fw_ver >> 16) & 0xffff,
-                      (int) (c2dev->props.fw_ver & 0xffff));
-}
-
-static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
-                       char *buf)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return sprintf(buf, "AMSO1100\n");
-}
-
-static ssize_t show_board(struct device *dev, struct device_attribute *attr,
-                         char *buf)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return sprintf(buf, "%.*s\n", 32, "AMSO1100 Board ID");
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
-
-static struct device_attribute *c2_dev_attributes[] = {
-       &dev_attr_hw_rev,
-       &dev_attr_fw_ver,
-       &dev_attr_hca_type,
-       &dev_attr_board_id
-};
-
-static int c2_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                       int attr_mask, struct ib_udata *udata)
-{
-       int err;
-
-       err =
-           c2_qp_modify(to_c2dev(ibqp->device), to_c2qp(ibqp), attr,
-                        attr_mask);
-
-       return err;
-}
-
-static int c2_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return -ENOSYS;
-}
-
-static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return -ENOSYS;
-}
-
-static int c2_process_mad(struct ib_device *ibdev,
-                         int mad_flags,
-                         u8 port_num,
-                         const struct ib_wc *in_wc,
-                         const struct ib_grh *in_grh,
-                         const struct ib_mad_hdr *in_mad,
-                         size_t in_mad_size,
-                         struct ib_mad_hdr *out_mad,
-                         size_t *out_mad_size,
-                         u16 *out_mad_pkey_index)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return -ENOSYS;
-}
-
-static int c2_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       /* Request a connection */
-       return c2_llp_connect(cm_id, iw_param);
-}
-
-static int c2_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       /* Accept the new connection */
-       return c2_llp_accept(cm_id, iw_param);
-}
-
-static int c2_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       return c2_llp_reject(cm_id, pdata, pdata_len);
-}
-
-static int c2_service_create(struct iw_cm_id *cm_id, int backlog)
-{
-       int err;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       err = c2_llp_service_create(cm_id, backlog);
-       pr_debug("%s:%u err=%d\n",
-               __func__, __LINE__,
-               err);
-       return err;
-}
-
-static int c2_service_destroy(struct iw_cm_id *cm_id)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       return c2_llp_service_destroy(cm_id);
-}
-
-static int c2_pseudo_up(struct net_device *netdev)
-{
-       struct in_device *ind;
-       struct c2_dev *c2dev = netdev->ml_priv;
-
-       ind = in_dev_get(netdev);
-       if (!ind)
-               return 0;
-
-       pr_debug("adding...\n");
-       for_ifa(ind) {
-#ifdef DEBUG
-               u8 *ip = (u8 *) & ifa->ifa_address;
-
-               pr_debug("%s: %d.%d.%d.%d\n",
-                      ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
-#endif
-               c2_add_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
-       }
-       endfor_ifa(ind);
-       in_dev_put(ind);
-
-       return 0;
-}
-
-static int c2_pseudo_down(struct net_device *netdev)
-{
-       struct in_device *ind;
-       struct c2_dev *c2dev = netdev->ml_priv;
-
-       ind = in_dev_get(netdev);
-       if (!ind)
-               return 0;
-
-       pr_debug("deleting...\n");
-       for_ifa(ind) {
-#ifdef DEBUG
-               u8 *ip = (u8 *) & ifa->ifa_address;
-
-               pr_debug("%s: %d.%d.%d.%d\n",
-                      ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
-#endif
-               c2_del_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
-       }
-       endfor_ifa(ind);
-       in_dev_put(ind);
-
-       return 0;
-}
-
-static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
-{
-       kfree_skb(skb);
-       return NETDEV_TX_OK;
-}
-
-static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu)
-{
-       if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
-               return -EINVAL;
-
-       netdev->mtu = new_mtu;
-
-       /* TODO: Tell rnic about new rmda interface mtu */
-       return 0;
-}
-
-static const struct net_device_ops c2_pseudo_netdev_ops = {
-       .ndo_open               = c2_pseudo_up,
-       .ndo_stop               = c2_pseudo_down,
-       .ndo_start_xmit         = c2_pseudo_xmit_frame,
-       .ndo_change_mtu         = c2_pseudo_change_mtu,
-       .ndo_validate_addr      = eth_validate_addr,
-};
-
-static void setup(struct net_device *netdev)
-{
-       netdev->netdev_ops = &c2_pseudo_netdev_ops;
-
-       netdev->watchdog_timeo = 0;
-       netdev->type = ARPHRD_ETHER;
-       netdev->mtu = 1500;
-       netdev->hard_header_len = ETH_HLEN;
-       netdev->addr_len = ETH_ALEN;
-       netdev->tx_queue_len = 0;
-       netdev->flags |= IFF_NOARP;
-}
-
-static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
-{
-       char name[IFNAMSIZ];
-       struct net_device *netdev;
-
-       /* change ethxxx to iwxxx */
-       strcpy(name, "iw");
-       strcat(name, &c2dev->netdev->name[3]);
-       netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, setup);
-       if (!netdev) {
-               printk(KERN_ERR PFX "%s -  etherdev alloc failed",
-                       __func__);
-               return NULL;
-       }
-
-       netdev->ml_priv = c2dev;
-
-       SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
-
-       memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6);
-
-       /* Print out the MAC address */
-       pr_debug("%s: MAC %pM\n", netdev->name, netdev->dev_addr);
-
-#if 0
-       /* Disable network packets */
-       netif_stop_queue(netdev);
-#endif
-       return netdev;
-}
-
-static int c2_port_immutable(struct ib_device *ibdev, u8 port_num,
-                            struct ib_port_immutable *immutable)
-{
-       struct ib_port_attr attr;
-       int err;
-
-       err = c2_query_port(ibdev, port_num, &attr);
-       if (err)
-               return err;
-
-       immutable->pkey_tbl_len = attr.pkey_tbl_len;
-       immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
-
-       return 0;
-}
-
-int c2_register_device(struct c2_dev *dev)
-{
-       int ret = -ENOMEM;
-       int i;
-
-       /* Register pseudo network device */
-       dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
-       if (!dev->pseudo_netdev)
-               goto out;
-
-       ret = register_netdev(dev->pseudo_netdev);
-       if (ret)
-               goto out_free_netdev;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
-       dev->ibdev.owner = THIS_MODULE;
-       dev->ibdev.uverbs_cmd_mask =
-           (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
-           (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
-           (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
-           (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
-           (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
-           (1ull << IB_USER_VERBS_CMD_REG_MR) |
-           (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
-           (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-           (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
-           (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
-           (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
-           (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
-           (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
-           (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
-           (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
-           (1ull << IB_USER_VERBS_CMD_POST_SEND) |
-           (1ull << IB_USER_VERBS_CMD_POST_RECV);
-
-       dev->ibdev.node_type = RDMA_NODE_RNIC;
-       memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
-       memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
-       dev->ibdev.phys_port_cnt = 1;
-       dev->ibdev.num_comp_vectors = 1;
-       dev->ibdev.dma_device = &dev->pcidev->dev;
-       dev->ibdev.query_device = c2_query_device;
-       dev->ibdev.query_port = c2_query_port;
-       dev->ibdev.query_pkey = c2_query_pkey;
-       dev->ibdev.query_gid = c2_query_gid;
-       dev->ibdev.alloc_ucontext = c2_alloc_ucontext;
-       dev->ibdev.dealloc_ucontext = c2_dealloc_ucontext;
-       dev->ibdev.mmap = c2_mmap_uar;
-       dev->ibdev.alloc_pd = c2_alloc_pd;
-       dev->ibdev.dealloc_pd = c2_dealloc_pd;
-       dev->ibdev.create_ah = c2_ah_create;
-       dev->ibdev.destroy_ah = c2_ah_destroy;
-       dev->ibdev.create_qp = c2_create_qp;
-       dev->ibdev.modify_qp = c2_modify_qp;
-       dev->ibdev.destroy_qp = c2_destroy_qp;
-       dev->ibdev.create_cq = c2_create_cq;
-       dev->ibdev.destroy_cq = c2_destroy_cq;
-       dev->ibdev.poll_cq = c2_poll_cq;
-       dev->ibdev.get_dma_mr = c2_get_dma_mr;
-       dev->ibdev.reg_phys_mr = c2_reg_phys_mr;
-       dev->ibdev.reg_user_mr = c2_reg_user_mr;
-       dev->ibdev.dereg_mr = c2_dereg_mr;
-       dev->ibdev.get_port_immutable = c2_port_immutable;
-
-       dev->ibdev.alloc_fmr = NULL;
-       dev->ibdev.unmap_fmr = NULL;
-       dev->ibdev.dealloc_fmr = NULL;
-       dev->ibdev.map_phys_fmr = NULL;
-
-       dev->ibdev.attach_mcast = c2_multicast_attach;
-       dev->ibdev.detach_mcast = c2_multicast_detach;
-       dev->ibdev.process_mad = c2_process_mad;
-
-       dev->ibdev.req_notify_cq = c2_arm_cq;
-       dev->ibdev.post_send = c2_post_send;
-       dev->ibdev.post_recv = c2_post_receive;
-
-       dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
-       if (dev->ibdev.iwcm == NULL) {
-               ret = -ENOMEM;
-               goto out_unregister_netdev;
-       }
-       dev->ibdev.iwcm->add_ref = c2_add_ref;
-       dev->ibdev.iwcm->rem_ref = c2_rem_ref;
-       dev->ibdev.iwcm->get_qp = c2_get_qp;
-       dev->ibdev.iwcm->connect = c2_connect;
-       dev->ibdev.iwcm->accept = c2_accept;
-       dev->ibdev.iwcm->reject = c2_reject;
-       dev->ibdev.iwcm->create_listen = c2_service_create;
-       dev->ibdev.iwcm->destroy_listen = c2_service_destroy;
-
-       ret = ib_register_device(&dev->ibdev, NULL);
-       if (ret)
-               goto out_free_iwcm;
-
-       for (i = 0; i < ARRAY_SIZE(c2_dev_attributes); ++i) {
-               ret = device_create_file(&dev->ibdev.dev,
-                                              c2_dev_attributes[i]);
-               if (ret)
-                       goto out_unregister_ibdev;
-       }
-       goto out;
-
-out_unregister_ibdev:
-       ib_unregister_device(&dev->ibdev);
-out_free_iwcm:
-       kfree(dev->ibdev.iwcm);
-out_unregister_netdev:
-       unregister_netdev(dev->pseudo_netdev);
-out_free_netdev:
-       free_netdev(dev->pseudo_netdev);
-out:
-       pr_debug("%s:%u ret=%d\n", __func__, __LINE__, ret);
-       return ret;
-}
-
-void c2_unregister_device(struct c2_dev *dev)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       unregister_netdev(dev->pseudo_netdev);
-       free_netdev(dev->pseudo_netdev);
-       ib_unregister_device(&dev->ibdev);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_provider.h b/drivers/staging/rdma/amso1100/c2_provider.h
deleted file mode 100644 (file)
index bf18998..0000000
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef C2_PROVIDER_H
-#define C2_PROVIDER_H
-#include <linux/inetdevice.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_pack.h>
-
-#include "c2_mq.h"
-#include <rdma/iw_cm.h>
-
-#define C2_MPT_FLAG_ATOMIC        (1 << 14)
-#define C2_MPT_FLAG_REMOTE_WRITE  (1 << 13)
-#define C2_MPT_FLAG_REMOTE_READ   (1 << 12)
-#define C2_MPT_FLAG_LOCAL_WRITE   (1 << 11)
-#define C2_MPT_FLAG_LOCAL_READ    (1 << 10)
-
-struct c2_buf_list {
-       void *buf;
-       DEFINE_DMA_UNMAP_ADDR(mapping);
-};
-
-
-/* The user context keeps track of objects allocated for a
- * particular user-mode client. */
-struct c2_ucontext {
-       struct ib_ucontext ibucontext;
-};
-
-struct c2_mtt;
-
-/* All objects associated with a PD are kept in the
- * associated user context if present.
- */
-struct c2_pd {
-       struct ib_pd ibpd;
-       u32 pd_id;
-};
-
-struct c2_mr {
-       struct ib_mr ibmr;
-       struct c2_pd *pd;
-       struct ib_umem *umem;
-};
-
-struct c2_av;
-
-enum c2_ah_type {
-       C2_AH_ON_HCA,
-       C2_AH_PCI_POOL,
-       C2_AH_KMALLOC
-};
-
-struct c2_ah {
-       struct ib_ah ibah;
-};
-
-struct c2_cq {
-       struct ib_cq ibcq;
-       spinlock_t lock;
-       atomic_t refcount;
-       int cqn;
-       int is_kernel;
-       wait_queue_head_t wait;
-
-       u32 adapter_handle;
-       struct c2_mq mq;
-};
-
-struct c2_wq {
-       spinlock_t lock;
-};
-struct iw_cm_id;
-struct c2_qp {
-       struct ib_qp ibqp;
-       struct iw_cm_id *cm_id;
-       spinlock_t lock;
-       atomic_t refcount;
-       wait_queue_head_t wait;
-       int qpn;
-
-       u32 adapter_handle;
-       u32 send_sgl_depth;
-       u32 recv_sgl_depth;
-       u32 rdma_write_sgl_depth;
-       u8 state;
-
-       struct c2_mq sq_mq;
-       struct c2_mq rq_mq;
-};
-
-struct c2_cr_query_attrs {
-       u32 local_addr;
-       u32 remote_addr;
-       u16 local_port;
-       u16 remote_port;
-};
-
-static inline struct c2_pd *to_c2pd(struct ib_pd *ibpd)
-{
-       return container_of(ibpd, struct c2_pd, ibpd);
-}
-
-static inline struct c2_ucontext *to_c2ucontext(struct ib_ucontext *ibucontext)
-{
-       return container_of(ibucontext, struct c2_ucontext, ibucontext);
-}
-
-static inline struct c2_mr *to_c2mr(struct ib_mr *ibmr)
-{
-       return container_of(ibmr, struct c2_mr, ibmr);
-}
-
-
-static inline struct c2_ah *to_c2ah(struct ib_ah *ibah)
-{
-       return container_of(ibah, struct c2_ah, ibah);
-}
-
-static inline struct c2_cq *to_c2cq(struct ib_cq *ibcq)
-{
-       return container_of(ibcq, struct c2_cq, ibcq);
-}
-
-static inline struct c2_qp *to_c2qp(struct ib_qp *ibqp)
-{
-       return container_of(ibqp, struct c2_qp, ibqp);
-}
-
-static inline int is_rnic_addr(struct net_device *netdev, u32 addr)
-{
-       struct in_device *ind;
-       int ret = 0;
-
-       ind = in_dev_get(netdev);
-       if (!ind)
-               return 0;
-
-       for_ifa(ind) {
-               if (ifa->ifa_address == addr) {
-                       ret = 1;
-                       break;
-               }
-       }
-       endfor_ifa(ind);
-       in_dev_put(ind);
-       return ret;
-}
-#endif                         /* C2_PROVIDER_H */
diff --git a/drivers/staging/rdma/amso1100/c2_qp.c b/drivers/staging/rdma/amso1100/c2_qp.c
deleted file mode 100644 (file)
index ca364db..0000000
+++ /dev/null
@@ -1,1024 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems. All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/delay.h>
-#include <linux/gfp.h>
-
-#include "c2.h"
-#include "c2_vq.h"
-#include "c2_status.h"
-
-#define C2_MAX_ORD_PER_QP 128
-#define C2_MAX_IRD_PER_QP 128
-
-#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
-#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
-#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
-
-#define NO_SUPPORT -1
-static const u8 c2_opcode[] = {
-       [IB_WR_SEND] = C2_WR_TYPE_SEND,
-       [IB_WR_SEND_WITH_IMM] = NO_SUPPORT,
-       [IB_WR_RDMA_WRITE] = C2_WR_TYPE_RDMA_WRITE,
-       [IB_WR_RDMA_WRITE_WITH_IMM] = NO_SUPPORT,
-       [IB_WR_RDMA_READ] = C2_WR_TYPE_RDMA_READ,
-       [IB_WR_ATOMIC_CMP_AND_SWP] = NO_SUPPORT,
-       [IB_WR_ATOMIC_FETCH_AND_ADD] = NO_SUPPORT,
-};
-
-static int to_c2_state(enum ib_qp_state ib_state)
-{
-       switch (ib_state) {
-       case IB_QPS_RESET:
-               return C2_QP_STATE_IDLE;
-       case IB_QPS_RTS:
-               return C2_QP_STATE_RTS;
-       case IB_QPS_SQD:
-               return C2_QP_STATE_CLOSING;
-       case IB_QPS_SQE:
-               return C2_QP_STATE_CLOSING;
-       case IB_QPS_ERR:
-               return C2_QP_STATE_ERROR;
-       default:
-               return -1;
-       }
-}
-
-static int to_ib_state(enum c2_qp_state c2_state)
-{
-       switch (c2_state) {
-       case C2_QP_STATE_IDLE:
-               return IB_QPS_RESET;
-       case C2_QP_STATE_CONNECTING:
-               return IB_QPS_RTR;
-       case C2_QP_STATE_RTS:
-               return IB_QPS_RTS;
-       case C2_QP_STATE_CLOSING:
-               return IB_QPS_SQD;
-       case C2_QP_STATE_ERROR:
-               return IB_QPS_ERR;
-       case C2_QP_STATE_TERMINATE:
-               return IB_QPS_SQE;
-       default:
-               return -1;
-       }
-}
-
-static const char *to_ib_state_str(int ib_state)
-{
-       static const char *state_str[] = {
-               "IB_QPS_RESET",
-               "IB_QPS_INIT",
-               "IB_QPS_RTR",
-               "IB_QPS_RTS",
-               "IB_QPS_SQD",
-               "IB_QPS_SQE",
-               "IB_QPS_ERR"
-       };
-       if (ib_state < IB_QPS_RESET ||
-           ib_state > IB_QPS_ERR)
-               return "<invalid IB QP state>";
-
-       ib_state -= IB_QPS_RESET;
-       return state_str[ib_state];
-}
-
-void c2_set_qp_state(struct c2_qp *qp, int c2_state)
-{
-       int new_state = to_ib_state(c2_state);
-
-       pr_debug("%s: qp[%p] state modify %s --> %s\n",
-              __func__,
-               qp,
-               to_ib_state_str(qp->state),
-               to_ib_state_str(new_state));
-       qp->state = new_state;
-}
-
-#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
-
-int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
-                struct ib_qp_attr *attr, int attr_mask)
-{
-       struct c2wr_qp_modify_req wr;
-       struct c2wr_qp_modify_rep *reply;
-       struct c2_vq_req *vq_req;
-       unsigned long flags;
-       u8 next_state;
-       int err;
-
-       pr_debug("%s:%d qp=%p, %s --> %s\n",
-               __func__, __LINE__,
-               qp,
-               to_ib_state_str(qp->state),
-               to_ib_state_str(attr->qp_state));
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       c2_wr_set_id(&wr, CCWR_QP_MODIFY);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.qp_handle = qp->adapter_handle;
-       wr.ord = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-       wr.ird = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-       wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-       wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-
-       if (attr_mask & IB_QP_STATE) {
-               /* Ensure the state is valid */
-               if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR) {
-                       err = -EINVAL;
-                       goto bail0;
-               }
-
-               wr.next_qp_state = cpu_to_be32(to_c2_state(attr->qp_state));
-
-               if (attr->qp_state == IB_QPS_ERR) {
-                       spin_lock_irqsave(&qp->lock, flags);
-                       if (qp->cm_id && qp->state == IB_QPS_RTS) {
-                               pr_debug("Generating CLOSE event for QP-->ERR, "
-                                       "qp=%p, cm_id=%p\n",qp,qp->cm_id);
-                               /* Generate an CLOSE event */
-                               vq_req->cm_id = qp->cm_id;
-                               vq_req->event = IW_CM_EVENT_CLOSE;
-                       }
-                       spin_unlock_irqrestore(&qp->lock, flags);
-               }
-               next_state =  attr->qp_state;
-
-       } else if (attr_mask & IB_QP_CUR_STATE) {
-
-               if (attr->cur_qp_state != IB_QPS_RTR &&
-                   attr->cur_qp_state != IB_QPS_RTS &&
-                   attr->cur_qp_state != IB_QPS_SQD &&
-                   attr->cur_qp_state != IB_QPS_SQE) {
-                       err = -EINVAL;
-                       goto bail0;
-               } else
-                       wr.next_qp_state =
-                           cpu_to_be32(to_c2_state(attr->cur_qp_state));
-
-               next_state = attr->cur_qp_state;
-
-       } else {
-               err = 0;
-               goto bail0;
-       }
-
-       /* reference the request struct */
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail0;
-
-       reply = (struct c2wr_qp_modify_rep *) (unsigned long) vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       err = c2_errno(reply);
-       if (!err)
-               qp->state = next_state;
-#ifdef DEBUG
-       else
-               pr_debug("%s: c2_errno=%d\n", __func__, err);
-#endif
-       /*
-        * If we're going to error and generating the event here, then
-        * we need to remove the reference because there will be no
-        * close event generated by the adapter
-       */
-       spin_lock_irqsave(&qp->lock, flags);
-       if (vq_req->event==IW_CM_EVENT_CLOSE && qp->cm_id) {
-               qp->cm_id->rem_ref(qp->cm_id);
-               qp->cm_id = NULL;
-       }
-       spin_unlock_irqrestore(&qp->lock, flags);
-
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       vq_req_free(c2dev, vq_req);
-
-       pr_debug("%s:%d qp=%p, cur_state=%s\n",
-               __func__, __LINE__,
-               qp,
-               to_ib_state_str(qp->state));
-       return err;
-}
-
-int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
-                         int ord, int ird)
-{
-       struct c2wr_qp_modify_req wr;
-       struct c2wr_qp_modify_rep *reply;
-       struct c2_vq_req *vq_req;
-       int err;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       c2_wr_set_id(&wr, CCWR_QP_MODIFY);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.qp_handle = qp->adapter_handle;
-       wr.ord = cpu_to_be32(ord);
-       wr.ird = cpu_to_be32(ird);
-       wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-       wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-       wr.next_qp_state = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-
-       /* reference the request struct */
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail0;
-
-       reply = (struct c2wr_qp_modify_rep *) (unsigned long)
-               vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       err = c2_errno(reply);
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-static int destroy_qp(struct c2_dev *c2dev, struct c2_qp *qp)
-{
-       struct c2_vq_req *vq_req;
-       struct c2wr_qp_destroy_req wr;
-       struct c2wr_qp_destroy_rep *reply;
-       unsigned long flags;
-       int err;
-
-       /*
-        * Allocate a verb request message
-        */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req) {
-               return -ENOMEM;
-       }
-
-       /*
-        * Initialize the WR
-        */
-       c2_wr_set_id(&wr, CCWR_QP_DESTROY);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.qp_handle = qp->adapter_handle;
-
-       /*
-        * reference the request struct.  dereferenced in the int handler.
-        */
-       vq_req_get(c2dev, vq_req);
-
-       spin_lock_irqsave(&qp->lock, flags);
-       if (qp->cm_id && qp->state == IB_QPS_RTS) {
-               pr_debug("destroy_qp: generating CLOSE event for QP-->ERR, "
-                       "qp=%p, cm_id=%p\n",qp,qp->cm_id);
-               /* Generate an CLOSE event */
-               vq_req->qp = qp;
-               vq_req->cm_id = qp->cm_id;
-               vq_req->event = IW_CM_EVENT_CLOSE;
-       }
-       spin_unlock_irqrestore(&qp->lock, flags);
-
-       /*
-        * Send WR to adapter
-        */
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       /*
-        * Wait for reply from adapter
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err) {
-               goto bail0;
-       }
-
-       /*
-        * Process reply
-        */
-       reply = (struct c2wr_qp_destroy_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       spin_lock_irqsave(&qp->lock, flags);
-       if (qp->cm_id) {
-               qp->cm_id->rem_ref(qp->cm_id);
-               qp->cm_id = NULL;
-       }
-       spin_unlock_irqrestore(&qp->lock, flags);
-
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp)
-{
-       int ret;
-
-       idr_preload(GFP_KERNEL);
-       spin_lock_irq(&c2dev->qp_table.lock);
-
-       ret = idr_alloc_cyclic(&c2dev->qp_table.idr, qp, 0, 0, GFP_NOWAIT);
-       if (ret >= 0)
-               qp->qpn = ret;
-
-       spin_unlock_irq(&c2dev->qp_table.lock);
-       idr_preload_end();
-       return ret < 0 ? ret : 0;
-}
-
-static void c2_free_qpn(struct c2_dev *c2dev, int qpn)
-{
-       spin_lock_irq(&c2dev->qp_table.lock);
-       idr_remove(&c2dev->qp_table.idr, qpn);
-       spin_unlock_irq(&c2dev->qp_table.lock);
-}
-
-struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn)
-{
-       unsigned long flags;
-       struct c2_qp *qp;
-
-       spin_lock_irqsave(&c2dev->qp_table.lock, flags);
-       qp = idr_find(&c2dev->qp_table.idr, qpn);
-       spin_unlock_irqrestore(&c2dev->qp_table.lock, flags);
-       return qp;
-}
-
-int c2_alloc_qp(struct c2_dev *c2dev,
-               struct c2_pd *pd,
-               struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp)
-{
-       struct c2wr_qp_create_req wr;
-       struct c2wr_qp_create_rep *reply;
-       struct c2_vq_req *vq_req;
-       struct c2_cq *send_cq = to_c2cq(qp_attrs->send_cq);
-       struct c2_cq *recv_cq = to_c2cq(qp_attrs->recv_cq);
-       unsigned long peer_pa;
-       u32 q_size, msg_size, mmap_size;
-       void __iomem *mmap;
-       int err;
-
-       err = c2_alloc_qpn(c2dev, qp);
-       if (err)
-               return err;
-       qp->ibqp.qp_num = qp->qpn;
-       qp->ibqp.qp_type = IB_QPT_RC;
-
-       /* Allocate the SQ and RQ shared pointers */
-       qp->sq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                        &qp->sq_mq.shared_dma, GFP_KERNEL);
-       if (!qp->sq_mq.shared) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       qp->rq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                        &qp->rq_mq.shared_dma, GFP_KERNEL);
-       if (!qp->rq_mq.shared) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       /* Allocate the verbs request */
-       vq_req = vq_req_alloc(c2dev);
-       if (vq_req == NULL) {
-               err = -ENOMEM;
-               goto bail2;
-       }
-
-       /* Initialize the work request */
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_QP_CREATE);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.sq_cq_handle = send_cq->adapter_handle;
-       wr.rq_cq_handle = recv_cq->adapter_handle;
-       wr.sq_depth = cpu_to_be32(qp_attrs->cap.max_send_wr + 1);
-       wr.rq_depth = cpu_to_be32(qp_attrs->cap.max_recv_wr + 1);
-       wr.srq_handle = 0;
-       wr.flags = cpu_to_be32(QP_RDMA_READ | QP_RDMA_WRITE | QP_MW_BIND |
-                              QP_ZERO_STAG | QP_RDMA_READ_RESPONSE);
-       wr.send_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
-       wr.recv_sgl_depth = cpu_to_be32(qp_attrs->cap.max_recv_sge);
-       wr.rdma_write_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
-       wr.shared_sq_ht = cpu_to_be64(qp->sq_mq.shared_dma);
-       wr.shared_rq_ht = cpu_to_be64(qp->rq_mq.shared_dma);
-       wr.ord = cpu_to_be32(C2_MAX_ORD_PER_QP);
-       wr.ird = cpu_to_be32(C2_MAX_IRD_PER_QP);
-       wr.pd_id = pd->pd_id;
-       wr.user_context = (unsigned long) qp;
-
-       vq_req_get(c2dev, vq_req);
-
-       /* Send the WR to the adapter */
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail3;
-       }
-
-       /* Wait for the verb reply  */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err) {
-               goto bail3;
-       }
-
-       /* Process the reply */
-       reply = (struct c2wr_qp_create_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail3;
-       }
-
-       if ((err = c2_wr_get_result(reply)) != 0) {
-               goto bail4;
-       }
-
-       /* Fill in the kernel QP struct */
-       atomic_set(&qp->refcount, 1);
-       qp->adapter_handle = reply->qp_handle;
-       qp->state = IB_QPS_RESET;
-       qp->send_sgl_depth = qp_attrs->cap.max_send_sge;
-       qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge;
-       qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge;
-       init_waitqueue_head(&qp->wait);
-
-       /* Initialize the SQ MQ */
-       q_size = be32_to_cpu(reply->sq_depth);
-       msg_size = be32_to_cpu(reply->sq_msg_size);
-       peer_pa = c2dev->pa + be32_to_cpu(reply->sq_mq_start);
-       mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
-       mmap = ioremap_nocache(peer_pa, mmap_size);
-       if (!mmap) {
-               err = -ENOMEM;
-               goto bail5;
-       }
-
-       c2_mq_req_init(&qp->sq_mq,
-                      be32_to_cpu(reply->sq_mq_index),
-                      q_size,
-                      msg_size,
-                      mmap + sizeof(struct c2_mq_shared),      /* pool start */
-                      mmap,                            /* peer */
-                      C2_MQ_ADAPTER_TARGET);
-
-       /* Initialize the RQ mq */
-       q_size = be32_to_cpu(reply->rq_depth);
-       msg_size = be32_to_cpu(reply->rq_msg_size);
-       peer_pa = c2dev->pa + be32_to_cpu(reply->rq_mq_start);
-       mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
-       mmap = ioremap_nocache(peer_pa, mmap_size);
-       if (!mmap) {
-               err = -ENOMEM;
-               goto bail6;
-       }
-
-       c2_mq_req_init(&qp->rq_mq,
-                      be32_to_cpu(reply->rq_mq_index),
-                      q_size,
-                      msg_size,
-                      mmap + sizeof(struct c2_mq_shared),      /* pool start */
-                      mmap,                            /* peer */
-                      C2_MQ_ADAPTER_TARGET);
-
-       vq_repbuf_free(c2dev, reply);
-       vq_req_free(c2dev, vq_req);
-
-       return 0;
-
-bail6:
-       iounmap(qp->sq_mq.peer);
-bail5:
-       destroy_qp(c2dev, qp);
-bail4:
-       vq_repbuf_free(c2dev, reply);
-bail3:
-       vq_req_free(c2dev, vq_req);
-bail2:
-       c2_free_mqsp(qp->rq_mq.shared);
-bail1:
-       c2_free_mqsp(qp->sq_mq.shared);
-bail0:
-       c2_free_qpn(c2dev, qp->qpn);
-       return err;
-}
-
-static inline void c2_lock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq)
-{
-       if (send_cq == recv_cq)
-               spin_lock_irq(&send_cq->lock);
-       else if (send_cq > recv_cq) {
-               spin_lock_irq(&send_cq->lock);
-               spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
-       } else {
-               spin_lock_irq(&recv_cq->lock);
-               spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
-       }
-}
-
-static inline void c2_unlock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq)
-{
-       if (send_cq == recv_cq)
-               spin_unlock_irq(&send_cq->lock);
-       else if (send_cq > recv_cq) {
-               spin_unlock(&recv_cq->lock);
-               spin_unlock_irq(&send_cq->lock);
-       } else {
-               spin_unlock(&send_cq->lock);
-               spin_unlock_irq(&recv_cq->lock);
-       }
-}
-
-void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp)
-{
-       struct c2_cq *send_cq;
-       struct c2_cq *recv_cq;
-
-       send_cq = to_c2cq(qp->ibqp.send_cq);
-       recv_cq = to_c2cq(qp->ibqp.recv_cq);
-
-       /*
-        * Lock CQs here, so that CQ polling code can do QP lookup
-        * without taking a lock.
-        */
-       c2_lock_cqs(send_cq, recv_cq);
-       c2_free_qpn(c2dev, qp->qpn);
-       c2_unlock_cqs(send_cq, recv_cq);
-
-       /*
-        * Destroy qp in the rnic...
-        */
-       destroy_qp(c2dev, qp);
-
-       /*
-        * Mark any unreaped CQEs as null and void.
-        */
-       c2_cq_clean(c2dev, qp, send_cq->cqn);
-       if (send_cq != recv_cq)
-               c2_cq_clean(c2dev, qp, recv_cq->cqn);
-       /*
-        * Unmap the MQs and return the shared pointers
-        * to the message pool.
-        */
-       iounmap(qp->sq_mq.peer);
-       iounmap(qp->rq_mq.peer);
-       c2_free_mqsp(qp->sq_mq.shared);
-       c2_free_mqsp(qp->rq_mq.shared);
-
-       atomic_dec(&qp->refcount);
-       wait_event(qp->wait, !atomic_read(&qp->refcount));
-}
-
-/*
- * Function: move_sgl
- *
- * Description:
- * Move an SGL from the user's work request struct into a CCIL Work Request
- * message, swapping to WR byte order and ensure the total length doesn't
- * overflow.
- *
- * IN:
- * dst         - ptr to CCIL Work Request message SGL memory.
- * src         - ptr to the consumers SGL memory.
- *
- * OUT: none
- *
- * Return:
- * CCIL status codes.
- */
-static int
-move_sgl(struct c2_data_addr * dst, struct ib_sge *src, int count, u32 * p_len,
-        u8 * actual_count)
-{
-       u32 tot = 0;            /* running total */
-       u8 acount = 0;          /* running total non-0 len sge's */
-
-       while (count > 0) {
-               /*
-                * If the addition of this SGE causes the
-                * total SGL length to exceed 2^32-1, then
-                * fail-n-bail.
-                *
-                * If the current total plus the next element length
-                * wraps, then it will go negative and be less than the
-                * current total...
-                */
-               if ((tot + src->length) < tot) {
-                       return -EINVAL;
-               }
-               /*
-                * Bug: 1456 (as well as 1498 & 1643)
-                * Skip over any sge's supplied with len=0
-                */
-               if (src->length) {
-                       tot += src->length;
-                       dst->stag = cpu_to_be32(src->lkey);
-                       dst->to = cpu_to_be64(src->addr);
-                       dst->length = cpu_to_be32(src->length);
-                       dst++;
-                       acount++;
-               }
-               src++;
-               count--;
-       }
-
-       if (acount == 0) {
-               /*
-                * Bug: 1476 (as well as 1498, 1456 and 1643)
-                * Setup the SGL in the WR to make it easier for the RNIC.
-                * This way, the FW doesn't have to deal with special cases.
-                * Setting length=0 should be sufficient.
-                */
-               dst->stag = 0;
-               dst->to = 0;
-               dst->length = 0;
-       }
-
-       *p_len = tot;
-       *actual_count = acount;
-       return 0;
-}
-
-/*
- * Function: c2_activity (private function)
- *
- * Description:
- * Post an mq index to the host->adapter activity fifo.
- *
- * IN:
- * c2dev       - ptr to c2dev structure
- * mq_index    - mq index to post
- * shared      - value most recently written to shared
- *
- * OUT:
- *
- * Return:
- * none
- */
-static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared)
-{
-       /*
-        * First read the register to see if the FIFO is full, and if so,
-        * spin until it's not.  This isn't perfect -- there is no
-        * synchronization among the clients of the register, but in
-        * practice it prevents multiple CPU from hammering the bus
-        * with PCI RETRY. Note that when this does happen, the card
-        * cannot get on the bus and the card and system hang in a
-        * deadlock -- thus the need for this code. [TOT]
-        */
-       while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000)
-               udelay(10);
-
-       __raw_writel(C2_HINT_MAKE(mq_index, shared),
-                    c2dev->regs + PCI_BAR0_ADAPTER_HINT);
-}
-
-/*
- * Function: qp_wr_post
- *
- * Description:
- * This in-line function allocates a MQ msg, then moves the host-copy of
- * the completed WR into msg.  Then it posts the message.
- *
- * IN:
- * q           - ptr to user MQ.
- * wr          - ptr to host-copy of the WR.
- * qp          - ptr to user qp
- * size                - Number of bytes to post.  Assumed to be divisible by 4.
- *
- * OUT: none
- *
- * Return:
- * CCIL status codes.
- */
-static int qp_wr_post(struct c2_mq *q, union c2wr * wr, struct c2_qp *qp, u32 size)
-{
-       union c2wr *msg;
-
-       msg = c2_mq_alloc(q);
-       if (msg == NULL) {
-               return -EINVAL;
-       }
-#ifdef CCMSGMAGIC
-       ((c2wr_hdr_t *) wr)->magic = cpu_to_be32(CCWR_MAGIC);
-#endif
-
-       /*
-        * Since all header fields in the WR are the same as the
-        * CQE, set the following so the adapter need not.
-        */
-       c2_wr_set_result(wr, CCERR_PENDING);
-
-       /*
-        * Copy the wr down to the adapter
-        */
-       memcpy((void *) msg, (void *) wr, size);
-
-       c2_mq_produce(q);
-       return 0;
-}
-
-
-int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
-                struct ib_send_wr **bad_wr)
-{
-       struct c2_dev *c2dev = to_c2dev(ibqp->device);
-       struct c2_qp *qp = to_c2qp(ibqp);
-       union c2wr wr;
-       unsigned long lock_flags;
-       int err = 0;
-
-       u32 flags;
-       u32 tot_len;
-       u8 actual_sge_count;
-       u32 msg_size;
-
-       if (qp->state > IB_QPS_RTS) {
-               err = -EINVAL;
-               goto out;
-       }
-
-       while (ib_wr) {
-
-               flags = 0;
-               wr.sqwr.sq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
-               if (ib_wr->send_flags & IB_SEND_SIGNALED) {
-                       flags |= SQ_SIGNALED;
-               }
-
-               switch (ib_wr->opcode) {
-               case IB_WR_SEND:
-               case IB_WR_SEND_WITH_INV:
-                       if (ib_wr->opcode == IB_WR_SEND) {
-                               if (ib_wr->send_flags & IB_SEND_SOLICITED)
-                                       c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE);
-                               else
-                                       c2_wr_set_id(&wr, C2_WR_TYPE_SEND);
-                               wr.sqwr.send.remote_stag = 0;
-                       } else {
-                               if (ib_wr->send_flags & IB_SEND_SOLICITED)
-                                       c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE_INV);
-                               else
-                                       c2_wr_set_id(&wr, C2_WR_TYPE_SEND_INV);
-                               wr.sqwr.send.remote_stag =
-                                       cpu_to_be32(ib_wr->ex.invalidate_rkey);
-                       }
-
-                       msg_size = sizeof(struct c2wr_send_req) +
-                               sizeof(struct c2_data_addr) * ib_wr->num_sge;
-                       if (ib_wr->num_sge > qp->send_sgl_depth) {
-                               err = -EINVAL;
-                               break;
-                       }
-                       if (ib_wr->send_flags & IB_SEND_FENCE) {
-                               flags |= SQ_READ_FENCE;
-                       }
-                       err = move_sgl((struct c2_data_addr *) & (wr.sqwr.send.data),
-                                      ib_wr->sg_list,
-                                      ib_wr->num_sge,
-                                      &tot_len, &actual_sge_count);
-                       wr.sqwr.send.sge_len = cpu_to_be32(tot_len);
-                       c2_wr_set_sge_count(&wr, actual_sge_count);
-                       break;
-               case IB_WR_RDMA_WRITE:
-                       c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_WRITE);
-                       msg_size = sizeof(struct c2wr_rdma_write_req) +
-                           (sizeof(struct c2_data_addr) * ib_wr->num_sge);
-                       if (ib_wr->num_sge > qp->rdma_write_sgl_depth) {
-                               err = -EINVAL;
-                               break;
-                       }
-                       if (ib_wr->send_flags & IB_SEND_FENCE) {
-                               flags |= SQ_READ_FENCE;
-                       }
-                       wr.sqwr.rdma_write.remote_stag =
-                           cpu_to_be32(rdma_wr(ib_wr)->rkey);
-                       wr.sqwr.rdma_write.remote_to =
-                           cpu_to_be64(rdma_wr(ib_wr)->remote_addr);
-                       err = move_sgl((struct c2_data_addr *)
-                                      & (wr.sqwr.rdma_write.data),
-                                      ib_wr->sg_list,
-                                      ib_wr->num_sge,
-                                      &tot_len, &actual_sge_count);
-                       wr.sqwr.rdma_write.sge_len = cpu_to_be32(tot_len);
-                       c2_wr_set_sge_count(&wr, actual_sge_count);
-                       break;
-               case IB_WR_RDMA_READ:
-                       c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_READ);
-                       msg_size = sizeof(struct c2wr_rdma_read_req);
-
-                       /* IWarp only suppots 1 sge for RDMA reads */
-                       if (ib_wr->num_sge > 1) {
-                               err = -EINVAL;
-                               break;
-                       }
-
-                       /*
-                        * Move the local and remote stag/to/len into the WR.
-                        */
-                       wr.sqwr.rdma_read.local_stag =
-                           cpu_to_be32(ib_wr->sg_list->lkey);
-                       wr.sqwr.rdma_read.local_to =
-                           cpu_to_be64(ib_wr->sg_list->addr);
-                       wr.sqwr.rdma_read.remote_stag =
-                           cpu_to_be32(rdma_wr(ib_wr)->rkey);
-                       wr.sqwr.rdma_read.remote_to =
-                           cpu_to_be64(rdma_wr(ib_wr)->remote_addr);
-                       wr.sqwr.rdma_read.length =
-                           cpu_to_be32(ib_wr->sg_list->length);
-                       break;
-               default:
-                       /* error */
-                       msg_size = 0;
-                       err = -EINVAL;
-                       break;
-               }
-
-               /*
-                * If we had an error on the last wr build, then
-                * break out.  Possible errors include bogus WR
-                * type, and a bogus SGL length...
-                */
-               if (err) {
-                       break;
-               }
-
-               /*
-                * Store flags
-                */
-               c2_wr_set_flags(&wr, flags);
-
-               /*
-                * Post the puppy!
-                */
-               spin_lock_irqsave(&qp->lock, lock_flags);
-               err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size);
-               if (err) {
-                       spin_unlock_irqrestore(&qp->lock, lock_flags);
-                       break;
-               }
-
-               /*
-                * Enqueue mq index to activity FIFO.
-                */
-               c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count);
-               spin_unlock_irqrestore(&qp->lock, lock_flags);
-
-               ib_wr = ib_wr->next;
-       }
-
-out:
-       if (err)
-               *bad_wr = ib_wr;
-       return err;
-}
-
-int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
-                   struct ib_recv_wr **bad_wr)
-{
-       struct c2_dev *c2dev = to_c2dev(ibqp->device);
-       struct c2_qp *qp = to_c2qp(ibqp);
-       union c2wr wr;
-       unsigned long lock_flags;
-       int err = 0;
-
-       if (qp->state > IB_QPS_RTS) {
-               err = -EINVAL;
-               goto out;
-       }
-
-       /*
-        * Try and post each work request
-        */
-       while (ib_wr) {
-               u32 tot_len;
-               u8 actual_sge_count;
-
-               if (ib_wr->num_sge > qp->recv_sgl_depth) {
-                       err = -EINVAL;
-                       break;
-               }
-
-               /*
-                * Create local host-copy of the WR
-                */
-               wr.rqwr.rq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
-               c2_wr_set_id(&wr, CCWR_RECV);
-               c2_wr_set_flags(&wr, 0);
-
-               /* sge_count is limited to eight bits. */
-               BUG_ON(ib_wr->num_sge >= 256);
-               err = move_sgl((struct c2_data_addr *) & (wr.rqwr.data),
-                              ib_wr->sg_list,
-                              ib_wr->num_sge, &tot_len, &actual_sge_count);
-               c2_wr_set_sge_count(&wr, actual_sge_count);
-
-               /*
-                * If we had an error on the last wr build, then
-                * break out.  Possible errors include bogus WR
-                * type, and a bogus SGL length...
-                */
-               if (err) {
-                       break;
-               }
-
-               spin_lock_irqsave(&qp->lock, lock_flags);
-               err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size);
-               if (err) {
-                       spin_unlock_irqrestore(&qp->lock, lock_flags);
-                       break;
-               }
-
-               /*
-                * Enqueue mq index to activity FIFO
-                */
-               c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count);
-               spin_unlock_irqrestore(&qp->lock, lock_flags);
-
-               ib_wr = ib_wr->next;
-       }
-
-out:
-       if (err)
-               *bad_wr = ib_wr;
-       return err;
-}
-
-void c2_init_qp_table(struct c2_dev *c2dev)
-{
-       spin_lock_init(&c2dev->qp_table.lock);
-       idr_init(&c2dev->qp_table.idr);
-}
-
-void c2_cleanup_qp_table(struct c2_dev *c2dev)
-{
-       idr_destroy(&c2dev->qp_table.idr);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_rnic.c b/drivers/staging/rdma/amso1100/c2_rnic.c
deleted file mode 100644 (file)
index 5e65c6d..0000000
+++ /dev/null
@@ -1,652 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/mm.h>
-#include <linux/inet.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-
-#include <linux/route.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-#include <rdma/ib_smi.h>
-#include "c2.h"
-#include "c2_vq.h"
-
-/* Device capabilities */
-#define C2_MIN_PAGESIZE  1024
-
-#define C2_MAX_MRS       32768
-#define C2_MAX_QPS       16000
-#define C2_MAX_WQE_SZ    256
-#define C2_MAX_QP_WR     ((128*1024)/C2_MAX_WQE_SZ)
-#define C2_MAX_SGES      4
-#define C2_MAX_SGE_RD    1
-#define C2_MAX_CQS       32768
-#define C2_MAX_CQES      4096
-#define C2_MAX_PDS       16384
-
-/*
- * Send the adapter INIT message to the amso1100
- */
-static int c2_adapter_init(struct c2_dev *c2dev)
-{
-       struct c2wr_init_req wr;
-
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_INIT);
-       wr.hdr.context = 0;
-       wr.hint_count = cpu_to_be64(c2dev->hint_count_dma);
-       wr.q0_host_shared = cpu_to_be64(c2dev->req_vq.shared_dma);
-       wr.q1_host_shared = cpu_to_be64(c2dev->rep_vq.shared_dma);
-       wr.q1_host_msg_pool = cpu_to_be64(c2dev->rep_vq.host_dma);
-       wr.q2_host_shared = cpu_to_be64(c2dev->aeq.shared_dma);
-       wr.q2_host_msg_pool = cpu_to_be64(c2dev->aeq.host_dma);
-
-       /* Post the init message */
-       return vq_send_wr(c2dev, (union c2wr *) & wr);
-}
-
-/*
- * Send the adapter TERM message to the amso1100
- */
-static void c2_adapter_term(struct c2_dev *c2dev)
-{
-       struct c2wr_init_req wr;
-
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_TERM);
-       wr.hdr.context = 0;
-
-       /* Post the init message */
-       vq_send_wr(c2dev, (union c2wr *) & wr);
-       c2dev->init = 0;
-
-       return;
-}
-
-/*
- * Query the adapter
- */
-static int c2_rnic_query(struct c2_dev *c2dev, struct ib_device_attr *props)
-{
-       struct c2_vq_req *vq_req;
-       struct c2wr_rnic_query_req wr;
-       struct c2wr_rnic_query_rep *reply;
-       int err;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       c2_wr_set_id(&wr, CCWR_RNIC_QUERY);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) &wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail1;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail1;
-
-       reply =
-           (struct c2wr_rnic_query_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply)
-               err = -ENOMEM;
-       else
-               err = c2_errno(reply);
-       if (err)
-               goto bail2;
-
-       props->fw_ver =
-               ((u64)be32_to_cpu(reply->fw_ver_major) << 32) |
-               ((be32_to_cpu(reply->fw_ver_minor) & 0xFFFF) << 16) |
-               (be32_to_cpu(reply->fw_ver_patch) & 0xFFFF);
-       memcpy(&props->sys_image_guid, c2dev->netdev->dev_addr, 6);
-       props->max_mr_size         = 0xFFFFFFFF;
-       props->page_size_cap       = ~(C2_MIN_PAGESIZE-1);
-       props->vendor_id           = be32_to_cpu(reply->vendor_id);
-       props->vendor_part_id      = be32_to_cpu(reply->part_number);
-       props->hw_ver              = be32_to_cpu(reply->hw_version);
-       props->max_qp              = be32_to_cpu(reply->max_qps);
-       props->max_qp_wr           = be32_to_cpu(reply->max_qp_depth);
-       props->device_cap_flags    = c2dev->device_cap_flags;
-       props->max_sge             = C2_MAX_SGES;
-       props->max_sge_rd          = C2_MAX_SGE_RD;
-       props->max_cq              = be32_to_cpu(reply->max_cqs);
-       props->max_cqe             = be32_to_cpu(reply->max_cq_depth);
-       props->max_mr              = be32_to_cpu(reply->max_mrs);
-       props->max_pd              = be32_to_cpu(reply->max_pds);
-       props->max_qp_rd_atom      = be32_to_cpu(reply->max_qp_ird);
-       props->max_ee_rd_atom      = 0;
-       props->max_res_rd_atom     = be32_to_cpu(reply->max_global_ird);
-       props->max_qp_init_rd_atom = be32_to_cpu(reply->max_qp_ord);
-       props->max_ee_init_rd_atom = 0;
-       props->atomic_cap          = IB_ATOMIC_NONE;
-       props->max_ee              = 0;
-       props->max_rdd             = 0;
-       props->max_mw              = be32_to_cpu(reply->max_mws);
-       props->max_raw_ipv6_qp     = 0;
-       props->max_raw_ethy_qp     = 0;
-       props->max_mcast_grp       = 0;
-       props->max_mcast_qp_attach = 0;
-       props->max_total_mcast_qp_attach = 0;
-       props->max_ah              = 0;
-       props->max_fmr             = 0;
-       props->max_map_per_fmr     = 0;
-       props->max_srq             = 0;
-       props->max_srq_wr          = 0;
-       props->max_srq_sge         = 0;
-       props->max_pkeys           = 0;
-       props->local_ca_ack_delay  = 0;
-
- bail2:
-       vq_repbuf_free(c2dev, reply);
-
- bail1:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-/*
- * Add an IP address to the RNIC interface
- */
-int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask)
-{
-       struct c2_vq_req *vq_req;
-       struct c2wr_rnic_setconfig_req *wr;
-       struct c2wr_rnic_setconfig_rep *reply;
-       struct c2_netaddr netaddr;
-       int err, len;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       len = sizeof(struct c2_netaddr);
-       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-       if (!wr) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
-       wr->hdr.context = (unsigned long) vq_req;
-       wr->rnic_handle = c2dev->adapter_handle;
-       wr->option = cpu_to_be32(C2_CFG_ADD_ADDR);
-
-       netaddr.ip_addr = inaddr;
-       netaddr.netmask = inmask;
-       netaddr.mtu = 0;
-
-       memcpy(wr->data, &netaddr, len);
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail1;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail1;
-
-       reply =
-           (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       err = c2_errno(reply);
-       vq_repbuf_free(c2dev, reply);
-
-bail1:
-       kfree(wr);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-/*
- * Delete an IP address from the RNIC interface
- */
-int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask)
-{
-       struct c2_vq_req *vq_req;
-       struct c2wr_rnic_setconfig_req *wr;
-       struct c2wr_rnic_setconfig_rep *reply;
-       struct c2_netaddr netaddr;
-       int err, len;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       len = sizeof(struct c2_netaddr);
-       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-       if (!wr) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
-       wr->hdr.context = (unsigned long) vq_req;
-       wr->rnic_handle = c2dev->adapter_handle;
-       wr->option = cpu_to_be32(C2_CFG_DEL_ADDR);
-
-       netaddr.ip_addr = inaddr;
-       netaddr.netmask = inmask;
-       netaddr.mtu = 0;
-
-       memcpy(wr->data, &netaddr, len);
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail1;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail1;
-
-       reply =
-           (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       err = c2_errno(reply);
-       vq_repbuf_free(c2dev, reply);
-
-bail1:
-       kfree(wr);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-/*
- * Open a single RNIC instance to use with all
- * low level openib calls
- */
-static int c2_rnic_open(struct c2_dev *c2dev)
-{
-       struct c2_vq_req *vq_req;
-       union c2wr wr;
-       struct c2wr_rnic_open_rep *reply;
-       int err;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (vq_req == NULL) {
-               return -ENOMEM;
-       }
-
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_RNIC_OPEN);
-       wr.rnic_open.req.hdr.context = (unsigned long) (vq_req);
-       wr.rnic_open.req.flags = cpu_to_be16(RNIC_PRIV_MODE);
-       wr.rnic_open.req.port_num = cpu_to_be16(0);
-       wr.rnic_open.req.user_context = (unsigned long) c2dev;
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, &wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err) {
-               goto bail0;
-       }
-
-       reply = (struct c2wr_rnic_open_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       if ((err = c2_errno(reply)) != 0) {
-               goto bail1;
-       }
-
-       c2dev->adapter_handle = reply->rnic_handle;
-
-bail1:
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-/*
- * Close the RNIC instance
- */
-static int c2_rnic_close(struct c2_dev *c2dev)
-{
-       struct c2_vq_req *vq_req;
-       union c2wr wr;
-       struct c2wr_rnic_close_rep *reply;
-       int err;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (vq_req == NULL) {
-               return -ENOMEM;
-       }
-
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_RNIC_CLOSE);
-       wr.rnic_close.req.hdr.context = (unsigned long) vq_req;
-       wr.rnic_close.req.rnic_handle = c2dev->adapter_handle;
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, &wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err) {
-               goto bail0;
-       }
-
-       reply = (struct c2wr_rnic_close_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       if ((err = c2_errno(reply)) != 0) {
-               goto bail1;
-       }
-
-       c2dev->adapter_handle = 0;
-
-bail1:
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-/*
- * Called by c2_probe to initialize the RNIC. This principally
- * involves initializing the various limits and resource pools that
- * comprise the RNIC instance.
- */
-int c2_rnic_init(struct c2_dev *c2dev)
-{
-       int err;
-       u32 qsize, msgsize;
-       void *q1_pages;
-       void *q2_pages;
-       void __iomem *mmio_regs;
-
-       /* Device capabilities */
-       c2dev->device_cap_flags =
-           (IB_DEVICE_RESIZE_MAX_WR |
-            IB_DEVICE_CURR_QP_STATE_MOD |
-            IB_DEVICE_SYS_IMAGE_GUID |
-            IB_DEVICE_LOCAL_DMA_LKEY |
-            IB_DEVICE_MEM_WINDOW);
-
-       /* Allocate the qptr_array */
-       c2dev->qptr_array = vzalloc(C2_MAX_CQS * sizeof(void *));
-       if (!c2dev->qptr_array) {
-               return -ENOMEM;
-       }
-
-       /* Initialize the qptr_array */
-       c2dev->qptr_array[0] = (void *) &c2dev->req_vq;
-       c2dev->qptr_array[1] = (void *) &c2dev->rep_vq;
-       c2dev->qptr_array[2] = (void *) &c2dev->aeq;
-
-       /* Initialize data structures */
-       init_waitqueue_head(&c2dev->req_vq_wo);
-       spin_lock_init(&c2dev->vqlock);
-       spin_lock_init(&c2dev->lock);
-
-       /* Allocate MQ shared pointer pool for kernel clients. User
-        * mode client pools are hung off the user context
-        */
-       err = c2_init_mqsp_pool(c2dev, GFP_KERNEL, &c2dev->kern_mqsp_pool);
-       if (err) {
-               goto bail0;
-       }
-
-       /* Allocate shared pointers for Q0, Q1, and Q2 from
-        * the shared pointer pool.
-        */
-
-       c2dev->hint_count = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                            &c2dev->hint_count_dma,
-                                            GFP_KERNEL);
-       c2dev->req_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                            &c2dev->req_vq.shared_dma,
-                                            GFP_KERNEL);
-       c2dev->rep_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                            &c2dev->rep_vq.shared_dma,
-                                            GFP_KERNEL);
-       c2dev->aeq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                         &c2dev->aeq.shared_dma, GFP_KERNEL);
-       if (!c2dev->hint_count || !c2dev->req_vq.shared ||
-           !c2dev->rep_vq.shared || !c2dev->aeq.shared) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       mmio_regs = c2dev->kva;
-       /* Initialize the Verbs Request Queue */
-       c2_mq_req_init(&c2dev->req_vq, 0,
-                      be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_QSIZE)),
-                      be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_MSGSIZE)),
-                      mmio_regs +
-                      be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_POOLSTART)),
-                      mmio_regs +
-                      be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_SHARED)),
-                      C2_MQ_ADAPTER_TARGET);
-
-       /* Initialize the Verbs Reply Queue */
-       qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_QSIZE));
-       msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_MSGSIZE));
-       q1_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize,
-                                     &c2dev->rep_vq.host_dma, GFP_KERNEL);
-       if (!q1_pages) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-       dma_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
-       pr_debug("%s rep_vq va %p dma %llx\n", __func__, q1_pages,
-                (unsigned long long) c2dev->rep_vq.host_dma);
-       c2_mq_rep_init(&c2dev->rep_vq,
-                  1,
-                  qsize,
-                  msgsize,
-                  q1_pages,
-                  mmio_regs +
-                  be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_SHARED)),
-                  C2_MQ_HOST_TARGET);
-
-       /* Initialize the Asynchronus Event Queue */
-       qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_QSIZE));
-       msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_MSGSIZE));
-       q2_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize,
-                                     &c2dev->aeq.host_dma, GFP_KERNEL);
-       if (!q2_pages) {
-               err = -ENOMEM;
-               goto bail2;
-       }
-       dma_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
-       pr_debug("%s aeq va %p dma %llx\n", __func__, q2_pages,
-                (unsigned long long) c2dev->aeq.host_dma);
-       c2_mq_rep_init(&c2dev->aeq,
-                      2,
-                      qsize,
-                      msgsize,
-                      q2_pages,
-                      mmio_regs +
-                      be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_SHARED)),
-                      C2_MQ_HOST_TARGET);
-
-       /* Initialize the verbs request allocator */
-       err = vq_init(c2dev);
-       if (err)
-               goto bail3;
-
-       /* Enable interrupts on the adapter */
-       writel(0, c2dev->regs + C2_IDIS);
-
-       /* create the WR init message */
-       err = c2_adapter_init(c2dev);
-       if (err)
-               goto bail4;
-       c2dev->init++;
-
-       /* open an adapter instance */
-       err = c2_rnic_open(c2dev);
-       if (err)
-               goto bail4;
-
-       /* Initialize cached the adapter limits */
-       err = c2_rnic_query(c2dev, &c2dev->props);
-       if (err)
-               goto bail5;
-
-       /* Initialize the PD pool */
-       err = c2_init_pd_table(c2dev);
-       if (err)
-               goto bail5;
-
-       /* Initialize the QP pool */
-       c2_init_qp_table(c2dev);
-       return 0;
-
-bail5:
-       c2_rnic_close(c2dev);
-bail4:
-       vq_term(c2dev);
-bail3:
-       dma_free_coherent(&c2dev->pcidev->dev,
-                         c2dev->aeq.q_size * c2dev->aeq.msg_size,
-                         q2_pages, dma_unmap_addr(&c2dev->aeq, mapping));
-bail2:
-       dma_free_coherent(&c2dev->pcidev->dev,
-                         c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
-                         q1_pages, dma_unmap_addr(&c2dev->rep_vq, mapping));
-bail1:
-       c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
-bail0:
-       vfree(c2dev->qptr_array);
-
-       return err;
-}
-
-/*
- * Called by c2_remove to cleanup the RNIC resources.
- */
-void c2_rnic_term(struct c2_dev *c2dev)
-{
-
-       /* Close the open adapter instance */
-       c2_rnic_close(c2dev);
-
-       /* Send the TERM message to the adapter */
-       c2_adapter_term(c2dev);
-
-       /* Disable interrupts on the adapter */
-       writel(1, c2dev->regs + C2_IDIS);
-
-       /* Free the QP pool */
-       c2_cleanup_qp_table(c2dev);
-
-       /* Free the PD pool */
-       c2_cleanup_pd_table(c2dev);
-
-       /* Free the verbs request allocator */
-       vq_term(c2dev);
-
-       /* Free the asynchronus event queue */
-       dma_free_coherent(&c2dev->pcidev->dev,
-                         c2dev->aeq.q_size * c2dev->aeq.msg_size,
-                         c2dev->aeq.msg_pool.host,
-                         dma_unmap_addr(&c2dev->aeq, mapping));
-
-       /* Free the verbs reply queue */
-       dma_free_coherent(&c2dev->pcidev->dev,
-                         c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
-                         c2dev->rep_vq.msg_pool.host,
-                         dma_unmap_addr(&c2dev->rep_vq, mapping));
-
-       /* Free the MQ shared pointer pool */
-       c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
-
-       /* Free the qptr_array */
-       vfree(c2dev->qptr_array);
-
-       return;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_status.h b/drivers/staging/rdma/amso1100/c2_status.h
deleted file mode 100644 (file)
index 6ee4aa9..0000000
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef        _C2_STATUS_H_
-#define _C2_STATUS_H_
-
-/*
- * Verbs Status Codes
- */
-enum c2_status {
-       C2_OK = 0,              /* This must be zero */
-       CCERR_INSUFFICIENT_RESOURCES = 1,
-       CCERR_INVALID_MODIFIER = 2,
-       CCERR_INVALID_MODE = 3,
-       CCERR_IN_USE = 4,
-       CCERR_INVALID_RNIC = 5,
-       CCERR_INTERRUPTED_OPERATION = 6,
-       CCERR_INVALID_EH = 7,
-       CCERR_INVALID_CQ = 8,
-       CCERR_CQ_EMPTY = 9,
-       CCERR_NOT_IMPLEMENTED = 10,
-       CCERR_CQ_DEPTH_TOO_SMALL = 11,
-       CCERR_PD_IN_USE = 12,
-       CCERR_INVALID_PD = 13,
-       CCERR_INVALID_SRQ = 14,
-       CCERR_INVALID_ADDRESS = 15,
-       CCERR_INVALID_NETMASK = 16,
-       CCERR_INVALID_QP = 17,
-       CCERR_INVALID_QP_STATE = 18,
-       CCERR_TOO_MANY_WRS_POSTED = 19,
-       CCERR_INVALID_WR_TYPE = 20,
-       CCERR_INVALID_SGL_LENGTH = 21,
-       CCERR_INVALID_SQ_DEPTH = 22,
-       CCERR_INVALID_RQ_DEPTH = 23,
-       CCERR_INVALID_ORD = 24,
-       CCERR_INVALID_IRD = 25,
-       CCERR_QP_ATTR_CANNOT_CHANGE = 26,
-       CCERR_INVALID_STAG = 27,
-       CCERR_QP_IN_USE = 28,
-       CCERR_OUTSTANDING_WRS = 29,
-       CCERR_STAG_IN_USE = 30,
-       CCERR_INVALID_STAG_INDEX = 31,
-       CCERR_INVALID_SGL_FORMAT = 32,
-       CCERR_ADAPTER_TIMEOUT = 33,
-       CCERR_INVALID_CQ_DEPTH = 34,
-       CCERR_INVALID_PRIVATE_DATA_LENGTH = 35,
-       CCERR_INVALID_EP = 36,
-       CCERR_MR_IN_USE = CCERR_STAG_IN_USE,
-       CCERR_FLUSHED = 38,
-       CCERR_INVALID_WQE = 39,
-       CCERR_LOCAL_QP_CATASTROPHIC_ERROR = 40,
-       CCERR_REMOTE_TERMINATION_ERROR = 41,
-       CCERR_BASE_AND_BOUNDS_VIOLATION = 42,
-       CCERR_ACCESS_VIOLATION = 43,
-       CCERR_INVALID_PD_ID = 44,
-       CCERR_WRAP_ERROR = 45,
-       CCERR_INV_STAG_ACCESS_ERROR = 46,
-       CCERR_ZERO_RDMA_READ_RESOURCES = 47,
-       CCERR_QP_NOT_PRIVILEGED = 48,
-       CCERR_STAG_STATE_NOT_INVALID = 49,
-       CCERR_INVALID_PAGE_SIZE = 50,
-       CCERR_INVALID_BUFFER_SIZE = 51,
-       CCERR_INVALID_PBE = 52,
-       CCERR_INVALID_FBO = 53,
-       CCERR_INVALID_LENGTH = 54,
-       CCERR_INVALID_ACCESS_RIGHTS = 55,
-       CCERR_PBL_TOO_BIG = 56,
-       CCERR_INVALID_VA = 57,
-       CCERR_INVALID_REGION = 58,
-       CCERR_INVALID_WINDOW = 59,
-       CCERR_TOTAL_LENGTH_TOO_BIG = 60,
-       CCERR_INVALID_QP_ID = 61,
-       CCERR_ADDR_IN_USE = 62,
-       CCERR_ADDR_NOT_AVAIL = 63,
-       CCERR_NET_DOWN = 64,
-       CCERR_NET_UNREACHABLE = 65,
-       CCERR_CONN_ABORTED = 66,
-       CCERR_CONN_RESET = 67,
-       CCERR_NO_BUFS = 68,
-       CCERR_CONN_TIMEDOUT = 69,
-       CCERR_CONN_REFUSED = 70,
-       CCERR_HOST_UNREACHABLE = 71,
-       CCERR_INVALID_SEND_SGL_DEPTH = 72,
-       CCERR_INVALID_RECV_SGL_DEPTH = 73,
-       CCERR_INVALID_RDMA_WRITE_SGL_DEPTH = 74,
-       CCERR_INSUFFICIENT_PRIVILEGES = 75,
-       CCERR_STACK_ERROR = 76,
-       CCERR_INVALID_VERSION = 77,
-       CCERR_INVALID_MTU = 78,
-       CCERR_INVALID_IMAGE = 79,
-       CCERR_PENDING = 98,     /* not an error; user internally by adapter */
-       CCERR_DEFER = 99,       /* not an error; used internally by adapter */
-       CCERR_FAILED_WRITE = 100,
-       CCERR_FAILED_ERASE = 101,
-       CCERR_FAILED_VERIFICATION = 102,
-       CCERR_NOT_FOUND = 103,
-
-};
-
-/*
- * CCAE_ACTIVE_CONNECT_RESULTS status result codes.
- */
-enum c2_connect_status {
-       C2_CONN_STATUS_SUCCESS = C2_OK,
-       C2_CONN_STATUS_NO_MEM = CCERR_INSUFFICIENT_RESOURCES,
-       C2_CONN_STATUS_TIMEDOUT = CCERR_CONN_TIMEDOUT,
-       C2_CONN_STATUS_REFUSED = CCERR_CONN_REFUSED,
-       C2_CONN_STATUS_NETUNREACH = CCERR_NET_UNREACHABLE,
-       C2_CONN_STATUS_HOSTUNREACH = CCERR_HOST_UNREACHABLE,
-       C2_CONN_STATUS_INVALID_RNIC = CCERR_INVALID_RNIC,
-       C2_CONN_STATUS_INVALID_QP = CCERR_INVALID_QP,
-       C2_CONN_STATUS_INVALID_QP_STATE = CCERR_INVALID_QP_STATE,
-       C2_CONN_STATUS_REJECTED = CCERR_CONN_RESET,
-       C2_CONN_STATUS_ADDR_NOT_AVAIL = CCERR_ADDR_NOT_AVAIL,
-};
-
-/*
- * Flash programming status codes.
- */
-enum c2_flash_status {
-       C2_FLASH_STATUS_SUCCESS = 0x0000,
-       C2_FLASH_STATUS_VERIFY_ERR = 0x0002,
-       C2_FLASH_STATUS_IMAGE_ERR = 0x0004,
-       C2_FLASH_STATUS_ECLBS = 0x0400,
-       C2_FLASH_STATUS_PSLBS = 0x0800,
-       C2_FLASH_STATUS_VPENS = 0x1000,
-};
-
-#endif                         /* _C2_STATUS_H_ */
diff --git a/drivers/staging/rdma/amso1100/c2_user.h b/drivers/staging/rdma/amso1100/c2_user.h
deleted file mode 100644 (file)
index 7e9e7ad..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef C2_USER_H
-#define C2_USER_H
-
-#include <linux/types.h>
-
-/*
- * Make sure that all structs defined in this file remain laid out so
- * that they pack the same way on 32-bit and 64-bit architectures (to
- * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
- */
-
-struct c2_alloc_ucontext_resp {
-       __u32 qp_tab_size;
-       __u32 uarc_size;
-};
-
-struct c2_alloc_pd_resp {
-       __u32 pdn;
-       __u32 reserved;
-};
-
-struct c2_create_cq {
-       __u32 lkey;
-       __u32 pdn;
-       __u64 arm_db_page;
-       __u64 set_db_page;
-       __u32 arm_db_index;
-       __u32 set_db_index;
-};
-
-struct c2_create_cq_resp {
-       __u32 cqn;
-       __u32 reserved;
-};
-
-struct c2_create_qp {
-       __u32 lkey;
-       __u32 reserved;
-       __u64 sq_db_page;
-       __u64 rq_db_page;
-       __u32 sq_db_index;
-       __u32 rq_db_index;
-};
-
-#endif                         /* C2_USER_H */
diff --git a/drivers/staging/rdma/amso1100/c2_vq.c b/drivers/staging/rdma/amso1100/c2_vq.c
deleted file mode 100644 (file)
index 2ec716f..0000000
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-
-#include "c2_vq.h"
-#include "c2_provider.h"
-
-/*
- * Verbs Request Objects:
- *
- * VQ Request Objects are allocated by the kernel verbs handlers.
- * They contain a wait object, a refcnt, an atomic bool indicating that the
- * adapter has replied, and a copy of the verb reply work request.
- * A pointer to the VQ Request Object is passed down in the context
- * field of the work request message, and reflected back by the adapter
- * in the verbs reply message.  The function handle_vq() in the interrupt
- * path will use this pointer to:
- *     1) append a copy of the verbs reply message
- *     2) mark that the reply is ready
- *     3) wake up the kernel verbs handler blocked awaiting the reply.
- *
- *
- * The kernel verbs handlers do a "get" to put a 2nd reference on the
- * VQ Request object.  If the kernel verbs handler exits before the adapter
- * can respond, this extra reference will keep the VQ Request object around
- * until the adapter's reply can be processed.  The reason we need this is
- * because a pointer to this object is stuffed into the context field of
- * the verbs work request message, and reflected back in the reply message.
- * It is used in the interrupt handler (handle_vq()) to wake up the appropriate
- * kernel verb handler that is blocked awaiting the verb reply.
- * So handle_vq() will do a "put" on the object when it's done accessing it.
- * NOTE:  If we guarantee that the kernel verb handler will never bail before
- *        getting the reply, then we don't need these refcnts.
- *
- *
- * VQ Request objects are freed by the kernel verbs handlers only
- * after the verb has been processed, or when the adapter fails and
- * does not reply.
- *
- *
- * Verbs Reply Buffers:
- *
- * VQ Reply bufs are local host memory copies of a
- * outstanding Verb Request reply
- * message.  The are always allocated by the kernel verbs handlers, and _may_ be
- * freed by either the kernel verbs handler -or- the interrupt handler.  The
- * kernel verbs handler _must_ free the repbuf, then free the vq request object
- * in that order.
- */
-
-int vq_init(struct c2_dev *c2dev)
-{
-       sprintf(c2dev->vq_cache_name, "c2-vq:dev%c",
-               (char) ('0' + c2dev->devnum));
-       c2dev->host_msg_cache =
-           kmem_cache_create(c2dev->vq_cache_name, c2dev->rep_vq.msg_size, 0,
-                             SLAB_HWCACHE_ALIGN, NULL);
-       if (c2dev->host_msg_cache == NULL) {
-               return -ENOMEM;
-       }
-       return 0;
-}
-
-void vq_term(struct c2_dev *c2dev)
-{
-       kmem_cache_destroy(c2dev->host_msg_cache);
-}
-
-/* vq_req_alloc - allocate a VQ Request Object and initialize it.
- * The refcnt is set to 1.
- */
-struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev)
-{
-       struct c2_vq_req *r;
-
-       r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL);
-       if (r) {
-               init_waitqueue_head(&r->wait_object);
-               r->reply_msg = 0;
-               r->event = 0;
-               r->cm_id = NULL;
-               r->qp = NULL;
-               atomic_set(&r->refcnt, 1);
-               atomic_set(&r->reply_ready, 0);
-       }
-       return r;
-}
-
-
-/* vq_req_free - free the VQ Request Object.  It is assumed the verbs handler
- * has already free the VQ Reply Buffer if it existed.
- */
-void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r)
-{
-       r->reply_msg = 0;
-       if (atomic_dec_and_test(&r->refcnt)) {
-               kfree(r);
-       }
-}
-
-/* vq_req_get - reference a VQ Request Object.  Done
- * only in the kernel verbs handlers.
- */
-void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r)
-{
-       atomic_inc(&r->refcnt);
-}
-
-
-/* vq_req_put - dereference and potentially free a VQ Request Object.
- *
- * This is only called by handle_vq() on the
- * interrupt when it is done processing
- * a verb reply message.  If the associated
- * kernel verbs handler has already bailed,
- * then this put will actually free the VQ
- * Request object _and_ the VQ Reply Buffer
- * if it exists.
- */
-void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r)
-{
-       if (atomic_dec_and_test(&r->refcnt)) {
-               if (r->reply_msg != 0)
-                       vq_repbuf_free(c2dev,
-                                      (void *) (unsigned long) r->reply_msg);
-               kfree(r);
-       }
-}
-
-
-/*
- * vq_repbuf_alloc - allocate a VQ Reply Buffer.
- */
-void *vq_repbuf_alloc(struct c2_dev *c2dev)
-{
-       return kmem_cache_alloc(c2dev->host_msg_cache, GFP_ATOMIC);
-}
-
-/*
- * vq_send_wr - post a verbs request message to the Verbs Request Queue.
- * If a message is not available in the MQ, then block until one is available.
- * NOTE: handle_mq() on the interrupt context will wake up threads blocked here.
- * When the adapter drains the Verbs Request Queue,
- * it inserts MQ index 0 in to the
- * adapter->host activity fifo and interrupts the host.
- */
-int vq_send_wr(struct c2_dev *c2dev, union c2wr *wr)
-{
-       void *msg;
-       wait_queue_t __wait;
-
-       /*
-        * grab adapter vq lock
-        */
-       spin_lock(&c2dev->vqlock);
-
-       /*
-        * allocate msg
-        */
-       msg = c2_mq_alloc(&c2dev->req_vq);
-
-       /*
-        * If we cannot get a msg, then we'll wait
-        * When a messages are available, the int handler will wake_up()
-        * any waiters.
-        */
-       while (msg == NULL) {
-               pr_debug("%s:%d no available msg in VQ, waiting...\n",
-                      __func__, __LINE__);
-               init_waitqueue_entry(&__wait, current);
-               add_wait_queue(&c2dev->req_vq_wo, &__wait);
-               spin_unlock(&c2dev->vqlock);
-               for (;;) {
-                       set_current_state(TASK_INTERRUPTIBLE);
-                       if (!c2_mq_full(&c2dev->req_vq)) {
-                               break;
-                       }
-                       if (!signal_pending(current)) {
-                               schedule_timeout(1 * HZ);       /* 1 second... */
-                               continue;
-                       }
-                       set_current_state(TASK_RUNNING);
-                       remove_wait_queue(&c2dev->req_vq_wo, &__wait);
-                       return -EINTR;
-               }
-               set_current_state(TASK_RUNNING);
-               remove_wait_queue(&c2dev->req_vq_wo, &__wait);
-               spin_lock(&c2dev->vqlock);
-               msg = c2_mq_alloc(&c2dev->req_vq);
-       }
-
-       /*
-        * copy wr into adapter msg
-        */
-       memcpy(msg, wr, c2dev->req_vq.msg_size);
-
-       /*
-        * post msg
-        */
-       c2_mq_produce(&c2dev->req_vq);
-
-       /*
-        * release adapter vq lock
-        */
-       spin_unlock(&c2dev->vqlock);
-       return 0;
-}
-
-
-/*
- * vq_wait_for_reply - block until the adapter posts a Verb Reply Message.
- */
-int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req)
-{
-       if (!wait_event_timeout(req->wait_object,
-                               atomic_read(&req->reply_ready),
-                               60*HZ))
-               return -ETIMEDOUT;
-
-       return 0;
-}
-
-/*
- * vq_repbuf_free - Free a Verbs Reply Buffer.
- */
-void vq_repbuf_free(struct c2_dev *c2dev, void *reply)
-{
-       kmem_cache_free(c2dev->host_msg_cache, reply);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_vq.h b/drivers/staging/rdma/amso1100/c2_vq.h
deleted file mode 100644 (file)
index c1f6cef..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _C2_VQ_H_
-#define _C2_VQ_H_
-#include <linux/sched.h>
-#include "c2.h"
-#include "c2_wr.h"
-#include "c2_provider.h"
-
-struct c2_vq_req {
-       u64 reply_msg;          /* ptr to reply msg */
-       wait_queue_head_t wait_object;  /* wait object for vq reqs */
-       atomic_t reply_ready;   /* set when reply is ready */
-       atomic_t refcnt;        /* used to cancel WRs... */
-       int event;
-       struct iw_cm_id *cm_id;
-       struct c2_qp *qp;
-};
-
-int vq_init(struct c2_dev *c2dev);
-void vq_term(struct c2_dev *c2dev);
-
-struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev);
-void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *req);
-void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *req);
-void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *req);
-int vq_send_wr(struct c2_dev *c2dev, union c2wr * wr);
-
-void *vq_repbuf_alloc(struct c2_dev *c2dev);
-void vq_repbuf_free(struct c2_dev *c2dev, void *reply);
-
-int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req);
-#endif                         /* _C2_VQ_H_ */
diff --git a/drivers/staging/rdma/amso1100/c2_wr.h b/drivers/staging/rdma/amso1100/c2_wr.h
deleted file mode 100644 (file)
index 8d4b4ca..0000000
+++ /dev/null
@@ -1,1520 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _C2_WR_H_
-#define _C2_WR_H_
-
-#ifdef CCDEBUG
-#define CCWR_MAGIC             0xb07700b0
-#endif
-
-#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
-
-/* Maximum allowed size in bytes of private_data exchange
- * on connect.
- */
-#define C2_MAX_PRIVATE_DATA_SIZE 200
-
-/*
- * These types are shared among the adapter, host, and CCIL consumer.
- */
-enum c2_cq_notification_type {
-       C2_CQ_NOTIFICATION_TYPE_NONE = 1,
-       C2_CQ_NOTIFICATION_TYPE_NEXT,
-       C2_CQ_NOTIFICATION_TYPE_NEXT_SE
-};
-
-enum c2_setconfig_cmd {
-       C2_CFG_ADD_ADDR = 1,
-       C2_CFG_DEL_ADDR = 2,
-       C2_CFG_ADD_ROUTE = 3,
-       C2_CFG_DEL_ROUTE = 4
-};
-
-enum c2_getconfig_cmd {
-       C2_GETCONFIG_ROUTES = 1,
-       C2_GETCONFIG_ADDRS
-};
-
-/*
- *  CCIL Work Request Identifiers
- */
-enum c2wr_ids {
-       CCWR_RNIC_OPEN = 1,
-       CCWR_RNIC_QUERY,
-       CCWR_RNIC_SETCONFIG,
-       CCWR_RNIC_GETCONFIG,
-       CCWR_RNIC_CLOSE,
-       CCWR_CQ_CREATE,
-       CCWR_CQ_QUERY,
-       CCWR_CQ_MODIFY,
-       CCWR_CQ_DESTROY,
-       CCWR_QP_CONNECT,
-       CCWR_PD_ALLOC,
-       CCWR_PD_DEALLOC,
-       CCWR_SRQ_CREATE,
-       CCWR_SRQ_QUERY,
-       CCWR_SRQ_MODIFY,
-       CCWR_SRQ_DESTROY,
-       CCWR_QP_CREATE,
-       CCWR_QP_QUERY,
-       CCWR_QP_MODIFY,
-       CCWR_QP_DESTROY,
-       CCWR_NSMR_STAG_ALLOC,
-       CCWR_NSMR_REGISTER,
-       CCWR_NSMR_PBL,
-       CCWR_STAG_DEALLOC,
-       CCWR_NSMR_REREGISTER,
-       CCWR_SMR_REGISTER,
-       CCWR_MR_QUERY,
-       CCWR_MW_ALLOC,
-       CCWR_MW_QUERY,
-       CCWR_EP_CREATE,
-       CCWR_EP_GETOPT,
-       CCWR_EP_SETOPT,
-       CCWR_EP_DESTROY,
-       CCWR_EP_BIND,
-       CCWR_EP_CONNECT,
-       CCWR_EP_LISTEN,
-       CCWR_EP_SHUTDOWN,
-       CCWR_EP_LISTEN_CREATE,
-       CCWR_EP_LISTEN_DESTROY,
-       CCWR_EP_QUERY,
-       CCWR_CR_ACCEPT,
-       CCWR_CR_REJECT,
-       CCWR_CONSOLE,
-       CCWR_TERM,
-       CCWR_FLASH_INIT,
-       CCWR_FLASH,
-       CCWR_BUF_ALLOC,
-       CCWR_BUF_FREE,
-       CCWR_FLASH_WRITE,
-       CCWR_INIT,              /* WARNING: Don't move this ever again! */
-
-
-
-       /* Add new IDs here */
-
-
-
-       /*
-        * WARNING: CCWR_LAST must always be the last verbs id defined!
-        *          All the preceding IDs are fixed, and must not change.
-        *          You can add new IDs, but must not remove or reorder
-        *          any IDs. If you do, YOU will ruin any hope of
-        *          compatibility between versions.
-        */
-       CCWR_LAST,
-
-       /*
-        * Start over at 1 so that arrays indexed by user wr id's
-        * begin at 1.  This is OK since the verbs and user wr id's
-        * are always used on disjoint sets of queues.
-        */
-       /*
-        * The order of the CCWR_SEND_XX verbs must
-        * match the order of the RDMA_OPs
-        */
-       CCWR_SEND = 1,
-       CCWR_SEND_INV,
-       CCWR_SEND_SE,
-       CCWR_SEND_SE_INV,
-       CCWR_RDMA_WRITE,
-       CCWR_RDMA_READ,
-       CCWR_RDMA_READ_INV,
-       CCWR_MW_BIND,
-       CCWR_NSMR_FASTREG,
-       CCWR_STAG_INVALIDATE,
-       CCWR_RECV,
-       CCWR_NOP,
-       CCWR_UNIMPL,
-/* WARNING: This must always be the last user wr id defined! */
-};
-#define RDMA_SEND_OPCODE_FROM_WR_ID(x)   (x+2)
-
-/*
- * SQ/RQ Work Request Types
- */
-enum c2_wr_type {
-       C2_WR_TYPE_SEND = CCWR_SEND,
-       C2_WR_TYPE_SEND_SE = CCWR_SEND_SE,
-       C2_WR_TYPE_SEND_INV = CCWR_SEND_INV,
-       C2_WR_TYPE_SEND_SE_INV = CCWR_SEND_SE_INV,
-       C2_WR_TYPE_RDMA_WRITE = CCWR_RDMA_WRITE,
-       C2_WR_TYPE_RDMA_READ = CCWR_RDMA_READ,
-       C2_WR_TYPE_RDMA_READ_INV_STAG = CCWR_RDMA_READ_INV,
-       C2_WR_TYPE_BIND_MW = CCWR_MW_BIND,
-       C2_WR_TYPE_FASTREG_NSMR = CCWR_NSMR_FASTREG,
-       C2_WR_TYPE_INV_STAG = CCWR_STAG_INVALIDATE,
-       C2_WR_TYPE_RECV = CCWR_RECV,
-       C2_WR_TYPE_NOP = CCWR_NOP,
-};
-
-struct c2_netaddr {
-       __be32 ip_addr;
-       __be32 netmask;
-       u32 mtu;
-};
-
-struct c2_route {
-       u32 ip_addr;            /* 0 indicates the default route */
-       u32 netmask;            /* netmask associated with dst */
-       u32 flags;
-       union {
-               u32 ipaddr;     /* address of the nexthop interface */
-               u8 enaddr[6];
-       } nexthop;
-};
-
-/*
- * A Scatter Gather Entry.
- */
-struct c2_data_addr {
-       __be32 stag;
-       __be32 length;
-       __be64 to;
-};
-
-/*
- * MR and MW flags used by the consumer, RI, and RNIC.
- */
-enum c2_mm_flags {
-       MEM_REMOTE = 0x0001,    /* allow mw binds with remote access. */
-       MEM_VA_BASED = 0x0002,  /* Not Zero-based */
-       MEM_PBL_COMPLETE = 0x0004,      /* PBL array is complete in this msg */
-       MEM_LOCAL_READ = 0x0008,        /* allow local reads */
-       MEM_LOCAL_WRITE = 0x0010,       /* allow local writes */
-       MEM_REMOTE_READ = 0x0020,       /* allow remote reads */
-       MEM_REMOTE_WRITE = 0x0040,      /* allow remote writes */
-       MEM_WINDOW_BIND = 0x0080,       /* binds allowed */
-       MEM_SHARED = 0x0100,    /* set if MR is shared */
-       MEM_STAG_VALID = 0x0200 /* set if STAG is in valid state */
-};
-
-/*
- * CCIL API ACF flags defined in terms of the low level mem flags.
- * This minimizes translation needed in the user API
- */
-enum c2_acf {
-       C2_ACF_LOCAL_READ = MEM_LOCAL_READ,
-       C2_ACF_LOCAL_WRITE = MEM_LOCAL_WRITE,
-       C2_ACF_REMOTE_READ = MEM_REMOTE_READ,
-       C2_ACF_REMOTE_WRITE = MEM_REMOTE_WRITE,
-       C2_ACF_WINDOW_BIND = MEM_WINDOW_BIND
-};
-
-/*
- * Image types of objects written to flash
- */
-#define C2_FLASH_IMG_BITFILE 1
-#define C2_FLASH_IMG_OPTION_ROM 2
-#define C2_FLASH_IMG_VPD 3
-
-/*
- *  to fix bug 1815 we define the max size allowable of the
- *  terminate message (per the IETF spec).Refer to the IETF
- *  protocol specification, section 12.1.6, page 64)
- *  The message is prefixed by 20 types of DDP info.
- *
- *  Then the message has 6 bytes for the terminate control
- *  and DDP segment length info plus a DDP header (either
- *  14 or 18 byts) plus 28 bytes for the RDMA header.
- *  Thus the max size in:
- *  20 + (6 + 18 + 28) = 72
- */
-#define C2_MAX_TERMINATE_MESSAGE_SIZE (72)
-
-/*
- * Build String Length.  It must be the same as C2_BUILD_STR_LEN in ccil_api.h
- */
-#define WR_BUILD_STR_LEN 64
-
-/*
- * WARNING:  All of these structs need to align any 64bit types on
- * 64 bit boundaries!  64bit types include u64 and u64.
- */
-
-/*
- * Clustercore Work Request Header.  Be sensitive to field layout
- * and alignment.
- */
-struct c2wr_hdr {
-       /* wqe_count is part of the cqe.  It is put here so the
-        * adapter can write to it while the wr is pending without
-        * clobbering part of the wr.  This word need not be dma'd
-        * from the host to adapter by libccil, but we copy it anyway
-        * to make the memcpy to the adapter better aligned.
-        */
-       __be32 wqe_count;
-
-       /* Put these fields next so that later 32- and 64-bit
-        * quantities are naturally aligned.
-        */
-       u8 id;
-       u8 result;              /* adapter -> host */
-       u8 sge_count;           /* host -> adapter */
-       u8 flags;               /* host -> adapter */
-
-       u64 context;
-#ifdef CCMSGMAGIC
-       u32 magic;
-       u32 pad;
-#endif
-} __attribute__((packed));
-
-/*
- *------------------------ RNIC ------------------------
- */
-
-/*
- * WR_RNIC_OPEN
- */
-
-/*
- * Flags for the RNIC WRs
- */
-enum c2_rnic_flags {
-       RNIC_IRD_STATIC = 0x0001,
-       RNIC_ORD_STATIC = 0x0002,
-       RNIC_QP_STATIC = 0x0004,
-       RNIC_SRQ_SUPPORTED = 0x0008,
-       RNIC_PBL_BLOCK_MODE = 0x0010,
-       RNIC_SRQ_MODEL_ARRIVAL = 0x0020,
-       RNIC_CQ_OVF_DETECTED = 0x0040,
-       RNIC_PRIV_MODE = 0x0080
-};
-
-struct c2wr_rnic_open_req {
-       struct c2wr_hdr hdr;
-       u64 user_context;
-       __be16 flags;           /* See enum c2_rnic_flags */
-       __be16 port_num;
-} __attribute__((packed));
-
-struct c2wr_rnic_open_rep {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-} __attribute__((packed));
-
-union c2wr_rnic_open {
-       struct c2wr_rnic_open_req req;
-       struct c2wr_rnic_open_rep rep;
-} __attribute__((packed));
-
-struct c2wr_rnic_query_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-} __attribute__((packed));
-
-/*
- * WR_RNIC_QUERY
- */
-struct c2wr_rnic_query_rep {
-       struct c2wr_hdr hdr;
-       u64 user_context;
-       __be32 vendor_id;
-       __be32 part_number;
-       __be32 hw_version;
-       __be32 fw_ver_major;
-       __be32 fw_ver_minor;
-       __be32 fw_ver_patch;
-       char fw_ver_build_str[WR_BUILD_STR_LEN];
-       __be32 max_qps;
-       __be32 max_qp_depth;
-       u32 max_srq_depth;
-       u32 max_send_sgl_depth;
-       u32 max_rdma_sgl_depth;
-       __be32 max_cqs;
-       __be32 max_cq_depth;
-       u32 max_cq_event_handlers;
-       __be32 max_mrs;
-       u32 max_pbl_depth;
-       __be32 max_pds;
-       __be32 max_global_ird;
-       u32 max_global_ord;
-       __be32 max_qp_ird;
-       __be32 max_qp_ord;
-       u32 flags;
-       __be32 max_mws;
-       u32 pbe_range_low;
-       u32 pbe_range_high;
-       u32 max_srqs;
-       u32 page_size;
-} __attribute__((packed));
-
-union c2wr_rnic_query {
-       struct c2wr_rnic_query_req req;
-       struct c2wr_rnic_query_rep rep;
-} __attribute__((packed));
-
-/*
- * WR_RNIC_GETCONFIG
- */
-
-struct c2wr_rnic_getconfig_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 option;             /* see c2_getconfig_cmd_t */
-       u64 reply_buf;
-       u32 reply_buf_len;
-} __attribute__((packed)) ;
-
-struct c2wr_rnic_getconfig_rep {
-       struct c2wr_hdr hdr;
-       u32 option;             /* see c2_getconfig_cmd_t */
-       u32 count_len;          /* length of the number of addresses configured */
-} __attribute__((packed)) ;
-
-union c2wr_rnic_getconfig {
-       struct c2wr_rnic_getconfig_req req;
-       struct c2wr_rnic_getconfig_rep rep;
-} __attribute__((packed)) ;
-
-/*
- * WR_RNIC_SETCONFIG
- */
-struct c2wr_rnic_setconfig_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       __be32 option;          /* See c2_setconfig_cmd_t */
-       /* variable data and pad. See c2_netaddr and c2_route */
-       u8 data[0];
-} __attribute__((packed)) ;
-
-struct c2wr_rnic_setconfig_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_rnic_setconfig {
-       struct c2wr_rnic_setconfig_req req;
-       struct c2wr_rnic_setconfig_rep rep;
-} __attribute__((packed)) ;
-
-/*
- * WR_RNIC_CLOSE
- */
-struct c2wr_rnic_close_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_rnic_close_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_rnic_close {
-       struct c2wr_rnic_close_req req;
-       struct c2wr_rnic_close_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ CQ ------------------------
- */
-struct c2wr_cq_create_req {
-       struct c2wr_hdr hdr;
-       __be64 shared_ht;
-       u64 user_context;
-       __be64 msg_pool;
-       u32 rnic_handle;
-       __be32 msg_size;
-       __be32 depth;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_create_rep {
-       struct c2wr_hdr hdr;
-       __be32 mq_index;
-       __be32 adapter_shared;
-       u32 cq_handle;
-} __attribute__((packed)) ;
-
-union c2wr_cq_create {
-       struct c2wr_cq_create_req req;
-       struct c2wr_cq_create_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_modify_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 cq_handle;
-       u32 new_depth;
-       u64 new_msg_pool;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_modify_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_cq_modify {
-       struct c2wr_cq_modify_req req;
-       struct c2wr_cq_modify_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_destroy_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 cq_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_destroy_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_cq_destroy {
-       struct c2wr_cq_destroy_req req;
-       struct c2wr_cq_destroy_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ PD ------------------------
- */
-struct c2wr_pd_alloc_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_pd_alloc_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_pd_alloc {
-       struct c2wr_pd_alloc_req req;
-       struct c2wr_pd_alloc_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_pd_dealloc_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_pd_dealloc_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_pd_dealloc {
-       struct c2wr_pd_dealloc_req req;
-       struct c2wr_pd_dealloc_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ SRQ ------------------------
- */
-struct c2wr_srq_create_req {
-       struct c2wr_hdr hdr;
-       u64 shared_ht;
-       u64 user_context;
-       u32 rnic_handle;
-       u32 srq_depth;
-       u32 srq_limit;
-       u32 sgl_depth;
-       u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_srq_create_rep {
-       struct c2wr_hdr hdr;
-       u32 srq_depth;
-       u32 sgl_depth;
-       u32 msg_size;
-       u32 mq_index;
-       u32 mq_start;
-       u32 srq_handle;
-} __attribute__((packed)) ;
-
-union c2wr_srq_create {
-       struct c2wr_srq_create_req req;
-       struct c2wr_srq_create_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_srq_destroy_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 srq_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_srq_destroy_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_srq_destroy {
-       struct c2wr_srq_destroy_req req;
-       struct c2wr_srq_destroy_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ QP ------------------------
- */
-enum c2wr_qp_flags {
-       QP_RDMA_READ = 0x00000001,      /* RDMA read enabled? */
-       QP_RDMA_WRITE = 0x00000002,     /* RDMA write enabled? */
-       QP_MW_BIND = 0x00000004,        /* MWs enabled */
-       QP_ZERO_STAG = 0x00000008,      /* enabled? */
-       QP_REMOTE_TERMINATION = 0x00000010,     /* remote end terminated */
-       QP_RDMA_READ_RESPONSE = 0x00000020      /* Remote RDMA read  */
-           /* enabled? */
-};
-
-struct c2wr_qp_create_req {
-       struct c2wr_hdr hdr;
-       __be64 shared_sq_ht;
-       __be64 shared_rq_ht;
-       u64 user_context;
-       u32 rnic_handle;
-       u32 sq_cq_handle;
-       u32 rq_cq_handle;
-       __be32 sq_depth;
-       __be32 rq_depth;
-       u32 srq_handle;
-       u32 srq_limit;
-       __be32 flags;           /* see enum c2wr_qp_flags */
-       __be32 send_sgl_depth;
-       __be32 recv_sgl_depth;
-       __be32 rdma_write_sgl_depth;
-       __be32 ord;
-       __be32 ird;
-       u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_create_rep {
-       struct c2wr_hdr hdr;
-       __be32 sq_depth;
-       __be32 rq_depth;
-       u32 send_sgl_depth;
-       u32 recv_sgl_depth;
-       u32 rdma_write_sgl_depth;
-       u32 ord;
-       u32 ird;
-       __be32 sq_msg_size;
-       __be32 sq_mq_index;
-       __be32 sq_mq_start;
-       __be32 rq_msg_size;
-       __be32 rq_mq_index;
-       __be32 rq_mq_start;
-       u32 qp_handle;
-} __attribute__((packed)) ;
-
-union c2wr_qp_create {
-       struct c2wr_qp_create_req req;
-       struct c2wr_qp_create_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_query_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 qp_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_query_rep {
-       struct c2wr_hdr hdr;
-       u64 user_context;
-       u32 rnic_handle;
-       u32 sq_depth;
-       u32 rq_depth;
-       u32 send_sgl_depth;
-       u32 rdma_write_sgl_depth;
-       u32 recv_sgl_depth;
-       u32 ord;
-       u32 ird;
-       u16 qp_state;
-       u16 flags;              /* see c2wr_qp_flags_t */
-       u32 qp_id;
-       u32 local_addr;
-       u32 remote_addr;
-       u16 local_port;
-       u16 remote_port;
-       u32 terminate_msg_length;       /* 0 if not present */
-       u8 data[0];
-       /* Terminate Message in-line here. */
-} __attribute__((packed)) ;
-
-union c2wr_qp_query {
-       struct c2wr_qp_query_req req;
-       struct c2wr_qp_query_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_modify_req {
-       struct c2wr_hdr hdr;
-       u64 stream_msg;
-       u32 stream_msg_length;
-       u32 rnic_handle;
-       u32 qp_handle;
-       __be32 next_qp_state;
-       __be32 ord;
-       __be32 ird;
-       __be32 sq_depth;
-       __be32 rq_depth;
-       u32 llp_ep_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_modify_rep {
-       struct c2wr_hdr hdr;
-       u32 ord;
-       u32 ird;
-       u32 sq_depth;
-       u32 rq_depth;
-       u32 sq_msg_size;
-       u32 sq_mq_index;
-       u32 sq_mq_start;
-       u32 rq_msg_size;
-       u32 rq_mq_index;
-       u32 rq_mq_start;
-} __attribute__((packed)) ;
-
-union c2wr_qp_modify {
-       struct c2wr_qp_modify_req req;
-       struct c2wr_qp_modify_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_destroy_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 qp_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_destroy_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_qp_destroy {
-       struct c2wr_qp_destroy_req req;
-       struct c2wr_qp_destroy_rep rep;
-} __attribute__((packed)) ;
-
-/*
- * The CCWR_QP_CONNECT msg is posted on the verbs request queue.  It can
- * only be posted when a QP is in IDLE state.  After the connect request is
- * submitted to the LLP, the adapter moves the QP to CONNECT_PENDING state.
- * No synchronous reply from adapter to this WR.  The results of
- * connection are passed back in an async event CCAE_ACTIVE_CONNECT_RESULTS
- * See c2wr_ae_active_connect_results_t
- */
-struct c2wr_qp_connect_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 qp_handle;
-       __be32 remote_addr;
-       __be16 remote_port;
-       u16 pad;
-       __be32 private_data_length;
-       u8 private_data[0];     /* Private data in-line. */
-} __attribute__((packed)) ;
-
-struct c2wr_qp_connect {
-       struct c2wr_qp_connect_req req;
-       /* no synchronous reply.         */
-} __attribute__((packed)) ;
-
-
-/*
- *------------------------ MM ------------------------
- */
-
-struct c2wr_nsmr_stag_alloc_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 pbl_depth;
-       u32 pd_id;
-       u32 flags;
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_stag_alloc_rep {
-       struct c2wr_hdr hdr;
-       u32 pbl_depth;
-       u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_stag_alloc {
-       struct c2wr_nsmr_stag_alloc_req req;
-       struct c2wr_nsmr_stag_alloc_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_register_req {
-       struct c2wr_hdr hdr;
-       __be64 va;
-       u32 rnic_handle;
-       __be16 flags;
-       u8 stag_key;
-       u8 pad;
-       u32 pd_id;
-       __be32 pbl_depth;
-       __be32 pbe_size;
-       __be32 fbo;
-       __be32 length;
-       __be32 addrs_length;
-       /* array of paddrs (must be aligned on a 64bit boundary) */
-       __be64 paddrs[0];
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_register_rep {
-       struct c2wr_hdr hdr;
-       u32 pbl_depth;
-       __be32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_register {
-       struct c2wr_nsmr_register_req req;
-       struct c2wr_nsmr_register_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_pbl_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       __be32 flags;
-       __be32 stag_index;
-       __be32 addrs_length;
-       /* array of paddrs (must be aligned on a 64bit boundary) */
-       __be64 paddrs[0];
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_pbl_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_pbl {
-       struct c2wr_nsmr_pbl_req req;
-       struct c2wr_nsmr_pbl_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_mr_query_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 stag_index;
-} __attribute__((packed)) ;
-
-struct c2wr_mr_query_rep {
-       struct c2wr_hdr hdr;
-       u8 stag_key;
-       u8 pad[3];
-       u32 pd_id;
-       u32 flags;
-       u32 pbl_depth;
-} __attribute__((packed)) ;
-
-union c2wr_mr_query {
-       struct c2wr_mr_query_req req;
-       struct c2wr_mr_query_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_query_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 stag_index;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_query_rep {
-       struct c2wr_hdr hdr;
-       u8 stag_key;
-       u8 pad[3];
-       u32 pd_id;
-       u32 flags;
-} __attribute__((packed)) ;
-
-union c2wr_mw_query {
-       struct c2wr_mw_query_req req;
-       struct c2wr_mw_query_rep rep;
-} __attribute__((packed)) ;
-
-
-struct c2wr_stag_dealloc_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       __be32 stag_index;
-} __attribute__((packed)) ;
-
-struct c2wr_stag_dealloc_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_stag_dealloc {
-       struct c2wr_stag_dealloc_req req;
-       struct c2wr_stag_dealloc_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_reregister_req {
-       struct c2wr_hdr hdr;
-       u64 va;
-       u32 rnic_handle;
-       u16 flags;
-       u8 stag_key;
-       u8 pad;
-       u32 stag_index;
-       u32 pd_id;
-       u32 pbl_depth;
-       u32 pbe_size;
-       u32 fbo;
-       u32 length;
-       u32 addrs_length;
-       u32 pad1;
-       /* array of paddrs (must be aligned on a 64bit boundary) */
-       u64 paddrs[0];
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_reregister_rep {
-       struct c2wr_hdr hdr;
-       u32 pbl_depth;
-       u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_reregister {
-       struct c2wr_nsmr_reregister_req req;
-       struct c2wr_nsmr_reregister_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_smr_register_req {
-       struct c2wr_hdr hdr;
-       u64 va;
-       u32 rnic_handle;
-       u16 flags;
-       u8 stag_key;
-       u8 pad;
-       u32 stag_index;
-       u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_smr_register_rep {
-       struct c2wr_hdr hdr;
-       u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_smr_register {
-       struct c2wr_smr_register_req req;
-       struct c2wr_smr_register_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_alloc_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_alloc_rep {
-       struct c2wr_hdr hdr;
-       u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_mw_alloc {
-       struct c2wr_mw_alloc_req req;
-       struct c2wr_mw_alloc_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ WRs -----------------------
- */
-
-struct c2wr_user_hdr {
-       struct c2wr_hdr hdr;            /* Has status and WR Type */
-} __attribute__((packed)) ;
-
-enum c2_qp_state {
-       C2_QP_STATE_IDLE = 0x01,
-       C2_QP_STATE_CONNECTING = 0x02,
-       C2_QP_STATE_RTS = 0x04,
-       C2_QP_STATE_CLOSING = 0x08,
-       C2_QP_STATE_TERMINATE = 0x10,
-       C2_QP_STATE_ERROR = 0x20,
-};
-
-/* Completion queue entry. */
-struct c2wr_ce {
-       struct c2wr_hdr hdr;            /* Has status and WR Type */
-       u64 qp_user_context;    /* c2_user_qp_t * */
-       u32 qp_state;           /* Current QP State */
-       u32 handle;             /* QPID or EP Handle */
-       __be32 bytes_rcvd;              /* valid for RECV WCs */
-       u32 stag;
-} __attribute__((packed)) ;
-
-
-/*
- * Flags used for all post-sq WRs.  These must fit in the flags
- * field of the struct c2wr_hdr (eight bits).
- */
-enum {
-       SQ_SIGNALED = 0x01,
-       SQ_READ_FENCE = 0x02,
-       SQ_FENCE = 0x04,
-};
-
-/*
- * Common fields for all post-sq WRs.  Namely the standard header and a
- * secondary header with fields common to all post-sq WRs.
- */
-struct c2_sq_hdr {
-       struct c2wr_user_hdr user_hdr;
-} __attribute__((packed));
-
-/*
- * Same as above but for post-rq WRs.
- */
-struct c2_rq_hdr {
-       struct c2wr_user_hdr user_hdr;
-} __attribute__((packed));
-
-/*
- * use the same struct for all sends.
- */
-struct c2wr_send_req {
-       struct c2_sq_hdr sq_hdr;
-       __be32 sge_len;
-       __be32 remote_stag;
-       u8 data[0];             /* SGE array */
-} __attribute__((packed));
-
-union c2wr_send {
-       struct c2wr_send_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_rdma_write_req {
-       struct c2_sq_hdr sq_hdr;
-       __be64 remote_to;
-       __be32 remote_stag;
-       __be32 sge_len;
-       u8 data[0];             /* SGE array */
-} __attribute__((packed));
-
-union c2wr_rdma_write {
-       struct c2wr_rdma_write_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_rdma_read_req {
-       struct c2_sq_hdr sq_hdr;
-       __be64 local_to;
-       __be64 remote_to;
-       __be32 local_stag;
-       __be32 remote_stag;
-       __be32 length;
-} __attribute__((packed));
-
-union c2wr_rdma_read {
-       struct c2wr_rdma_read_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_mw_bind_req {
-       struct c2_sq_hdr sq_hdr;
-       u64 va;
-       u8 stag_key;
-       u8 pad[3];
-       u32 mw_stag_index;
-       u32 mr_stag_index;
-       u32 length;
-       u32 flags;
-} __attribute__((packed));
-
-union c2wr_mw_bind {
-       struct c2wr_mw_bind_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_nsmr_fastreg_req {
-       struct c2_sq_hdr sq_hdr;
-       u64 va;
-       u8 stag_key;
-       u8 pad[3];
-       u32 stag_index;
-       u32 pbe_size;
-       u32 fbo;
-       u32 length;
-       u32 addrs_length;
-       /* array of paddrs (must be aligned on a 64bit boundary) */
-       u64 paddrs[0];
-} __attribute__((packed));
-
-union c2wr_nsmr_fastreg {
-       struct c2wr_nsmr_fastreg_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_stag_invalidate_req {
-       struct c2_sq_hdr sq_hdr;
-       u8 stag_key;
-       u8 pad[3];
-       u32 stag_index;
-} __attribute__((packed));
-
-union c2wr_stag_invalidate {
-       struct c2wr_stag_invalidate_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-union c2wr_sqwr {
-       struct c2_sq_hdr sq_hdr;
-       struct c2wr_send_req send;
-       struct c2wr_send_req send_se;
-       struct c2wr_send_req send_inv;
-       struct c2wr_send_req send_se_inv;
-       struct c2wr_rdma_write_req rdma_write;
-       struct c2wr_rdma_read_req rdma_read;
-       struct c2wr_mw_bind_req mw_bind;
-       struct c2wr_nsmr_fastreg_req nsmr_fastreg;
-       struct c2wr_stag_invalidate_req stag_inv;
-} __attribute__((packed));
-
-
-/*
- * RQ WRs
- */
-struct c2wr_rqwr {
-       struct c2_rq_hdr rq_hdr;
-       u8 data[0];             /* array of SGEs */
-} __attribute__((packed));
-
-union c2wr_recv {
-       struct c2wr_rqwr req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-/*
- * All AEs start with this header.  Most AEs only need to convey the
- * information in the header.  Some, like LLP connection events, need
- * more info.  The union typdef c2wr_ae_t has all the possible AEs.
- *
- * hdr.context is the user_context from the rnic_open WR.  NULL If this
- * is not affiliated with an rnic
- *
- * hdr.id is the AE identifier (eg;  CCAE_REMOTE_SHUTDOWN,
- * CCAE_LLP_CLOSE_COMPLETE)
- *
- * resource_type is one of:  C2_RES_IND_QP, C2_RES_IND_CQ, C2_RES_IND_SRQ
- *
- * user_context is the context passed down when the host created the resource.
- */
-struct c2wr_ae_hdr {
-       struct c2wr_hdr hdr;
-       u64 user_context;       /* user context for this res. */
-       __be32 resource_type;   /* see enum c2_resource_indicator */
-       __be32 resource;        /* handle for resource */
-       __be32 qp_state;        /* current QP State */
-} __attribute__((packed));
-
-/*
- * After submitting the CCAE_ACTIVE_CONNECT_RESULTS message on the AEQ,
- * the adapter moves the QP into RTS state
- */
-struct c2wr_ae_active_connect_results {
-       struct c2wr_ae_hdr ae_hdr;
-       __be32 laddr;
-       __be32 raddr;
-       __be16 lport;
-       __be16 rport;
-       __be32 private_data_length;
-       u8 private_data[0];     /* data is in-line in the msg. */
-} __attribute__((packed));
-
-/*
- * When connections are established by the stack (and the private data
- * MPA frame is received), the adapter will generate an event to the host.
- * The details of the connection, any private data, and the new connection
- * request handle is passed up via the CCAE_CONNECTION_REQUEST msg on the
- * AE queue:
- */
-struct c2wr_ae_connection_request {
-       struct c2wr_ae_hdr ae_hdr;
-       u32 cr_handle;          /* connreq handle (sock ptr) */
-       __be32 laddr;
-       __be32 raddr;
-       __be16 lport;
-       __be16 rport;
-       __be32 private_data_length;
-       u8 private_data[0];     /* data is in-line in the msg. */
-} __attribute__((packed));
-
-union c2wr_ae {
-       struct c2wr_ae_hdr ae_generic;
-       struct c2wr_ae_active_connect_results ae_active_connect_results;
-       struct c2wr_ae_connection_request ae_connection_request;
-} __attribute__((packed));
-
-struct c2wr_init_req {
-       struct c2wr_hdr hdr;
-       __be64 hint_count;
-       __be64 q0_host_shared;
-       __be64 q1_host_shared;
-       __be64 q1_host_msg_pool;
-       __be64 q2_host_shared;
-       __be64 q2_host_msg_pool;
-} __attribute__((packed));
-
-struct c2wr_init_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_init {
-       struct c2wr_init_req req;
-       struct c2wr_init_rep rep;
-} __attribute__((packed));
-
-/*
- * For upgrading flash.
- */
-
-struct c2wr_flash_init_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-} __attribute__((packed));
-
-struct c2wr_flash_init_rep {
-       struct c2wr_hdr hdr;
-       u32 adapter_flash_buf_offset;
-       u32 adapter_flash_len;
-} __attribute__((packed));
-
-union c2wr_flash_init {
-       struct c2wr_flash_init_req req;
-       struct c2wr_flash_init_rep rep;
-} __attribute__((packed));
-
-struct c2wr_flash_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 len;
-} __attribute__((packed));
-
-struct c2wr_flash_rep {
-       struct c2wr_hdr hdr;
-       u32 status;
-} __attribute__((packed));
-
-union c2wr_flash {
-       struct c2wr_flash_req req;
-       struct c2wr_flash_rep rep;
-} __attribute__((packed));
-
-struct c2wr_buf_alloc_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 size;
-} __attribute__((packed));
-
-struct c2wr_buf_alloc_rep {
-       struct c2wr_hdr hdr;
-       u32 offset;             /* 0 if mem not available */
-       u32 size;               /* 0 if mem not available */
-} __attribute__((packed));
-
-union c2wr_buf_alloc {
-       struct c2wr_buf_alloc_req req;
-       struct c2wr_buf_alloc_rep rep;
-} __attribute__((packed));
-
-struct c2wr_buf_free_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 offset;             /* Must match value from alloc */
-       u32 size;               /* Must match value from alloc */
-} __attribute__((packed));
-
-struct c2wr_buf_free_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_buf_free {
-       struct c2wr_buf_free_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_flash_write_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 offset;
-       u32 size;
-       u32 type;
-       u32 flags;
-} __attribute__((packed));
-
-struct c2wr_flash_write_rep {
-       struct c2wr_hdr hdr;
-       u32 status;
-} __attribute__((packed));
-
-union c2wr_flash_write {
-       struct c2wr_flash_write_req req;
-       struct c2wr_flash_write_rep rep;
-} __attribute__((packed));
-
-/*
- * Messages for LLP connection setup.
- */
-
-/*
- * Listen Request.  This allocates a listening endpoint to allow passive
- * connection setup.  Newly established LLP connections are passed up
- * via an AE.  See c2wr_ae_connection_request_t
- */
-struct c2wr_ep_listen_create_req {
-       struct c2wr_hdr hdr;
-       u64 user_context;       /* returned in AEs. */
-       u32 rnic_handle;
-       __be32 local_addr;              /* local addr, or 0  */
-       __be16 local_port;              /* 0 means "pick one" */
-       u16 pad;
-       __be32 backlog;         /* tradional tcp listen bl */
-} __attribute__((packed));
-
-struct c2wr_ep_listen_create_rep {
-       struct c2wr_hdr hdr;
-       u32 ep_handle;          /* handle to new listening ep */
-       u16 local_port;         /* resulting port... */
-       u16 pad;
-} __attribute__((packed));
-
-union c2wr_ep_listen_create {
-       struct c2wr_ep_listen_create_req req;
-       struct c2wr_ep_listen_create_rep rep;
-} __attribute__((packed));
-
-struct c2wr_ep_listen_destroy_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 ep_handle;
-} __attribute__((packed));
-
-struct c2wr_ep_listen_destroy_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_ep_listen_destroy {
-       struct c2wr_ep_listen_destroy_req req;
-       struct c2wr_ep_listen_destroy_rep rep;
-} __attribute__((packed));
-
-struct c2wr_ep_query_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 ep_handle;
-} __attribute__((packed));
-
-struct c2wr_ep_query_rep {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 local_addr;
-       u32 remote_addr;
-       u16 local_port;
-       u16 remote_port;
-} __attribute__((packed));
-
-union c2wr_ep_query {
-       struct c2wr_ep_query_req req;
-       struct c2wr_ep_query_rep rep;
-} __attribute__((packed));
-
-
-/*
- * The host passes this down to indicate acceptance of a pending iWARP
- * connection.  The cr_handle was obtained from the CONNECTION_REQUEST
- * AE passed up by the adapter.  See c2wr_ae_connection_request_t.
- */
-struct c2wr_cr_accept_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 qp_handle;          /* QP to bind to this LLP conn */
-       u32 ep_handle;          /* LLP  handle to accept */
-       __be32 private_data_length;
-       u8 private_data[0];     /* data in-line in msg. */
-} __attribute__((packed));
-
-/*
- * adapter sends reply when private data is successfully submitted to
- * the LLP.
- */
-struct c2wr_cr_accept_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_cr_accept {
-       struct c2wr_cr_accept_req req;
-       struct c2wr_cr_accept_rep rep;
-} __attribute__((packed));
-
-/*
- * The host sends this down if a given iWARP connection request was
- * rejected by the consumer.  The cr_handle was obtained from a
- * previous c2wr_ae_connection_request_t AE sent by the adapter.
- */
-struct  c2wr_cr_reject_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 ep_handle;          /* LLP handle to reject */
-} __attribute__((packed));
-
-/*
- * Dunno if this is needed, but we'll add it for now.  The adapter will
- * send the reject_reply after the LLP endpoint has been destroyed.
- */
-struct  c2wr_cr_reject_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_cr_reject {
-       struct c2wr_cr_reject_req req;
-       struct c2wr_cr_reject_rep rep;
-} __attribute__((packed));
-
-/*
- * console command.  Used to implement a debug console over the verbs
- * request and reply queues.
- */
-
-/*
- * Console request message.  It contains:
- *     - message hdr with id = CCWR_CONSOLE
- *     - the physaddr/len of host memory to be used for the reply.
- *     - the command string.  eg:  "netstat -s" or "zoneinfo"
- */
-struct c2wr_console_req {
-       struct c2wr_hdr hdr;            /* id = CCWR_CONSOLE */
-       u64 reply_buf;          /* pinned host buf for reply */
-       u32 reply_buf_len;      /* length of reply buffer */
-       u8 command[0];          /* NUL terminated ascii string */
-       /* containing the command req */
-} __attribute__((packed));
-
-/*
- * flags used in the console reply.
- */
-enum c2_console_flags {
-       CONS_REPLY_TRUNCATED = 0x00000001       /* reply was truncated */
-} __attribute__((packed));
-
-/*
- * Console reply message.
- * hdr.result contains the c2_status_t error if the reply was _not_ generated,
- * or C2_OK if the reply was generated.
- */
-struct c2wr_console_rep {
-       struct c2wr_hdr hdr;            /* id = CCWR_CONSOLE */
-       u32 flags;
-} __attribute__((packed));
-
-union c2wr_console {
-       struct c2wr_console_req req;
-       struct c2wr_console_rep rep;
-} __attribute__((packed));
-
-
-/*
- * Giant union with all WRs.  Makes life easier...
- */
-union c2wr {
-       struct c2wr_hdr hdr;
-       struct c2wr_user_hdr user_hdr;
-       union c2wr_rnic_open rnic_open;
-       union c2wr_rnic_query rnic_query;
-       union c2wr_rnic_getconfig rnic_getconfig;
-       union c2wr_rnic_setconfig rnic_setconfig;
-       union c2wr_rnic_close rnic_close;
-       union c2wr_cq_create cq_create;
-       union c2wr_cq_modify cq_modify;
-       union c2wr_cq_destroy cq_destroy;
-       union c2wr_pd_alloc pd_alloc;
-       union c2wr_pd_dealloc pd_dealloc;
-       union c2wr_srq_create srq_create;
-       union c2wr_srq_destroy srq_destroy;
-       union c2wr_qp_create qp_create;
-       union c2wr_qp_query qp_query;
-       union c2wr_qp_modify qp_modify;
-       union c2wr_qp_destroy qp_destroy;
-       struct c2wr_qp_connect qp_connect;
-       union c2wr_nsmr_stag_alloc nsmr_stag_alloc;
-       union c2wr_nsmr_register nsmr_register;
-       union c2wr_nsmr_pbl nsmr_pbl;
-       union c2wr_mr_query mr_query;
-       union c2wr_mw_query mw_query;
-       union c2wr_stag_dealloc stag_dealloc;
-       union c2wr_sqwr sqwr;
-       struct c2wr_rqwr rqwr;
-       struct c2wr_ce ce;
-       union c2wr_ae ae;
-       union c2wr_init init;
-       union c2wr_ep_listen_create ep_listen_create;
-       union c2wr_ep_listen_destroy ep_listen_destroy;
-       union c2wr_cr_accept cr_accept;
-       union c2wr_cr_reject cr_reject;
-       union c2wr_console console;
-       union c2wr_flash_init flash_init;
-       union c2wr_flash flash;
-       union c2wr_buf_alloc buf_alloc;
-       union c2wr_buf_free buf_free;
-       union c2wr_flash_write flash_write;
-} __attribute__((packed));
-
-
-/*
- * Accessors for the wr fields that are packed together tightly to
- * reduce the wr message size.  The wr arguments are void* so that
- * either a struct c2wr*, a struct c2wr_hdr*, or a pointer to any of the types
- * in the struct c2wr union can be passed in.
- */
-static __inline__ u8 c2_wr_get_id(void *wr)
-{
-       return ((struct c2wr_hdr *) wr)->id;
-}
-static __inline__ void c2_wr_set_id(void *wr, u8 id)
-{
-       ((struct c2wr_hdr *) wr)->id = id;
-}
-static __inline__ u8 c2_wr_get_result(void *wr)
-{
-       return ((struct c2wr_hdr *) wr)->result;
-}
-static __inline__ void c2_wr_set_result(void *wr, u8 result)
-{
-       ((struct c2wr_hdr *) wr)->result = result;
-}
-static __inline__ u8 c2_wr_get_flags(void *wr)
-{
-       return ((struct c2wr_hdr *) wr)->flags;
-}
-static __inline__ void c2_wr_set_flags(void *wr, u8 flags)
-{
-       ((struct c2wr_hdr *) wr)->flags = flags;
-}
-static __inline__ u8 c2_wr_get_sge_count(void *wr)
-{
-       return ((struct c2wr_hdr *) wr)->sge_count;
-}
-static __inline__ void c2_wr_set_sge_count(void *wr, u8 sge_count)
-{
-       ((struct c2wr_hdr *) wr)->sge_count = sge_count;
-}
-static __inline__ __be32 c2_wr_get_wqe_count(void *wr)
-{
-       return ((struct c2wr_hdr *) wr)->wqe_count;
-}
-static __inline__ void c2_wr_set_wqe_count(void *wr, u32 wqe_count)
-{
-       ((struct c2wr_hdr *) wr)->wqe_count = wqe_count;
-}
-
-#endif                         /* _C2_WR_H_ */
diff --git a/drivers/staging/rdma/ehca/Kconfig b/drivers/staging/rdma/ehca/Kconfig
deleted file mode 100644 (file)
index 3fadd2a..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-config INFINIBAND_EHCA
-       tristate "eHCA support"
-       depends on IBMEBUS
-       ---help---
-       This driver supports the deprecated IBM pSeries eHCA InfiniBand
-       adapter.
-
-       To compile the driver as a module, choose M here. The module
-       will be called ib_ehca.
-
diff --git a/drivers/staging/rdma/ehca/Makefile b/drivers/staging/rdma/ehca/Makefile
deleted file mode 100644 (file)
index 74d284e..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-#  Authors: Heiko J Schick <schickhj@de.ibm.com>
-#           Christoph Raisch <raisch@de.ibm.com>
-#           Joachim Fenkes <fenkes@de.ibm.com>
-#
-#  Copyright (c) 2005 IBM Corporation
-#
-#  All rights reserved.
-#
-#  This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD.
-
-obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o
-
-ib_ehca-objs  = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \
-               ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \
-               ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o
-
diff --git a/drivers/staging/rdma/ehca/TODO b/drivers/staging/rdma/ehca/TODO
deleted file mode 100644 (file)
index 199a4a6..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-9/2015
-
-The ehca driver has been deprecated and moved to drivers/staging/rdma.
-It will be removed in the 4.6 merge window.
diff --git a/drivers/staging/rdma/ehca/ehca_av.c b/drivers/staging/rdma/ehca/ehca_av.c
deleted file mode 100644 (file)
index 94e088c..0000000
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  address vector functions
- *
- *  Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Khadija Souissi <souissik@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-static struct kmem_cache *av_cache;
-
-int ehca_calc_ipd(struct ehca_shca *shca, int port,
-                 enum ib_rate path_rate, u32 *ipd)
-{
-       int path = ib_rate_to_mult(path_rate);
-       int link, ret;
-       struct ib_port_attr pa;
-
-       if (path_rate == IB_RATE_PORT_CURRENT) {
-               *ipd = 0;
-               return 0;
-       }
-
-       if (unlikely(path < 0)) {
-               ehca_err(&shca->ib_device, "Invalid static rate! path_rate=%x",
-                        path_rate);
-               return -EINVAL;
-       }
-
-       ret = ehca_query_port(&shca->ib_device, port, &pa);
-       if (unlikely(ret < 0)) {
-               ehca_err(&shca->ib_device, "Failed to query port  ret=%i", ret);
-               return ret;
-       }
-
-       link = ib_width_enum_to_int(pa.active_width) * pa.active_speed;
-
-       if (path >= link)
-               /* no need to throttle if path faster than link */
-               *ipd = 0;
-       else
-               /* IPD = round((link / path) - 1) */
-               *ipd = ((link + (path >> 1)) / path) - 1;
-
-       return 0;
-}
-
-struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
-{
-       int ret;
-       struct ehca_av *av;
-       struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
-                                             ib_device);
-
-       av = kmem_cache_alloc(av_cache, GFP_KERNEL);
-       if (!av) {
-               ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p",
-                        pd, ah_attr);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       av->av.sl = ah_attr->sl;
-       av->av.dlid = ah_attr->dlid;
-       av->av.slid_path_bits = ah_attr->src_path_bits;
-
-       if (ehca_static_rate < 0) {
-               u32 ipd;
-
-               if (ehca_calc_ipd(shca, ah_attr->port_num,
-                                 ah_attr->static_rate, &ipd)) {
-                       ret = -EINVAL;
-                       goto create_ah_exit1;
-               }
-               av->av.ipd = ipd;
-       } else
-               av->av.ipd = ehca_static_rate;
-
-       av->av.lnh = ah_attr->ah_flags;
-       av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6);
-       av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK,
-                                           ah_attr->grh.traffic_class);
-       av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
-                                           ah_attr->grh.flow_label);
-       av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
-                                           ah_attr->grh.hop_limit);
-       av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B);
-       /* set sgid in grh.word_1 */
-       if (ah_attr->ah_flags & IB_AH_GRH) {
-               int rc;
-               struct ib_port_attr port_attr;
-               union ib_gid gid;
-
-               memset(&port_attr, 0, sizeof(port_attr));
-               rc = ehca_query_port(pd->device, ah_attr->port_num,
-                                    &port_attr);
-               if (rc) { /* invalid port number */
-                       ret = -EINVAL;
-                       ehca_err(pd->device, "Invalid port number "
-                                "ehca_query_port() returned %x "
-                                "pd=%p ah_attr=%p", rc, pd, ah_attr);
-                       goto create_ah_exit1;
-               }
-               memset(&gid, 0, sizeof(gid));
-               rc = ehca_query_gid(pd->device,
-                                   ah_attr->port_num,
-                                   ah_attr->grh.sgid_index, &gid);
-               if (rc) {
-                       ret = -EINVAL;
-                       ehca_err(pd->device, "Failed to retrieve sgid "
-                                "ehca_query_gid() returned %x "
-                                "pd=%p ah_attr=%p", rc, pd, ah_attr);
-                       goto create_ah_exit1;
-               }
-               memcpy(&av->av.grh.word_1, &gid, sizeof(gid));
-       }
-       av->av.pmtu = shca->max_mtu;
-
-       /* dgid comes in grh.word_3 */
-       memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid,
-              sizeof(ah_attr->grh.dgid));
-
-       return &av->ib_ah;
-
-create_ah_exit1:
-       kmem_cache_free(av_cache, av);
-
-       return ERR_PTR(ret);
-}
-
-int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
-{
-       struct ehca_av *av;
-       struct ehca_ud_av new_ehca_av;
-       struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca,
-                                             ib_device);
-
-       memset(&new_ehca_av, 0, sizeof(new_ehca_av));
-       new_ehca_av.sl = ah_attr->sl;
-       new_ehca_av.dlid = ah_attr->dlid;
-       new_ehca_av.slid_path_bits = ah_attr->src_path_bits;
-       new_ehca_av.ipd = ah_attr->static_rate;
-       new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK,
-                                        (ah_attr->ah_flags & IB_AH_GRH) > 0);
-       new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK,
-                                               ah_attr->grh.traffic_class);
-       new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
-                                                ah_attr->grh.flow_label);
-       new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
-                                                ah_attr->grh.hop_limit);
-       new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b);
-
-       /* set sgid in grh.word_1 */
-       if (ah_attr->ah_flags & IB_AH_GRH) {
-               int rc;
-               struct ib_port_attr port_attr;
-               union ib_gid gid;
-
-               memset(&port_attr, 0, sizeof(port_attr));
-               rc = ehca_query_port(ah->device, ah_attr->port_num,
-                                    &port_attr);
-               if (rc) { /* invalid port number */
-                       ehca_err(ah->device, "Invalid port number "
-                                "ehca_query_port() returned %x "
-                                "ah=%p ah_attr=%p port_num=%x",
-                                rc, ah, ah_attr, ah_attr->port_num);
-                       return -EINVAL;
-               }
-               memset(&gid, 0, sizeof(gid));
-               rc = ehca_query_gid(ah->device,
-                                   ah_attr->port_num,
-                                   ah_attr->grh.sgid_index, &gid);
-               if (rc) {
-                       ehca_err(ah->device, "Failed to retrieve sgid "
-                                "ehca_query_gid() returned %x "
-                                "ah=%p ah_attr=%p port_num=%x "
-                                "sgid_index=%x",
-                                rc, ah, ah_attr, ah_attr->port_num,
-                                ah_attr->grh.sgid_index);
-                       return -EINVAL;
-               }
-               memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid));
-       }
-
-       new_ehca_av.pmtu = shca->max_mtu;
-
-       memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid,
-              sizeof(ah_attr->grh.dgid));
-
-       av = container_of(ah, struct ehca_av, ib_ah);
-       av->av = new_ehca_av;
-
-       return 0;
-}
-
-int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
-{
-       struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah);
-
-       memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3,
-              sizeof(ah_attr->grh.dgid));
-       ah_attr->sl = av->av.sl;
-
-       ah_attr->dlid = av->av.dlid;
-
-       ah_attr->src_path_bits = av->av.slid_path_bits;
-       ah_attr->static_rate = av->av.ipd;
-       ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh);
-       ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK,
-                                                   av->av.grh.word_0);
-       ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK,
-                                               av->av.grh.word_0);
-       ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK,
-                                                av->av.grh.word_0);
-
-       return 0;
-}
-
-int ehca_destroy_ah(struct ib_ah *ah)
-{
-       kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah));
-
-       return 0;
-}
-
-int ehca_init_av_cache(void)
-{
-       av_cache = kmem_cache_create("ehca_cache_av",
-                                  sizeof(struct ehca_av), 0,
-                                  SLAB_HWCACHE_ALIGN,
-                                  NULL);
-       if (!av_cache)
-               return -ENOMEM;
-       return 0;
-}
-
-void ehca_cleanup_av_cache(void)
-{
-       kmem_cache_destroy(av_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_classes.h b/drivers/staging/rdma/ehca/ehca_classes.h
deleted file mode 100644 (file)
index bd45e0f..0000000
+++ /dev/null
@@ -1,482 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Struct definition for eHCA internal structures
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_CLASSES_H__
-#define __EHCA_CLASSES_H__
-
-struct ehca_module;
-struct ehca_qp;
-struct ehca_cq;
-struct ehca_eq;
-struct ehca_mr;
-struct ehca_mw;
-struct ehca_pd;
-struct ehca_av;
-
-#include <linux/wait.h>
-#include <linux/mutex.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_user_verbs.h>
-
-#ifdef CONFIG_PPC64
-#include "ehca_classes_pSeries.h"
-#endif
-#include "ipz_pt_fn.h"
-#include "ehca_qes.h"
-#include "ehca_irq.h"
-
-#define EHCA_EQE_CACHE_SIZE 20
-#define EHCA_MAX_NUM_QUEUES 0xffff
-
-struct ehca_eqe_cache_entry {
-       struct ehca_eqe *eqe;
-       struct ehca_cq *cq;
-};
-
-struct ehca_eq {
-       u32 length;
-       struct ipz_queue ipz_queue;
-       struct ipz_eq_handle ipz_eq_handle;
-       struct work_struct work;
-       struct h_galpas galpas;
-       int is_initialized;
-       struct ehca_pfeq pf;
-       spinlock_t spinlock;
-       struct tasklet_struct interrupt_task;
-       u32 ist;
-       spinlock_t irq_spinlock;
-       struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE];
-};
-
-struct ehca_sma_attr {
-       u16 lid, lmc, sm_sl, sm_lid;
-       u16 pkey_tbl_len, pkeys[16];
-};
-
-struct ehca_sport {
-       struct ib_cq *ibcq_aqp1;
-       struct ib_qp *ibqp_sqp[2];
-       /* lock to serialze modify_qp() calls for sqp in normal
-        * and irq path (when event PORT_ACTIVE is received first time)
-        */
-       spinlock_t mod_sqp_lock;
-       enum ib_port_state port_state;
-       struct ehca_sma_attr saved_attr;
-       u32 pma_qp_nr;
-};
-
-#define HCA_CAP_MR_PGSIZE_4K  0x80000000
-#define HCA_CAP_MR_PGSIZE_64K 0x40000000
-#define HCA_CAP_MR_PGSIZE_1M  0x20000000
-#define HCA_CAP_MR_PGSIZE_16M 0x10000000
-
-struct ehca_shca {
-       struct ib_device ib_device;
-       struct platform_device *ofdev;
-       u8 num_ports;
-       int hw_level;
-       struct list_head shca_list;
-       struct ipz_adapter_handle ipz_hca_handle;
-       struct ehca_sport sport[2];
-       struct ehca_eq eq;
-       struct ehca_eq neq;
-       struct ehca_mr *maxmr;
-       struct ehca_pd *pd;
-       struct h_galpas galpas;
-       struct mutex modify_mutex;
-       u64 hca_cap;
-       /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */
-       u32 hca_cap_mr_pgsize;
-       int max_mtu;
-       int max_num_qps;
-       int max_num_cqs;
-       atomic_t num_cqs;
-       atomic_t num_qps;
-};
-
-struct ehca_pd {
-       struct ib_pd ib_pd;
-       struct ipz_pd fw_pd;
-       /* small queue mgmt */
-       struct mutex lock;
-       struct list_head free[2];
-       struct list_head full[2];
-};
-
-enum ehca_ext_qp_type {
-       EQPT_NORMAL    = 0,
-       EQPT_LLQP      = 1,
-       EQPT_SRQBASE   = 2,
-       EQPT_SRQ       = 3,
-};
-
-/* struct to cache modify_qp()'s parms for GSI/SMI qp */
-struct ehca_mod_qp_parm {
-       int mask;
-       struct ib_qp_attr attr;
-};
-
-#define EHCA_MOD_QP_PARM_MAX 4
-
-#define QMAP_IDX_MASK 0xFFFFULL
-
-/* struct for tracking if cqes have been reported to the application */
-struct ehca_qmap_entry {
-       u16 app_wr_id;
-       u8 reported;
-       u8 cqe_req;
-};
-
-struct ehca_queue_map {
-       struct ehca_qmap_entry *map;
-       unsigned int entries;
-       unsigned int tail;
-       unsigned int left_to_poll;
-       unsigned int next_wqe_idx;   /* Idx to first wqe to be flushed */
-};
-
-/* function to calculate the next index for the qmap */
-static inline unsigned int next_index(unsigned int cur_index, unsigned int limit)
-{
-       unsigned int temp = cur_index + 1;
-       return (temp == limit) ? 0 : temp;
-}
-
-struct ehca_qp {
-       union {
-               struct ib_qp ib_qp;
-               struct ib_srq ib_srq;
-       };
-       u32 qp_type;
-       enum ehca_ext_qp_type ext_type;
-       enum ib_qp_state state;
-       struct ipz_queue ipz_squeue;
-       struct ehca_queue_map sq_map;
-       struct ipz_queue ipz_rqueue;
-       struct ehca_queue_map rq_map;
-       struct h_galpas galpas;
-       u32 qkey;
-       u32 real_qp_num;
-       u32 token;
-       spinlock_t spinlock_s;
-       spinlock_t spinlock_r;
-       u32 sq_max_inline_data_size;
-       struct ipz_qp_handle ipz_qp_handle;
-       struct ehca_pfqp pf;
-       struct ib_qp_init_attr init_attr;
-       struct ehca_cq *send_cq;
-       struct ehca_cq *recv_cq;
-       unsigned int sqerr_purgeflag;
-       struct hlist_node list_entries;
-       /* array to cache modify_qp()'s parms for GSI/SMI qp */
-       struct ehca_mod_qp_parm *mod_qp_parm;
-       int mod_qp_parm_idx;
-       /* mmap counter for resources mapped into user space */
-       u32 mm_count_squeue;
-       u32 mm_count_rqueue;
-       u32 mm_count_galpa;
-       /* unsolicited ack circumvention */
-       int unsol_ack_circ;
-       int mtu_shift;
-       u32 message_count;
-       u32 packet_count;
-       atomic_t nr_events; /* events seen */
-       wait_queue_head_t wait_completion;
-       int mig_armed;
-       struct list_head sq_err_node;
-       struct list_head rq_err_node;
-};
-
-#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
-#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ)
-#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE)
-
-/* must be power of 2 */
-#define QP_HASHTAB_LEN 8
-
-struct ehca_cq {
-       struct ib_cq ib_cq;
-       struct ipz_queue ipz_queue;
-       struct h_galpas galpas;
-       spinlock_t spinlock;
-       u32 cq_number;
-       u32 token;
-       u32 nr_of_entries;
-       struct ipz_cq_handle ipz_cq_handle;
-       struct ehca_pfcq pf;
-       spinlock_t cb_lock;
-       struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
-       struct list_head entry;
-       u32 nr_callbacks;   /* #events assigned to cpu by scaling code */
-       atomic_t nr_events; /* #events seen */
-       wait_queue_head_t wait_completion;
-       spinlock_t task_lock;
-       /* mmap counter for resources mapped into user space */
-       u32 mm_count_queue;
-       u32 mm_count_galpa;
-       struct list_head sqp_err_list;
-       struct list_head rqp_err_list;
-};
-
-enum ehca_mr_flag {
-       EHCA_MR_FLAG_FMR = 0x80000000,   /* FMR, created with ehca_alloc_fmr */
-       EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR                           */
-};
-
-struct ehca_mr {
-       union {
-               struct ib_mr ib_mr;     /* must always be first in ehca_mr */
-               struct ib_fmr ib_fmr;   /* must always be first in ehca_mr */
-       } ib;
-       struct ib_umem *umem;
-       spinlock_t mrlock;
-
-       enum ehca_mr_flag flags;
-       u32 num_kpages;         /* number of kernel pages */
-       u32 num_hwpages;        /* number of hw pages to form MR */
-       u64 hwpage_size;        /* hw page size used for this MR */
-       int acl;                /* ACL (stored here for usage in reregister) */
-       u64 *start;             /* virtual start address (stored here for */
-                               /* usage in reregister) */
-       u64 size;               /* size (stored here for usage in reregister) */
-       u32 fmr_page_size;      /* page size for FMR */
-       u32 fmr_max_pages;      /* max pages for FMR */
-       u32 fmr_max_maps;       /* max outstanding maps for FMR */
-       u32 fmr_map_cnt;        /* map counter for FMR */
-       /* fw specific data */
-       struct ipz_mrmw_handle ipz_mr_handle;   /* MR handle for h-calls */
-       struct h_galpas galpas;
-};
-
-struct ehca_mw {
-       struct ib_mw ib_mw;     /* gen2 mw, must always be first in ehca_mw */
-       spinlock_t mwlock;
-
-       u8 never_bound;         /* indication MW was never bound */
-       struct ipz_mrmw_handle ipz_mw_handle;   /* MW handle for h-calls */
-       struct h_galpas galpas;
-};
-
-enum ehca_mr_pgi_type {
-       EHCA_MR_PGI_PHYS   = 1,  /* type of ehca_reg_phys_mr,
-                                 * ehca_rereg_phys_mr,
-                                 * ehca_reg_internal_maxmr */
-       EHCA_MR_PGI_USER   = 2,  /* type of ehca_reg_user_mr */
-       EHCA_MR_PGI_FMR    = 3   /* type of ehca_map_phys_fmr */
-};
-
-struct ehca_mr_pginfo {
-       enum ehca_mr_pgi_type type;
-       u64 num_kpages;
-       u64 kpage_cnt;
-       u64 hwpage_size;     /* hw page size used for this MR */
-       u64 num_hwpages;     /* number of hw pages */
-       u64 hwpage_cnt;      /* counter for hw pages */
-       u64 next_hwpage;     /* next hw page in buffer/chunk/listelem */
-
-       union {
-               struct { /* type EHCA_MR_PGI_PHYS section */
-                       int num_phys_buf;
-                       struct ib_phys_buf *phys_buf_array;
-                       u64 next_buf;
-               } phy;
-               struct { /* type EHCA_MR_PGI_USER section */
-                       struct ib_umem *region;
-                       struct scatterlist *next_sg;
-                       u64 next_nmap;
-               } usr;
-               struct { /* type EHCA_MR_PGI_FMR section */
-                       u64 fmr_pgsize;
-                       u64 *page_list;
-                       u64 next_listelem;
-               } fmr;
-       } u;
-};
-
-/* output parameters for MR/FMR hipz calls */
-struct ehca_mr_hipzout_parms {
-       struct ipz_mrmw_handle handle;
-       u32 lkey;
-       u32 rkey;
-       u64 len;
-       u64 vaddr;
-       u32 acl;
-};
-
-/* output parameters for MW hipz calls */
-struct ehca_mw_hipzout_parms {
-       struct ipz_mrmw_handle handle;
-       u32 rkey;
-};
-
-struct ehca_av {
-       struct ib_ah ib_ah;
-       struct ehca_ud_av av;
-};
-
-struct ehca_ucontext {
-       struct ib_ucontext ib_ucontext;
-};
-
-int ehca_init_pd_cache(void);
-void ehca_cleanup_pd_cache(void);
-int ehca_init_cq_cache(void);
-void ehca_cleanup_cq_cache(void);
-int ehca_init_qp_cache(void);
-void ehca_cleanup_qp_cache(void);
-int ehca_init_av_cache(void);
-void ehca_cleanup_av_cache(void);
-int ehca_init_mrmw_cache(void);
-void ehca_cleanup_mrmw_cache(void);
-int ehca_init_small_qp_cache(void);
-void ehca_cleanup_small_qp_cache(void);
-
-extern rwlock_t ehca_qp_idr_lock;
-extern rwlock_t ehca_cq_idr_lock;
-extern struct idr ehca_qp_idr;
-extern struct idr ehca_cq_idr;
-extern spinlock_t shca_list_lock;
-
-extern int ehca_static_rate;
-extern int ehca_port_act_time;
-extern bool ehca_use_hp_mr;
-extern bool ehca_scaling_code;
-extern int ehca_lock_hcalls;
-extern int ehca_nr_ports;
-extern int ehca_max_cq;
-extern int ehca_max_qp;
-
-struct ipzu_queue_resp {
-       u32 qe_size;      /* queue entry size */
-       u32 act_nr_of_sg;
-       u32 queue_length; /* queue length allocated in bytes */
-       u32 pagesize;
-       u32 toggle_state;
-       u32 offset; /* save offset within a page for small_qp */
-};
-
-struct ehca_create_cq_resp {
-       u32 cq_number;
-       u32 token;
-       struct ipzu_queue_resp ipz_queue;
-       u32 fw_handle_ofs;
-       u32 dummy;
-};
-
-struct ehca_create_qp_resp {
-       u32 qp_num;
-       u32 token;
-       u32 qp_type;
-       u32 ext_type;
-       u32 qkey;
-       /* qp_num assigned by ehca: sqp0/1 may have got different numbers */
-       u32 real_qp_num;
-       u32 fw_handle_ofs;
-       u32 dummy;
-       struct ipzu_queue_resp ipz_squeue;
-       struct ipzu_queue_resp ipz_rqueue;
-};
-
-struct ehca_alloc_cq_parms {
-       u32 nr_cqe;
-       u32 act_nr_of_entries;
-       u32 act_pages;
-       struct ipz_eq_handle eq_handle;
-};
-
-enum ehca_service_type {
-       ST_RC  = 0,
-       ST_UC  = 1,
-       ST_RD  = 2,
-       ST_UD  = 3,
-};
-
-enum ehca_ll_comp_flags {
-       LLQP_SEND_COMP = 0x20,
-       LLQP_RECV_COMP = 0x40,
-       LLQP_COMP_MASK = 0x60,
-};
-
-struct ehca_alloc_queue_parms {
-       /* input parameters */
-       int max_wr;
-       int max_sge;
-       int page_size;
-       int is_small;
-
-       /* output parameters */
-       u16 act_nr_wqes;
-       u8  act_nr_sges;
-       u32 queue_size; /* bytes for small queues, pages otherwise */
-};
-
-struct ehca_alloc_qp_parms {
-       struct ehca_alloc_queue_parms squeue;
-       struct ehca_alloc_queue_parms rqueue;
-
-       /* input parameters */
-       enum ehca_service_type servicetype;
-       int qp_storage;
-       int sigtype;
-       enum ehca_ext_qp_type ext_type;
-       enum ehca_ll_comp_flags ll_comp_flags;
-       int ud_av_l_key_ctl;
-
-       u32 token;
-       struct ipz_eq_handle eq_handle;
-       struct ipz_pd pd;
-       struct ipz_cq_handle send_cq_handle, recv_cq_handle;
-
-       u32 srq_qpn, srq_token, srq_limit;
-
-       /* output parameters */
-       u32 real_qp_num;
-       struct ipz_qp_handle qp_handle;
-       struct h_galpas galpas;
-};
-
-int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
-int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num);
-struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num);
-
-#endif
diff --git a/drivers/staging/rdma/ehca/ehca_classes_pSeries.h b/drivers/staging/rdma/ehca/ehca_classes_pSeries.h
deleted file mode 100644 (file)
index 689c357..0000000
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  pSeries interface definitions
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_CLASSES_PSERIES_H__
-#define __EHCA_CLASSES_PSERIES_H__
-
-#include "hcp_phyp.h"
-#include "ipz_pt_fn.h"
-
-
-struct ehca_pfqp {
-       struct ipz_qpt sqpt;
-       struct ipz_qpt rqpt;
-};
-
-struct ehca_pfcq {
-       struct ipz_qpt qpt;
-       u32 cqnr;
-};
-
-struct ehca_pfeq {
-       struct ipz_qpt qpt;
-       struct h_galpa galpa;
-       u32 eqnr;
-};
-
-struct ipz_adapter_handle {
-       u64 handle;
-};
-
-struct ipz_cq_handle {
-       u64 handle;
-};
-
-struct ipz_eq_handle {
-       u64 handle;
-};
-
-struct ipz_qp_handle {
-       u64 handle;
-};
-struct ipz_mrmw_handle {
-       u64 handle;
-};
-
-struct ipz_pd {
-       u32 value;
-};
-
-struct hcp_modify_qp_control_block {
-       u32 qkey;                      /* 00 */
-       u32 rdd;                       /* reliable datagram domain */
-       u32 send_psn;                  /* 02 */
-       u32 receive_psn;               /* 03 */
-       u32 prim_phys_port;            /* 04 */
-       u32 alt_phys_port;             /* 05 */
-       u32 prim_p_key_idx;            /* 06 */
-       u32 alt_p_key_idx;             /* 07 */
-       u32 rdma_atomic_ctrl;          /* 08 */
-       u32 qp_state;                  /* 09 */
-       u32 reserved_10;               /* 10 */
-       u32 rdma_nr_atomic_resp_res;   /* 11 */
-       u32 path_migration_state;      /* 12 */
-       u32 rdma_atomic_outst_dest_qp; /* 13 */
-       u32 dest_qp_nr;                /* 14 */
-       u32 min_rnr_nak_timer_field;   /* 15 */
-       u32 service_level;             /* 16 */
-       u32 send_grh_flag;             /* 17 */
-       u32 retry_count;               /* 18 */
-       u32 timeout;                   /* 19 */
-       u32 path_mtu;                  /* 20 */
-       u32 max_static_rate;           /* 21 */
-       u32 dlid;                      /* 22 */
-       u32 rnr_retry_count;           /* 23 */
-       u32 source_path_bits;          /* 24 */
-       u32 traffic_class;             /* 25 */
-       u32 hop_limit;                 /* 26 */
-       u32 source_gid_idx;            /* 27 */
-       u32 flow_label;                /* 28 */
-       u32 reserved_29;               /* 29 */
-       union {                        /* 30 */
-               u64 dw[2];
-               u8 byte[16];
-       } dest_gid;
-       u32 service_level_al;          /* 34 */
-       u32 send_grh_flag_al;          /* 35 */
-       u32 retry_count_al;            /* 36 */
-       u32 timeout_al;                /* 37 */
-       u32 max_static_rate_al;        /* 38 */
-       u32 dlid_al;                   /* 39 */
-       u32 rnr_retry_count_al;        /* 40 */
-       u32 source_path_bits_al;       /* 41 */
-       u32 traffic_class_al;          /* 42 */
-       u32 hop_limit_al;              /* 43 */
-       u32 source_gid_idx_al;         /* 44 */
-       u32 flow_label_al;             /* 45 */
-       u32 reserved_46;               /* 46 */
-       u32 reserved_47;               /* 47 */
-       union {                        /* 48 */
-               u64 dw[2];
-               u8 byte[16];
-       } dest_gid_al;
-       u32 max_nr_outst_send_wr;      /* 52 */
-       u32 max_nr_outst_recv_wr;      /* 53 */
-       u32 disable_ete_credit_check;  /* 54 */
-       u32 qp_number;                 /* 55 */
-       u64 send_queue_handle;         /* 56 */
-       u64 recv_queue_handle;         /* 58 */
-       u32 actual_nr_sges_in_sq_wqe;  /* 60 */
-       u32 actual_nr_sges_in_rq_wqe;  /* 61 */
-       u32 qp_enable;                 /* 62 */
-       u32 curr_srq_limit;            /* 63 */
-       u64 qp_aff_asyn_ev_log_reg;    /* 64 */
-       u64 shared_rq_hndl;            /* 66 */
-       u64 trigg_doorbell_qp_hndl;    /* 68 */
-       u32 reserved_70_127[58];       /* 70 */
-};
-
-#define MQPCB_MASK_QKEY                         EHCA_BMASK_IBM( 0,  0)
-#define MQPCB_MASK_SEND_PSN                     EHCA_BMASK_IBM( 2,  2)
-#define MQPCB_MASK_RECEIVE_PSN                  EHCA_BMASK_IBM( 3,  3)
-#define MQPCB_MASK_PRIM_PHYS_PORT               EHCA_BMASK_IBM( 4,  4)
-#define MQPCB_PRIM_PHYS_PORT                    EHCA_BMASK_IBM(24, 31)
-#define MQPCB_MASK_ALT_PHYS_PORT                EHCA_BMASK_IBM( 5,  5)
-#define MQPCB_MASK_PRIM_P_KEY_IDX               EHCA_BMASK_IBM( 6,  6)
-#define MQPCB_PRIM_P_KEY_IDX                    EHCA_BMASK_IBM(24, 31)
-#define MQPCB_MASK_ALT_P_KEY_IDX                EHCA_BMASK_IBM( 7,  7)
-#define MQPCB_MASK_RDMA_ATOMIC_CTRL             EHCA_BMASK_IBM( 8,  8)
-#define MQPCB_MASK_QP_STATE                     EHCA_BMASK_IBM( 9,  9)
-#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES      EHCA_BMASK_IBM(11, 11)
-#define MQPCB_MASK_PATH_MIGRATION_STATE         EHCA_BMASK_IBM(12, 12)
-#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP    EHCA_BMASK_IBM(13, 13)
-#define MQPCB_MASK_DEST_QP_NR                   EHCA_BMASK_IBM(14, 14)
-#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD      EHCA_BMASK_IBM(15, 15)
-#define MQPCB_MASK_SERVICE_LEVEL                EHCA_BMASK_IBM(16, 16)
-#define MQPCB_MASK_SEND_GRH_FLAG                EHCA_BMASK_IBM(17, 17)
-#define MQPCB_MASK_RETRY_COUNT                  EHCA_BMASK_IBM(18, 18)
-#define MQPCB_MASK_TIMEOUT                      EHCA_BMASK_IBM(19, 19)
-#define MQPCB_MASK_PATH_MTU                     EHCA_BMASK_IBM(20, 20)
-#define MQPCB_MASK_MAX_STATIC_RATE              EHCA_BMASK_IBM(21, 21)
-#define MQPCB_MASK_DLID                         EHCA_BMASK_IBM(22, 22)
-#define MQPCB_MASK_RNR_RETRY_COUNT              EHCA_BMASK_IBM(23, 23)
-#define MQPCB_MASK_SOURCE_PATH_BITS             EHCA_BMASK_IBM(24, 24)
-#define MQPCB_MASK_TRAFFIC_CLASS                EHCA_BMASK_IBM(25, 25)
-#define MQPCB_MASK_HOP_LIMIT                    EHCA_BMASK_IBM(26, 26)
-#define MQPCB_MASK_SOURCE_GID_IDX               EHCA_BMASK_IBM(27, 27)
-#define MQPCB_MASK_FLOW_LABEL                   EHCA_BMASK_IBM(28, 28)
-#define MQPCB_MASK_DEST_GID                     EHCA_BMASK_IBM(30, 30)
-#define MQPCB_MASK_SERVICE_LEVEL_AL             EHCA_BMASK_IBM(31, 31)
-#define MQPCB_MASK_SEND_GRH_FLAG_AL             EHCA_BMASK_IBM(32, 32)
-#define MQPCB_MASK_RETRY_COUNT_AL               EHCA_BMASK_IBM(33, 33)
-#define MQPCB_MASK_TIMEOUT_AL                   EHCA_BMASK_IBM(34, 34)
-#define MQPCB_MASK_MAX_STATIC_RATE_AL           EHCA_BMASK_IBM(35, 35)
-#define MQPCB_MASK_DLID_AL                      EHCA_BMASK_IBM(36, 36)
-#define MQPCB_MASK_RNR_RETRY_COUNT_AL           EHCA_BMASK_IBM(37, 37)
-#define MQPCB_MASK_SOURCE_PATH_BITS_AL          EHCA_BMASK_IBM(38, 38)
-#define MQPCB_MASK_TRAFFIC_CLASS_AL             EHCA_BMASK_IBM(39, 39)
-#define MQPCB_MASK_HOP_LIMIT_AL                 EHCA_BMASK_IBM(40, 40)
-#define MQPCB_MASK_SOURCE_GID_IDX_AL            EHCA_BMASK_IBM(41, 41)
-#define MQPCB_MASK_FLOW_LABEL_AL                EHCA_BMASK_IBM(42, 42)
-#define MQPCB_MASK_DEST_GID_AL                  EHCA_BMASK_IBM(44, 44)
-#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR         EHCA_BMASK_IBM(45, 45)
-#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR         EHCA_BMASK_IBM(46, 46)
-#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK     EHCA_BMASK_IBM(47, 47)
-#define MQPCB_MASK_QP_ENABLE                    EHCA_BMASK_IBM(48, 48)
-#define MQPCB_MASK_CURR_SRQ_LIMIT               EHCA_BMASK_IBM(49, 49)
-#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG       EHCA_BMASK_IBM(50, 50)
-#define MQPCB_MASK_SHARED_RQ_HNDL               EHCA_BMASK_IBM(51, 51)
-
-#endif /* __EHCA_CLASSES_PSERIES_H__ */
diff --git a/drivers/staging/rdma/ehca/ehca_cq.c b/drivers/staging/rdma/ehca/ehca_cq.c
deleted file mode 100644 (file)
index 1aa7931..0000000
+++ /dev/null
@@ -1,397 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Completion queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_iverbs.h"
-#include "ehca_classes.h"
-#include "ehca_irq.h"
-#include "hcp_if.h"
-
-static struct kmem_cache *cq_cache;
-
-int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp)
-{
-       unsigned int qp_num = qp->real_qp_num;
-       unsigned int key = qp_num & (QP_HASHTAB_LEN-1);
-       unsigned long flags;
-
-       spin_lock_irqsave(&cq->spinlock, flags);
-       hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]);
-       spin_unlock_irqrestore(&cq->spinlock, flags);
-
-       ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x",
-                cq->cq_number, qp_num);
-
-       return 0;
-}
-
-int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
-{
-       int ret = -EINVAL;
-       unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
-       struct hlist_node *iter;
-       struct ehca_qp *qp;
-       unsigned long flags;
-
-       spin_lock_irqsave(&cq->spinlock, flags);
-       hlist_for_each(iter, &cq->qp_hashtab[key]) {
-               qp = hlist_entry(iter, struct ehca_qp, list_entries);
-               if (qp->real_qp_num == real_qp_num) {
-                       hlist_del(iter);
-                       ehca_dbg(cq->ib_cq.device,
-                                "removed qp from cq .cq_num=%x real_qp_num=%x",
-                                cq->cq_number, real_qp_num);
-                       ret = 0;
-                       break;
-               }
-       }
-       spin_unlock_irqrestore(&cq->spinlock, flags);
-       if (ret)
-               ehca_err(cq->ib_cq.device,
-                        "qp not found cq_num=%x real_qp_num=%x",
-                        cq->cq_number, real_qp_num);
-
-       return ret;
-}
-
-struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num)
-{
-       struct ehca_qp *ret = NULL;
-       unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
-       struct hlist_node *iter;
-       struct ehca_qp *qp;
-       hlist_for_each(iter, &cq->qp_hashtab[key]) {
-               qp = hlist_entry(iter, struct ehca_qp, list_entries);
-               if (qp->real_qp_num == real_qp_num) {
-                       ret = qp;
-                       break;
-               }
-       }
-       return ret;
-}
-
-struct ib_cq *ehca_create_cq(struct ib_device *device,
-                            const struct ib_cq_init_attr *attr,
-                            struct ib_ucontext *context,
-                            struct ib_udata *udata)
-{
-       int cqe = attr->cqe;
-       static const u32 additional_cqe = 20;
-       struct ib_cq *cq;
-       struct ehca_cq *my_cq;
-       struct ehca_shca *shca =
-               container_of(device, struct ehca_shca, ib_device);
-       struct ipz_adapter_handle adapter_handle;
-       struct ehca_alloc_cq_parms param; /* h_call's out parameters */
-       struct h_galpa gal;
-       void *vpage;
-       u32 counter;
-       u64 rpage, cqx_fec, h_ret;
-       int rc, i;
-       unsigned long flags;
-
-       if (attr->flags)
-               return ERR_PTR(-EINVAL);
-
-       if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
-               return ERR_PTR(-EINVAL);
-
-       if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) {
-               ehca_err(device, "Unable to create CQ, max number of %i "
-                       "CQs reached.", shca->max_num_cqs);
-               ehca_err(device, "To increase the maximum number of CQs "
-                       "use the number_of_cqs module parameter.\n");
-               return ERR_PTR(-ENOSPC);
-       }
-
-       my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL);
-       if (!my_cq) {
-               ehca_err(device, "Out of memory for ehca_cq struct device=%p",
-                        device);
-               atomic_dec(&shca->num_cqs);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       memset(&param, 0, sizeof(struct ehca_alloc_cq_parms));
-
-       spin_lock_init(&my_cq->spinlock);
-       spin_lock_init(&my_cq->cb_lock);
-       spin_lock_init(&my_cq->task_lock);
-       atomic_set(&my_cq->nr_events, 0);
-       init_waitqueue_head(&my_cq->wait_completion);
-
-       cq = &my_cq->ib_cq;
-
-       adapter_handle = shca->ipz_hca_handle;
-       param.eq_handle = shca->eq.ipz_eq_handle;
-
-       idr_preload(GFP_KERNEL);
-       write_lock_irqsave(&ehca_cq_idr_lock, flags);
-       rc = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT);
-       write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-       idr_preload_end();
-
-       if (rc < 0) {
-               cq = ERR_PTR(-ENOMEM);
-               ehca_err(device, "Can't allocate new idr entry. device=%p",
-                        device);
-               goto create_cq_exit1;
-       }
-       my_cq->token = rc;
-
-       /*
-        * CQs maximum depth is 4GB-64, but we need additional 20 as buffer
-        * for receiving errors CQEs.
-        */
-       param.nr_cqe = cqe + additional_cqe;
-       h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, &param);
-
-       if (h_ret != H_SUCCESS) {
-               ehca_err(device, "hipz_h_alloc_resource_cq() failed "
-                        "h_ret=%lli device=%p", h_ret, device);
-               cq = ERR_PTR(ehca2ib_return_code(h_ret));
-               goto create_cq_exit2;
-       }
-
-       rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages,
-                               EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0);
-       if (!rc) {
-               ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%i device=%p",
-                        rc, device);
-               cq = ERR_PTR(-EINVAL);
-               goto create_cq_exit3;
-       }
-
-       for (counter = 0; counter < param.act_pages; counter++) {
-               vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
-               if (!vpage) {
-                       ehca_err(device, "ipz_qpageit_get_inc() "
-                                "returns NULL device=%p", device);
-                       cq = ERR_PTR(-EAGAIN);
-                       goto create_cq_exit4;
-               }
-               rpage = __pa(vpage);
-
-               h_ret = hipz_h_register_rpage_cq(adapter_handle,
-                                                my_cq->ipz_cq_handle,
-                                                &my_cq->pf,
-                                                0,
-                                                0,
-                                                rpage,
-                                                1,
-                                                my_cq->galpas.
-                                                kernel);
-
-               if (h_ret < H_SUCCESS) {
-                       ehca_err(device, "hipz_h_register_rpage_cq() failed "
-                                "ehca_cq=%p cq_num=%x h_ret=%lli counter=%i "
-                                "act_pages=%i", my_cq, my_cq->cq_number,
-                                h_ret, counter, param.act_pages);
-                       cq = ERR_PTR(-EINVAL);
-                       goto create_cq_exit4;
-               }
-
-               if (counter == (param.act_pages - 1)) {
-                       vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
-                       if ((h_ret != H_SUCCESS) || vpage) {
-                               ehca_err(device, "Registration of pages not "
-                                        "complete ehca_cq=%p cq_num=%x "
-                                        "h_ret=%lli", my_cq, my_cq->cq_number,
-                                        h_ret);
-                               cq = ERR_PTR(-EAGAIN);
-                               goto create_cq_exit4;
-                       }
-               } else {
-                       if (h_ret != H_PAGE_REGISTERED) {
-                               ehca_err(device, "Registration of page failed "
-                                        "ehca_cq=%p cq_num=%x h_ret=%lli "
-                                        "counter=%i act_pages=%i",
-                                        my_cq, my_cq->cq_number,
-                                        h_ret, counter, param.act_pages);
-                               cq = ERR_PTR(-ENOMEM);
-                               goto create_cq_exit4;
-                       }
-               }
-       }
-
-       ipz_qeit_reset(&my_cq->ipz_queue);
-
-       gal = my_cq->galpas.kernel;
-       cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec));
-       ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%llx",
-                my_cq, my_cq->cq_number, cqx_fec);
-
-       my_cq->ib_cq.cqe = my_cq->nr_of_entries =
-               param.act_nr_of_entries - additional_cqe;
-       my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff;
-
-       for (i = 0; i < QP_HASHTAB_LEN; i++)
-               INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
-
-       INIT_LIST_HEAD(&my_cq->sqp_err_list);
-       INIT_LIST_HEAD(&my_cq->rqp_err_list);
-
-       if (context) {
-               struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
-               struct ehca_create_cq_resp resp;
-               memset(&resp, 0, sizeof(resp));
-               resp.cq_number = my_cq->cq_number;
-               resp.token = my_cq->token;
-               resp.ipz_queue.qe_size = ipz_queue->qe_size;
-               resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg;
-               resp.ipz_queue.queue_length = ipz_queue->queue_length;
-               resp.ipz_queue.pagesize = ipz_queue->pagesize;
-               resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
-               resp.fw_handle_ofs = (u32)
-                       (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1));
-               if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
-                       ehca_err(device, "Copy to udata failed.");
-                       cq = ERR_PTR(-EFAULT);
-                       goto create_cq_exit4;
-               }
-       }
-
-       return cq;
-
-create_cq_exit4:
-       ipz_queue_dtor(NULL, &my_cq->ipz_queue);
-
-create_cq_exit3:
-       h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
-       if (h_ret != H_SUCCESS)
-               ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p "
-                        "cq_num=%x h_ret=%lli", my_cq, my_cq->cq_number, h_ret);
-
-create_cq_exit2:
-       write_lock_irqsave(&ehca_cq_idr_lock, flags);
-       idr_remove(&ehca_cq_idr, my_cq->token);
-       write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-
-create_cq_exit1:
-       kmem_cache_free(cq_cache, my_cq);
-
-       atomic_dec(&shca->num_cqs);
-       return cq;
-}
-
-int ehca_destroy_cq(struct ib_cq *cq)
-{
-       u64 h_ret;
-       struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-       int cq_num = my_cq->cq_number;
-       struct ib_device *device = cq->device;
-       struct ehca_shca *shca = container_of(device, struct ehca_shca,
-                                             ib_device);
-       struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
-       unsigned long flags;
-
-       if (cq->uobject) {
-               if (my_cq->mm_count_galpa || my_cq->mm_count_queue) {
-                       ehca_err(device, "Resources still referenced in "
-                                "user space cq_num=%x", my_cq->cq_number);
-                       return -EINVAL;
-               }
-       }
-
-       /*
-        * remove the CQ from the idr first to make sure
-        * no more interrupt tasklets will touch this CQ
-        */
-       write_lock_irqsave(&ehca_cq_idr_lock, flags);
-       idr_remove(&ehca_cq_idr, my_cq->token);
-       write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-
-       /* now wait until all pending events have completed */
-       wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events));
-
-       /* nobody's using our CQ any longer -- we can destroy it */
-       h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
-       if (h_ret == H_R_STATE) {
-               /* cq in err: read err data and destroy it forcibly */
-               ehca_dbg(device, "ehca_cq=%p cq_num=%x resource=%llx in err "
-                        "state. Try to delete it forcibly.",
-                        my_cq, cq_num, my_cq->ipz_cq_handle.handle);
-               ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle);
-               h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
-               if (h_ret == H_SUCCESS)
-                       ehca_dbg(device, "cq_num=%x deleted successfully.",
-                                cq_num);
-       }
-       if (h_ret != H_SUCCESS) {
-               ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lli "
-                        "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
-               return ehca2ib_return_code(h_ret);
-       }
-       ipz_queue_dtor(NULL, &my_cq->ipz_queue);
-       kmem_cache_free(cq_cache, my_cq);
-
-       atomic_dec(&shca->num_cqs);
-       return 0;
-}
-
-int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
-{
-       /* TODO: proper resize needs to be done */
-       ehca_err(cq->device, "not implemented yet");
-
-       return -EFAULT;
-}
-
-int ehca_init_cq_cache(void)
-{
-       cq_cache = kmem_cache_create("ehca_cache_cq",
-                                    sizeof(struct ehca_cq), 0,
-                                    SLAB_HWCACHE_ALIGN,
-                                    NULL);
-       if (!cq_cache)
-               return -ENOMEM;
-       return 0;
-}
-
-void ehca_cleanup_cq_cache(void)
-{
-       kmem_cache_destroy(cq_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_eq.c b/drivers/staging/rdma/ehca/ehca_eq.c
deleted file mode 100644 (file)
index 90da674..0000000
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Event queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "ehca_classes.h"
-#include "ehca_irq.h"
-#include "ehca_iverbs.h"
-#include "ehca_qes.h"
-#include "hcp_if.h"
-#include "ipz_pt_fn.h"
-
-int ehca_create_eq(struct ehca_shca *shca,
-                  struct ehca_eq *eq,
-                  const enum ehca_eq_type type, const u32 length)
-{
-       int ret;
-       u64 h_ret;
-       u32 nr_pages;
-       u32 i;
-       void *vpage;
-       struct ib_device *ib_dev = &shca->ib_device;
-
-       spin_lock_init(&eq->spinlock);
-       spin_lock_init(&eq->irq_spinlock);
-       eq->is_initialized = 0;
-
-       if (type != EHCA_EQ && type != EHCA_NEQ) {
-               ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq);
-               return -EINVAL;
-       }
-       if (!length) {
-               ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq);
-               return -EINVAL;
-       }
-
-       h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle,
-                                        &eq->pf,
-                                        type,
-                                        length,
-                                        &eq->ipz_eq_handle,
-                                        &eq->length,
-                                        &nr_pages, &eq->ist);
-
-       if (h_ret != H_SUCCESS) {
-               ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq);
-               return -EINVAL;
-       }
-
-       ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages,
-                            EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0);
-       if (!ret) {
-               ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq);
-               goto create_eq_exit1;
-       }
-
-       for (i = 0; i < nr_pages; i++) {
-               u64 rpage;
-
-               vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
-               if (!vpage)
-                       goto create_eq_exit2;
-
-               rpage = __pa(vpage);
-               h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
-                                                eq->ipz_eq_handle,
-                                                &eq->pf,
-                                                0, 0, rpage, 1);
-
-               if (i == (nr_pages - 1)) {
-                       /* last page */
-                       vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
-                       if (h_ret != H_SUCCESS || vpage)
-                               goto create_eq_exit2;
-               } else {
-                       if (h_ret != H_PAGE_REGISTERED)
-                               goto create_eq_exit2;
-               }
-       }
-
-       ipz_qeit_reset(&eq->ipz_queue);
-
-       /* register interrupt handlers and initialize work queues */
-       if (type == EHCA_EQ) {
-               tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
-
-               ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq,
-                                         0, "ehca_eq",
-                                         (void *)shca);
-               if (ret < 0)
-                       ehca_err(ib_dev, "Can't map interrupt handler.");
-       } else if (type == EHCA_NEQ) {
-               tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca);
-
-               ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq,
-                                         0, "ehca_neq",
-                                         (void *)shca);
-               if (ret < 0)
-                       ehca_err(ib_dev, "Can't map interrupt handler.");
-       }
-
-       eq->is_initialized = 1;
-
-       return 0;
-
-create_eq_exit2:
-       ipz_queue_dtor(NULL, &eq->ipz_queue);
-
-create_eq_exit1:
-       hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
-
-       return -EINVAL;
-}
-
-void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq)
-{
-       unsigned long flags;
-       void *eqe;
-
-       spin_lock_irqsave(&eq->spinlock, flags);
-       eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue);
-       spin_unlock_irqrestore(&eq->spinlock, flags);
-
-       return eqe;
-}
-
-int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
-{
-       unsigned long flags;
-       u64 h_ret;
-
-       ibmebus_free_irq(eq->ist, (void *)shca);
-
-       spin_lock_irqsave(&shca_list_lock, flags);
-       eq->is_initialized = 0;
-       spin_unlock_irqrestore(&shca_list_lock, flags);
-
-       tasklet_kill(&eq->interrupt_task);
-
-       h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
-
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't free EQ resources.");
-               return -EINVAL;
-       }
-       ipz_queue_dtor(NULL, &eq->ipz_queue);
-
-       return 0;
-}
diff --git a/drivers/staging/rdma/ehca/ehca_hca.c b/drivers/staging/rdma/ehca/ehca_hca.c
deleted file mode 100644 (file)
index e8b1bb6..0000000
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  HCA query functions
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/gfp.h>
-
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-static unsigned int limit_uint(unsigned int value)
-{
-       return min_t(unsigned int, value, INT_MAX);
-}
-
-int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-                     struct ib_udata *uhw)
-{
-       int i, ret = 0;
-       struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
-                                             ib_device);
-       struct hipz_query_hca *rblock;
-
-       static const u32 cap_mapping[] = {
-               IB_DEVICE_RESIZE_MAX_WR,      HCA_CAP_WQE_RESIZE,
-               IB_DEVICE_BAD_PKEY_CNTR,      HCA_CAP_BAD_P_KEY_CTR,
-               IB_DEVICE_BAD_QKEY_CNTR,      HCA_CAP_Q_KEY_VIOL_CTR,
-               IB_DEVICE_RAW_MULTI,          HCA_CAP_RAW_PACKET_MCAST,
-               IB_DEVICE_AUTO_PATH_MIG,      HCA_CAP_AUTO_PATH_MIG,
-               IB_DEVICE_CHANGE_PHY_PORT,    HCA_CAP_SQD_RTS_PORT_CHANGE,
-               IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK,
-               IB_DEVICE_CURR_QP_STATE_MOD,  HCA_CAP_CUR_QP_STATE_MOD,
-               IB_DEVICE_SHUTDOWN_PORT,      HCA_CAP_SHUTDOWN_PORT,
-               IB_DEVICE_INIT_TYPE,          HCA_CAP_INIT_TYPE,
-               IB_DEVICE_PORT_ACTIVE_EVENT,  HCA_CAP_PORT_ACTIVE_EVENT,
-       };
-
-       if (uhw->inlen || uhw->outlen)
-               return -EINVAL;
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query device properties");
-               ret = -EINVAL;
-               goto query_device1;
-       }
-
-       memset(props, 0, sizeof(struct ib_device_attr));
-       props->page_size_cap   = shca->hca_cap_mr_pgsize;
-       props->fw_ver          = rblock->hw_ver;
-       props->max_mr_size     = rblock->max_mr_size;
-       props->vendor_id       = rblock->vendor_id >> 8;
-       props->vendor_part_id  = rblock->vendor_part_id >> 16;
-       props->hw_ver          = rblock->hw_ver;
-       props->max_qp          = limit_uint(rblock->max_qp);
-       props->max_qp_wr       = limit_uint(rblock->max_wqes_wq);
-       props->max_sge         = limit_uint(rblock->max_sge);
-       props->max_sge_rd      = limit_uint(rblock->max_sge_rd);
-       props->max_cq          = limit_uint(rblock->max_cq);
-       props->max_cqe         = limit_uint(rblock->max_cqe);
-       props->max_mr          = limit_uint(rblock->max_mr);
-       props->max_mw          = limit_uint(rblock->max_mw);
-       props->max_pd          = limit_uint(rblock->max_pd);
-       props->max_ah          = limit_uint(rblock->max_ah);
-       props->max_ee          = limit_uint(rblock->max_rd_ee_context);
-       props->max_rdd         = limit_uint(rblock->max_rd_domain);
-       props->max_fmr         = limit_uint(rblock->max_mr);
-       props->max_qp_rd_atom  = limit_uint(rblock->max_rr_qp);
-       props->max_ee_rd_atom  = limit_uint(rblock->max_rr_ee_context);
-       props->max_res_rd_atom = limit_uint(rblock->max_rr_hca);
-       props->max_qp_init_rd_atom = limit_uint(rblock->max_act_wqs_qp);
-       props->max_ee_init_rd_atom = limit_uint(rblock->max_act_wqs_ee_context);
-
-       if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
-               props->max_srq         = limit_uint(props->max_qp);
-               props->max_srq_wr      = limit_uint(props->max_qp_wr);
-               props->max_srq_sge     = 3;
-       }
-
-       props->max_pkeys           = 16;
-       /* Some FW versions say 0 here; insert sensible value in that case */
-       props->local_ca_ack_delay  = rblock->local_ca_ack_delay ?
-               min_t(u8, rblock->local_ca_ack_delay, 255) : 12;
-       props->max_raw_ipv6_qp     = limit_uint(rblock->max_raw_ipv6_qp);
-       props->max_raw_ethy_qp     = limit_uint(rblock->max_raw_ethy_qp);
-       props->max_mcast_grp       = limit_uint(rblock->max_mcast_grp);
-       props->max_mcast_qp_attach = limit_uint(rblock->max_mcast_qp_attach);
-       props->max_total_mcast_qp_attach
-               = limit_uint(rblock->max_total_mcast_qp_attach);
-
-       /* translate device capabilities */
-       props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID |
-               IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ;
-       for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2)
-               if (rblock->hca_cap_indicators & cap_mapping[i + 1])
-                       props->device_cap_flags |= cap_mapping[i];
-
-query_device1:
-       ehca_free_fw_ctrlblock(rblock);
-
-       return ret;
-}
-
-static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu)
-{
-       switch (fw_mtu) {
-       case 0x1:
-               return IB_MTU_256;
-       case 0x2:
-               return IB_MTU_512;
-       case 0x3:
-               return IB_MTU_1024;
-       case 0x4:
-               return IB_MTU_2048;
-       case 0x5:
-               return IB_MTU_4096;
-       default:
-               ehca_err(&shca->ib_device, "Unknown MTU size: %x.",
-                        fw_mtu);
-               return 0;
-       }
-}
-
-static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap)
-{
-       switch (vl_cap) {
-       case 0x1:
-               return 1;
-       case 0x2:
-               return 2;
-       case 0x3:
-               return 4;
-       case 0x4:
-               return 8;
-       case 0x5:
-               return 15;
-       default:
-               ehca_err(&shca->ib_device, "invalid Vl Capability: %x.",
-                        vl_cap);
-               return 0;
-       }
-}
-
-int ehca_query_port(struct ib_device *ibdev,
-                   u8 port, struct ib_port_attr *props)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
-                                             ib_device);
-       struct hipz_query_port *rblock;
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query port properties");
-               ret = -EINVAL;
-               goto query_port1;
-       }
-
-       memset(props, 0, sizeof(struct ib_port_attr));
-
-       props->active_mtu = props->max_mtu = map_mtu(shca, rblock->max_mtu);
-       props->port_cap_flags  = rblock->capability_mask;
-       props->gid_tbl_len     = rblock->gid_tbl_len;
-       if (rblock->max_msg_sz)
-               props->max_msg_sz      = rblock->max_msg_sz;
-       else
-               props->max_msg_sz      = 0x1 << 31;
-       props->bad_pkey_cntr   = rblock->bad_pkey_cntr;
-       props->qkey_viol_cntr  = rblock->qkey_viol_cntr;
-       props->pkey_tbl_len    = rblock->pkey_tbl_len;
-       props->lid             = rblock->lid;
-       props->sm_lid          = rblock->sm_lid;
-       props->lmc             = rblock->lmc;
-       props->sm_sl           = rblock->sm_sl;
-       props->subnet_timeout  = rblock->subnet_timeout;
-       props->init_type_reply = rblock->init_type_reply;
-       props->max_vl_num      = map_number_of_vls(shca, rblock->vl_cap);
-
-       if (rblock->state && rblock->phys_width) {
-               props->phys_state      = rblock->phys_pstate;
-               props->state           = rblock->phys_state;
-               props->active_width    = rblock->phys_width;
-               props->active_speed    = rblock->phys_speed;
-       } else {
-               /* old firmware releases don't report physical
-                * port info, so use default values
-                */
-               props->phys_state      = 5;
-               props->state           = rblock->state;
-               props->active_width    = IB_WIDTH_12X;
-               props->active_speed    = IB_SPEED_SDR;
-       }
-
-query_port1:
-       ehca_free_fw_ctrlblock(rblock);
-
-       return ret;
-}
-
-int ehca_query_sma_attr(struct ehca_shca *shca,
-                       u8 port, struct ehca_sma_attr *attr)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct hipz_query_port *rblock;
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
-       if (!rblock) {
-               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query port properties");
-               ret = -EINVAL;
-               goto query_sma_attr1;
-       }
-
-       memset(attr, 0, sizeof(struct ehca_sma_attr));
-
-       attr->lid    = rblock->lid;
-       attr->lmc    = rblock->lmc;
-       attr->sm_sl  = rblock->sm_sl;
-       attr->sm_lid = rblock->sm_lid;
-
-       attr->pkey_tbl_len = rblock->pkey_tbl_len;
-       memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys));
-
-query_sma_attr1:
-       ehca_free_fw_ctrlblock(rblock);
-
-       return ret;
-}
-
-int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct ehca_shca *shca;
-       struct hipz_query_port *rblock;
-
-       shca = container_of(ibdev, struct ehca_shca, ib_device);
-       if (index > 16) {
-               ehca_err(&shca->ib_device, "Invalid index: %x.", index);
-               return -EINVAL;
-       }
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_err(&shca->ib_device,  "Can't allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query port properties");
-               ret = -EINVAL;
-               goto query_pkey1;
-       }
-
-       memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16));
-
-query_pkey1:
-       ehca_free_fw_ctrlblock(rblock);
-
-       return ret;
-}
-
-int ehca_query_gid(struct ib_device *ibdev, u8 port,
-                  int index, union ib_gid *gid)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
-                                             ib_device);
-       struct hipz_query_port *rblock;
-
-       if (index < 0 || index > 255) {
-               ehca_err(&shca->ib_device, "Invalid index: %x.", index);
-               return -EINVAL;
-       }
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query port properties");
-               ret = -EINVAL;
-               goto query_gid1;
-       }
-
-       memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64));
-       memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64));
-
-query_gid1:
-       ehca_free_fw_ctrlblock(rblock);
-
-       return ret;
-}
-
-static const u32 allowed_port_caps = (
-       IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP |
-       IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP |
-       IB_PORT_VENDOR_CLASS_SUP);
-
-int ehca_modify_port(struct ib_device *ibdev,
-                    u8 port, int port_modify_mask,
-                    struct ib_port_modify *props)
-{
-       int ret = 0;
-       struct ehca_shca *shca;
-       struct hipz_query_port *rblock;
-       u32 cap;
-       u64 hret;
-
-       shca = container_of(ibdev, struct ehca_shca, ib_device);
-       if ((props->set_port_cap_mask | props->clr_port_cap_mask)
-           & ~allowed_port_caps) {
-               ehca_err(&shca->ib_device, "Non-changeable bits set in masks  "
-                        "set=%x  clr=%x  allowed=%x", props->set_port_cap_mask,
-                        props->clr_port_cap_mask, allowed_port_caps);
-               return -EINVAL;
-       }
-
-       if (mutex_lock_interruptible(&shca->modify_mutex))
-               return -ERESTARTSYS;
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_err(&shca->ib_device,  "Can't allocate rblock memory.");
-               ret = -ENOMEM;
-               goto modify_port1;
-       }
-
-       hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-       if (hret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query port properties");
-               ret = -EINVAL;
-               goto modify_port2;
-       }
-
-       cap = (rblock->capability_mask | props->set_port_cap_mask)
-               & ~props->clr_port_cap_mask;
-
-       hret = hipz_h_modify_port(shca->ipz_hca_handle, port,
-                                 cap, props->init_type, port_modify_mask);
-       if (hret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Modify port failed  h_ret=%lli",
-                        hret);
-               ret = -EINVAL;
-       }
-
-modify_port2:
-       ehca_free_fw_ctrlblock(rblock);
-
-modify_port1:
-       mutex_unlock(&shca->modify_mutex);
-
-       return ret;
-}
diff --git a/drivers/staging/rdma/ehca/ehca_irq.c b/drivers/staging/rdma/ehca/ehca_irq.c
deleted file mode 100644 (file)
index 8615d7c..0000000
+++ /dev/null
@@ -1,870 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Functions for EQs, NEQs and interrupts
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-#include <linux/smpboot.h>
-
-#include "ehca_classes.h"
-#include "ehca_irq.h"
-#include "ehca_iverbs.h"
-#include "ehca_tools.h"
-#include "hcp_if.h"
-#include "hipz_fns.h"
-#include "ipz_pt_fn.h"
-
-#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM( 1,  1)
-#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM( 8, 31)
-#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM( 2,  7)
-#define EQE_CQ_NUMBER          EHCA_BMASK_IBM( 8, 31)
-#define EQE_QP_NUMBER          EHCA_BMASK_IBM( 8, 31)
-#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32, 63)
-#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32, 63)
-
-#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM( 1,  1)
-#define NEQE_EVENT_CODE        EHCA_BMASK_IBM( 2,  7)
-#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM( 8, 15)
-#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)
-#define NEQE_DISRUPTIVE        EHCA_BMASK_IBM(16, 16)
-#define NEQE_SPECIFIC_EVENT    EHCA_BMASK_IBM(16, 23)
-
-#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52, 63)
-#define ERROR_DATA_TYPE        EHCA_BMASK_IBM( 0,  7)
-
-static void queue_comp_task(struct ehca_cq *__cq);
-
-static struct ehca_comp_pool *pool;
-
-static inline void comp_event_callback(struct ehca_cq *cq)
-{
-       if (!cq->ib_cq.comp_handler)
-               return;
-
-       spin_lock(&cq->cb_lock);
-       cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
-       spin_unlock(&cq->cb_lock);
-
-       return;
-}
-
-static void print_error_data(struct ehca_shca *shca, void *data,
-                            u64 *rblock, int length)
-{
-       u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
-       u64 resource = rblock[1];
-
-       switch (type) {
-       case 0x1: /* Queue Pair */
-       {
-               struct ehca_qp *qp = (struct ehca_qp *)data;
-
-               /* only print error data if AER is set */
-               if (rblock[6] == 0)
-                       return;
-
-               ehca_err(&shca->ib_device,
-                        "QP 0x%x (resource=%llx) has errors.",
-                        qp->ib_qp.qp_num, resource);
-               break;
-       }
-       case 0x4: /* Completion Queue */
-       {
-               struct ehca_cq *cq = (struct ehca_cq *)data;
-
-               ehca_err(&shca->ib_device,
-                        "CQ 0x%x (resource=%llx) has errors.",
-                        cq->cq_number, resource);
-               break;
-       }
-       default:
-               ehca_err(&shca->ib_device,
-                        "Unknown error type: %llx on %s.",
-                        type, shca->ib_device.name);
-               break;
-       }
-
-       ehca_err(&shca->ib_device, "Error data is available: %llx.", resource);
-       ehca_err(&shca->ib_device, "EHCA ----- error data begin "
-                "---------------------------------------------------");
-       ehca_dmp(rblock, length, "resource=%llx", resource);
-       ehca_err(&shca->ib_device, "EHCA ----- error data end "
-                "----------------------------------------------------");
-
-       return;
-}
-
-int ehca_error_data(struct ehca_shca *shca, void *data,
-                   u64 resource)
-{
-
-       unsigned long ret;
-       u64 *rblock;
-       unsigned long block_count;
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
-       if (!rblock) {
-               ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
-               ret = -ENOMEM;
-               goto error_data1;
-       }
-
-       /* rblock must be 4K aligned and should be 4K large */
-       ret = hipz_h_error_data(shca->ipz_hca_handle,
-                               resource,
-                               rblock,
-                               &block_count);
-
-       if (ret == H_R_STATE)
-               ehca_err(&shca->ib_device,
-                        "No error data is available: %llx.", resource);
-       else if (ret == H_SUCCESS) {
-               int length;
-
-               length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);
-
-               if (length > EHCA_PAGESIZE)
-                       length = EHCA_PAGESIZE;
-
-               print_error_data(shca, data, rblock, length);
-       } else
-               ehca_err(&shca->ib_device,
-                        "Error data could not be fetched: %llx", resource);
-
-       ehca_free_fw_ctrlblock(rblock);
-
-error_data1:
-       return ret;
-
-}
-
-static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp,
-                             enum ib_event_type event_type)
-{
-       struct ib_event event;
-
-       /* PATH_MIG without the QP ever having been armed is false alarm */
-       if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed)
-               return;
-
-       event.device = &shca->ib_device;
-       event.event = event_type;
-
-       if (qp->ext_type == EQPT_SRQ) {
-               if (!qp->ib_srq.event_handler)
-                       return;
-
-               event.element.srq = &qp->ib_srq;
-               qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context);
-       } else {
-               if (!qp->ib_qp.event_handler)
-                       return;
-
-               event.element.qp = &qp->ib_qp;
-               qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
-       }
-}
-
-static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
-                             enum ib_event_type event_type, int fatal)
-{
-       struct ehca_qp *qp;
-       u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
-
-       read_lock(&ehca_qp_idr_lock);
-       qp = idr_find(&ehca_qp_idr, token);
-       if (qp)
-               atomic_inc(&qp->nr_events);
-       read_unlock(&ehca_qp_idr_lock);
-
-       if (!qp)
-               return;
-
-       if (fatal)
-               ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
-
-       dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ?
-                         IB_EVENT_SRQ_ERR : event_type);
-
-       /*
-        * eHCA only processes one WQE at a time for SRQ base QPs,
-        * so the last WQE has been processed as soon as the QP enters
-        * error state.
-        */
-       if (fatal && qp->ext_type == EQPT_SRQBASE)
-               dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);
-
-       if (atomic_dec_and_test(&qp->nr_events))
-               wake_up(&qp->wait_completion);
-       return;
-}
-
-static void cq_event_callback(struct ehca_shca *shca,
-                             u64 eqe)
-{
-       struct ehca_cq *cq;
-       u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
-
-       read_lock(&ehca_cq_idr_lock);
-       cq = idr_find(&ehca_cq_idr, token);
-       if (cq)
-               atomic_inc(&cq->nr_events);
-       read_unlock(&ehca_cq_idr_lock);
-
-       if (!cq)
-               return;
-
-       ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
-
-       if (atomic_dec_and_test(&cq->nr_events))
-               wake_up(&cq->wait_completion);
-
-       return;
-}
-
-static void parse_identifier(struct ehca_shca *shca, u64 eqe)
-{
-       u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);
-
-       switch (identifier) {
-       case 0x02: /* path migrated */
-               qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0);
-               break;
-       case 0x03: /* communication established */
-               qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0);
-               break;
-       case 0x04: /* send queue drained */
-               qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0);
-               break;
-       case 0x05: /* QP error */
-       case 0x06: /* QP error */
-               qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1);
-               break;
-       case 0x07: /* CQ error */
-       case 0x08: /* CQ error */
-               cq_event_callback(shca, eqe);
-               break;
-       case 0x09: /* MRMWPTE error */
-               ehca_err(&shca->ib_device, "MRMWPTE error.");
-               break;
-       case 0x0A: /* port event */
-               ehca_err(&shca->ib_device, "Port event.");
-               break;
-       case 0x0B: /* MR access error */
-               ehca_err(&shca->ib_device, "MR access error.");
-               break;
-       case 0x0C: /* EQ error */
-               ehca_err(&shca->ib_device, "EQ error.");
-               break;
-       case 0x0D: /* P/Q_Key mismatch */
-               ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
-               break;
-       case 0x10: /* sampling complete */
-               ehca_err(&shca->ib_device, "Sampling complete.");
-               break;
-       case 0x11: /* unaffiliated access error */
-               ehca_err(&shca->ib_device, "Unaffiliated access error.");
-               break;
-       case 0x12: /* path migrating */
-               ehca_err(&shca->ib_device, "Path migrating.");
-               break;
-       case 0x13: /* interface trace stopped */
-               ehca_err(&shca->ib_device, "Interface trace stopped.");
-               break;
-       case 0x14: /* first error capture info available */
-               ehca_info(&shca->ib_device, "First error capture available");
-               break;
-       case 0x15: /* SRQ limit reached */
-               qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0);
-               break;
-       default:
-               ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
-                        identifier, shca->ib_device.name);
-               break;
-       }
-
-       return;
-}
-
-static void dispatch_port_event(struct ehca_shca *shca, int port_num,
-                               enum ib_event_type type, const char *msg)
-{
-       struct ib_event event;
-
-       ehca_info(&shca->ib_device, "port %d %s.", port_num, msg);
-       event.device = &shca->ib_device;
-       event.event = type;
-       event.element.port_num = port_num;
-       ib_dispatch_event(&event);
-}
-
-static void notify_port_conf_change(struct ehca_shca *shca, int port_num)
-{
-       struct ehca_sma_attr  new_attr;
-       struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr;
-
-       ehca_query_sma_attr(shca, port_num, &new_attr);
-
-       if (new_attr.sm_sl  != old_attr->sm_sl ||
-           new_attr.sm_lid != old_attr->sm_lid)
-               dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE,
-                                   "SM changed");
-
-       if (new_attr.lid != old_attr->lid ||
-           new_attr.lmc != old_attr->lmc)
-               dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE,
-                                   "LID changed");
-
-       if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len ||
-           memcmp(new_attr.pkeys, old_attr->pkeys,
-                  sizeof(u16) * new_attr.pkey_tbl_len))
-               dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE,
-                                   "P_Key changed");
-
-       *old_attr = new_attr;
-}
-
-/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */
-static int replay_modify_qp(struct ehca_sport *sport)
-{
-       int aqp1_destroyed;
-       unsigned long flags;
-
-       spin_lock_irqsave(&sport->mod_sqp_lock, flags);
-
-       aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI];
-
-       if (sport->ibqp_sqp[IB_QPT_SMI])
-               ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);
-       if (!aqp1_destroyed)
-               ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);
-
-       spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-
-       return aqp1_destroyed;
-}
-
-static void parse_ec(struct ehca_shca *shca, u64 eqe)
-{
-       u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
-       u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
-       u8 spec_event;
-       struct ehca_sport *sport = &shca->sport[port - 1];
-
-       switch (ec) {
-       case 0x30: /* port availability change */
-               if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
-                       /* only replay modify_qp calls in autodetect mode;
-                        * if AQP1 was destroyed, the port is already down
-                        * again and we can drop the event.
-                        */
-                       if (ehca_nr_ports < 0)
-                               if (replay_modify_qp(sport))
-                                       break;
-
-                       sport->port_state = IB_PORT_ACTIVE;
-                       dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
-                                           "is active");
-                       ehca_query_sma_attr(shca, port, &sport->saved_attr);
-               } else {
-                       sport->port_state = IB_PORT_DOWN;
-                       dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
-                                           "is inactive");
-               }
-               break;
-       case 0x31:
-               /* port configuration change
-                * disruptive change is caused by
-                * LID, PKEY or SM change
-                */
-               if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) {
-                       ehca_warn(&shca->ib_device, "disruptive port "
-                                 "%d configuration change", port);
-
-                       sport->port_state = IB_PORT_DOWN;
-                       dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
-                                           "is inactive");
-
-                       sport->port_state = IB_PORT_ACTIVE;
-                       dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
-                                           "is active");
-                       ehca_query_sma_attr(shca, port,
-                                           &sport->saved_attr);
-               } else
-                       notify_port_conf_change(shca, port);
-               break;
-       case 0x32: /* adapter malfunction */
-               ehca_err(&shca->ib_device, "Adapter malfunction.");
-               break;
-       case 0x33:  /* trace stopped */
-               ehca_err(&shca->ib_device, "Traced stopped.");
-               break;
-       case 0x34: /* util async event */
-               spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe);
-               if (spec_event == 0x80) /* client reregister required */
-                       dispatch_port_event(shca, port,
-                                           IB_EVENT_CLIENT_REREGISTER,
-                                           "client reregister req.");
-               else
-                       ehca_warn(&shca->ib_device, "Unknown util async "
-                                 "event %x on port %x", spec_event, port);
-               break;
-       default:
-               ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
-                        ec, shca->ib_device.name);
-               break;
-       }
-
-       return;
-}
-
-static inline void reset_eq_pending(struct ehca_cq *cq)
-{
-       u64 CQx_EP;
-       struct h_galpa gal = cq->galpas.kernel;
-
-       hipz_galpa_store_cq(gal, cqx_ep, 0x0);
-       CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));
-
-       return;
-}
-
-irqreturn_t ehca_interrupt_neq(int irq, void *dev_id)
-{
-       struct ehca_shca *shca = (struct ehca_shca*)dev_id;
-
-       tasklet_hi_schedule(&shca->neq.interrupt_task);
-
-       return IRQ_HANDLED;
-}
-
-void ehca_tasklet_neq(unsigned long data)
-{
-       struct ehca_shca *shca = (struct ehca_shca*)data;
-       struct ehca_eqe *eqe;
-       u64 ret;
-
-       eqe = ehca_poll_eq(shca, &shca->neq);
-
-       while (eqe) {
-               if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
-                       parse_ec(shca, eqe->entry);
-
-               eqe = ehca_poll_eq(shca, &shca->neq);
-       }
-
-       ret = hipz_h_reset_event(shca->ipz_hca_handle,
-                                shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);
-
-       if (ret != H_SUCCESS)
-               ehca_err(&shca->ib_device, "Can't clear notification events.");
-
-       return;
-}
-
-irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
-{
-       struct ehca_shca *shca = (struct ehca_shca*)dev_id;
-
-       tasklet_hi_schedule(&shca->eq.interrupt_task);
-
-       return IRQ_HANDLED;
-}
-
-
-static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
-{
-       u64 eqe_value;
-       u32 token;
-       struct ehca_cq *cq;
-
-       eqe_value = eqe->entry;
-       ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value);
-       if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
-               ehca_dbg(&shca->ib_device, "Got completion event");
-               token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
-               read_lock(&ehca_cq_idr_lock);
-               cq = idr_find(&ehca_cq_idr, token);
-               if (cq)
-                       atomic_inc(&cq->nr_events);
-               read_unlock(&ehca_cq_idr_lock);
-               if (cq == NULL) {
-                       ehca_err(&shca->ib_device,
-                                "Invalid eqe for non-existing cq token=%x",
-                                token);
-                       return;
-               }
-               reset_eq_pending(cq);
-               if (ehca_scaling_code)
-                       queue_comp_task(cq);
-               else {
-                       comp_event_callback(cq);
-                       if (atomic_dec_and_test(&cq->nr_events))
-                               wake_up(&cq->wait_completion);
-               }
-       } else {
-               ehca_dbg(&shca->ib_device, "Got non completion event");
-               parse_identifier(shca, eqe_value);
-       }
-}
-
-void ehca_process_eq(struct ehca_shca *shca, int is_irq)
-{
-       struct ehca_eq *eq = &shca->eq;
-       struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
-       u64 eqe_value, ret;
-       int eqe_cnt, i;
-       int eq_empty = 0;
-
-       spin_lock(&eq->irq_spinlock);
-       if (is_irq) {
-               const int max_query_cnt = 100;
-               int query_cnt = 0;
-               int int_state = 1;
-               do {
-                       int_state = hipz_h_query_int_state(
-                               shca->ipz_hca_handle, eq->ist);
-                       query_cnt++;
-                       iosync();
-               } while (int_state && query_cnt < max_query_cnt);
-               if (unlikely((query_cnt == max_query_cnt)))
-                       ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
-                                int_state, query_cnt);
-       }
-
-       /* read out all eqes */
-       eqe_cnt = 0;
-       do {
-               u32 token;
-               eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq);
-               if (!eqe_cache[eqe_cnt].eqe)
-                       break;
-               eqe_value = eqe_cache[eqe_cnt].eqe->entry;
-               if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
-                       token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
-                       read_lock(&ehca_cq_idr_lock);
-                       eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
-                       if (eqe_cache[eqe_cnt].cq)
-                               atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events);
-                       read_unlock(&ehca_cq_idr_lock);
-                       if (!eqe_cache[eqe_cnt].cq) {
-                               ehca_err(&shca->ib_device,
-                                        "Invalid eqe for non-existing cq "
-                                        "token=%x", token);
-                               continue;
-                       }
-               } else
-                       eqe_cache[eqe_cnt].cq = NULL;
-               eqe_cnt++;
-       } while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
-       if (!eqe_cnt) {
-               if (is_irq)
-                       ehca_dbg(&shca->ib_device,
-                                "No eqe found for irq event");
-               goto unlock_irq_spinlock;
-       } else if (!is_irq) {
-               ret = hipz_h_eoi(eq->ist);
-               if (ret != H_SUCCESS)
-                       ehca_err(&shca->ib_device,
-                                "bad return code EOI -rc = %lld\n", ret);
-               ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
-       }
-       if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
-               ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
-       /* enable irq for new packets */
-       for (i = 0; i < eqe_cnt; i++) {
-               if (eq->eqe_cache[i].cq)
-                       reset_eq_pending(eq->eqe_cache[i].cq);
-       }
-       /* check eq */
-       spin_lock(&eq->spinlock);
-       eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
-       spin_unlock(&eq->spinlock);
-       /* call completion handler for cached eqes */
-       for (i = 0; i < eqe_cnt; i++)
-               if (eq->eqe_cache[i].cq) {
-                       if (ehca_scaling_code)
-                               queue_comp_task(eq->eqe_cache[i].cq);
-                       else {
-                               struct ehca_cq *cq = eq->eqe_cache[i].cq;
-                               comp_event_callback(cq);
-                               if (atomic_dec_and_test(&cq->nr_events))
-                                       wake_up(&cq->wait_completion);
-                       }
-               } else {
-                       ehca_dbg(&shca->ib_device, "Got non completion event");
-                       parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
-               }
-       /* poll eq if not empty */
-       if (eq_empty)
-               goto unlock_irq_spinlock;
-       do {
-               struct ehca_eqe *eqe;
-               eqe = ehca_poll_eq(shca, &shca->eq);
-               if (!eqe)
-                       break;
-               process_eqe(shca, eqe);
-       } while (1);
-
-unlock_irq_spinlock:
-       spin_unlock(&eq->irq_spinlock);
-}
-
-void ehca_tasklet_eq(unsigned long data)
-{
-       ehca_process_eq((struct ehca_shca*)data, 1);
-}
-
-static int find_next_online_cpu(struct ehca_comp_pool *pool)
-{
-       int cpu;
-       unsigned long flags;
-
-       WARN_ON_ONCE(!in_interrupt());
-       if (ehca_debug_level >= 3)
-               ehca_dmp(cpu_online_mask, cpumask_size(), "");
-
-       spin_lock_irqsave(&pool->last_cpu_lock, flags);
-       do {
-               cpu = cpumask_next(pool->last_cpu, cpu_online_mask);
-               if (cpu >= nr_cpu_ids)
-                       cpu = cpumask_first(cpu_online_mask);
-               pool->last_cpu = cpu;
-       } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active);
-       spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
-
-       return cpu;
-}
-
-static void __queue_comp_task(struct ehca_cq *__cq,
-                             struct ehca_cpu_comp_task *cct,
-                             struct task_struct *thread)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&cct->task_lock, flags);
-       spin_lock(&__cq->task_lock);
-
-       if (__cq->nr_callbacks == 0) {
-               __cq->nr_callbacks++;
-               list_add_tail(&__cq->entry, &cct->cq_list);
-               cct->cq_jobs++;
-               wake_up_process(thread);
-       } else
-               __cq->nr_callbacks++;
-
-       spin_unlock(&__cq->task_lock);
-       spin_unlock_irqrestore(&cct->task_lock, flags);
-}
-
-static void queue_comp_task(struct ehca_cq *__cq)
-{
-       int cpu_id;
-       struct ehca_cpu_comp_task *cct;
-       struct task_struct *thread;
-       int cq_jobs;
-       unsigned long flags;
-
-       cpu_id = find_next_online_cpu(pool);
-       BUG_ON(!cpu_online(cpu_id));
-
-       cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
-       thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
-       BUG_ON(!cct || !thread);
-
-       spin_lock_irqsave(&cct->task_lock, flags);
-       cq_jobs = cct->cq_jobs;
-       spin_unlock_irqrestore(&cct->task_lock, flags);
-       if (cq_jobs > 0) {
-               cpu_id = find_next_online_cpu(pool);
-               cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
-               thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
-               BUG_ON(!cct || !thread);
-       }
-       __queue_comp_task(__cq, cct, thread);
-}
-
-static void run_comp_task(struct ehca_cpu_comp_task *cct)
-{
-       struct ehca_cq *cq;
-
-       while (!list_empty(&cct->cq_list)) {
-               cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
-               spin_unlock_irq(&cct->task_lock);
-
-               comp_event_callback(cq);
-               if (atomic_dec_and_test(&cq->nr_events))
-                       wake_up(&cq->wait_completion);
-
-               spin_lock_irq(&cct->task_lock);
-               spin_lock(&cq->task_lock);
-               cq->nr_callbacks--;
-               if (!cq->nr_callbacks) {
-                       list_del_init(cct->cq_list.next);
-                       cct->cq_jobs--;
-               }
-               spin_unlock(&cq->task_lock);
-       }
-}
-
-static void comp_task_park(unsigned int cpu)
-{
-       struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-       struct ehca_cpu_comp_task *target;
-       struct task_struct *thread;
-       struct ehca_cq *cq, *tmp;
-       LIST_HEAD(list);
-
-       spin_lock_irq(&cct->task_lock);
-       cct->cq_jobs = 0;
-       cct->active = 0;
-       list_splice_init(&cct->cq_list, &list);
-       spin_unlock_irq(&cct->task_lock);
-
-       cpu = find_next_online_cpu(pool);
-       target = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-       thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu);
-       spin_lock_irq(&target->task_lock);
-       list_for_each_entry_safe(cq, tmp, &list, entry) {
-               list_del(&cq->entry);
-               __queue_comp_task(cq, target, thread);
-       }
-       spin_unlock_irq(&target->task_lock);
-}
-
-static void comp_task_stop(unsigned int cpu, bool online)
-{
-       struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-
-       spin_lock_irq(&cct->task_lock);
-       cct->cq_jobs = 0;
-       cct->active = 0;
-       WARN_ON(!list_empty(&cct->cq_list));
-       spin_unlock_irq(&cct->task_lock);
-}
-
-static int comp_task_should_run(unsigned int cpu)
-{
-       struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-
-       return cct->cq_jobs;
-}
-
-static void comp_task(unsigned int cpu)
-{
-       struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks);
-       int cql_empty;
-
-       spin_lock_irq(&cct->task_lock);
-       cql_empty = list_empty(&cct->cq_list);
-       if (!cql_empty) {
-               __set_current_state(TASK_RUNNING);
-               run_comp_task(cct);
-       }
-       spin_unlock_irq(&cct->task_lock);
-}
-
-static struct smp_hotplug_thread comp_pool_threads = {
-       .thread_should_run      = comp_task_should_run,
-       .thread_fn              = comp_task,
-       .thread_comm            = "ehca_comp/%u",
-       .cleanup                = comp_task_stop,
-       .park                   = comp_task_park,
-};
-
-int ehca_create_comp_pool(void)
-{
-       int cpu, ret = -ENOMEM;
-
-       if (!ehca_scaling_code)
-               return 0;
-
-       pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
-       if (pool == NULL)
-               return -ENOMEM;
-
-       spin_lock_init(&pool->last_cpu_lock);
-       pool->last_cpu = cpumask_any(cpu_online_mask);
-
-       pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
-       if (!pool->cpu_comp_tasks)
-               goto out_pool;
-
-       pool->cpu_comp_threads = alloc_percpu(struct task_struct *);
-       if (!pool->cpu_comp_threads)
-               goto out_tasks;
-
-       for_each_present_cpu(cpu) {
-               struct ehca_cpu_comp_task *cct;
-
-               cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-               spin_lock_init(&cct->task_lock);
-               INIT_LIST_HEAD(&cct->cq_list);
-       }
-
-       comp_pool_threads.store = pool->cpu_comp_threads;
-       ret = smpboot_register_percpu_thread(&comp_pool_threads);
-       if (ret)
-               goto out_threads;
-
-       pr_info("eHCA scaling code enabled\n");
-       return ret;
-
-out_threads:
-       free_percpu(pool->cpu_comp_threads);
-out_tasks:
-       free_percpu(pool->cpu_comp_tasks);
-out_pool:
-       kfree(pool);
-       return ret;
-}
-
-void ehca_destroy_comp_pool(void)
-{
-       if (!ehca_scaling_code)
-               return;
-
-       smpboot_unregister_percpu_thread(&comp_pool_threads);
-
-       free_percpu(pool->cpu_comp_threads);
-       free_percpu(pool->cpu_comp_tasks);
-       kfree(pool);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_irq.h b/drivers/staging/rdma/ehca/ehca_irq.h
deleted file mode 100644 (file)
index 5370199..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Function definitions and structs for EQs, NEQs and interrupts
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_IRQ_H
-#define __EHCA_IRQ_H
-
-
-struct ehca_shca;
-
-#include <linux/interrupt.h>
-#include <linux/types.h>
-
-int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource);
-
-irqreturn_t ehca_interrupt_neq(int irq, void *dev_id);
-void ehca_tasklet_neq(unsigned long data);
-
-irqreturn_t ehca_interrupt_eq(int irq, void *dev_id);
-void ehca_tasklet_eq(unsigned long data);
-void ehca_process_eq(struct ehca_shca *shca, int is_irq);
-
-struct ehca_cpu_comp_task {
-       struct list_head cq_list;
-       spinlock_t task_lock;
-       int cq_jobs;
-       int active;
-};
-
-struct ehca_comp_pool {
-       struct ehca_cpu_comp_task __percpu *cpu_comp_tasks;
-       struct task_struct * __percpu *cpu_comp_threads;
-       int last_cpu;
-       spinlock_t last_cpu_lock;
-};
-
-int ehca_create_comp_pool(void);
-void ehca_destroy_comp_pool(void);
-
-#endif
diff --git a/drivers/staging/rdma/ehca/ehca_iverbs.h b/drivers/staging/rdma/ehca/ehca_iverbs.h
deleted file mode 100644 (file)
index 80e6a3d..0000000
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Function definitions for internal functions
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Dietmar Decker <ddecker@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_IVERBS_H__
-#define __EHCA_IVERBS_H__
-
-#include "ehca_classes.h"
-
-int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-                     struct ib_udata *uhw);
-
-int ehca_query_port(struct ib_device *ibdev, u8 port,
-                   struct ib_port_attr *props);
-
-enum rdma_protocol_type
-ehca_query_protocol(struct ib_device *device, u8 port_num);
-
-int ehca_query_sma_attr(struct ehca_shca *shca, u8 port,
-                       struct ehca_sma_attr *attr);
-
-int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey);
-
-int ehca_query_gid(struct ib_device *ibdev, u8 port, int index,
-                  union ib_gid *gid);
-
-int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask,
-                    struct ib_port_modify *props);
-
-struct ib_pd *ehca_alloc_pd(struct ib_device *device,
-                           struct ib_ucontext *context,
-                           struct ib_udata *udata);
-
-int ehca_dealloc_pd(struct ib_pd *pd);
-
-struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
-
-int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
-
-int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
-
-int ehca_destroy_ah(struct ib_ah *ah);
-
-struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
-
-struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
-                              struct ib_phys_buf *phys_buf_array,
-                              int num_phys_buf,
-                              int mr_access_flags, u64 *iova_start);
-
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-                              u64 virt, int mr_access_flags,
-                              struct ib_udata *udata);
-
-int ehca_rereg_phys_mr(struct ib_mr *mr,
-                      int mr_rereg_mask,
-                      struct ib_pd *pd,
-                      struct ib_phys_buf *phys_buf_array,
-                      int num_phys_buf, int mr_access_flags, u64 *iova_start);
-
-int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
-
-int ehca_dereg_mr(struct ib_mr *mr);
-
-struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-
-int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
-                struct ib_mw_bind *mw_bind);
-
-int ehca_dealloc_mw(struct ib_mw *mw);
-
-struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
-                             int mr_access_flags,
-                             struct ib_fmr_attr *fmr_attr);
-
-int ehca_map_phys_fmr(struct ib_fmr *fmr,
-                     u64 *page_list, int list_len, u64 iova);
-
-int ehca_unmap_fmr(struct list_head *fmr_list);
-
-int ehca_dealloc_fmr(struct ib_fmr *fmr);
-
-enum ehca_eq_type {
-       EHCA_EQ = 0, /* Event Queue              */
-       EHCA_NEQ     /* Notification Event Queue */
-};
-
-int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq,
-                  enum ehca_eq_type type, const u32 length);
-
-int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq);
-
-void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq);
-
-
-struct ib_cq *ehca_create_cq(struct ib_device *device,
-                            const struct ib_cq_init_attr *attr,
-                            struct ib_ucontext *context,
-                            struct ib_udata *udata);
-
-int ehca_destroy_cq(struct ib_cq *cq);
-
-int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
-
-int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
-
-int ehca_peek_cq(struct ib_cq *cq, int wc_cnt);
-
-int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags);
-
-struct ib_qp *ehca_create_qp(struct ib_pd *pd,
-                            struct ib_qp_init_attr *init_attr,
-                            struct ib_udata *udata);
-
-int ehca_destroy_qp(struct ib_qp *qp);
-
-int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
-                  struct ib_udata *udata);
-
-int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
-                 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
-
-int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
-                  struct ib_send_wr **bad_send_wr);
-
-int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
-                  struct ib_recv_wr **bad_recv_wr);
-
-int ehca_post_srq_recv(struct ib_srq *srq,
-                      struct ib_recv_wr *recv_wr,
-                      struct ib_recv_wr **bad_recv_wr);
-
-struct ib_srq *ehca_create_srq(struct ib_pd *pd,
-                              struct ib_srq_init_attr *init_attr,
-                              struct ib_udata *udata);
-
-int ehca_modify_srq(struct ib_srq *srq, struct ib_srq_attr *attr,
-                   enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
-
-int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
-
-int ehca_destroy_srq(struct ib_srq *srq);
-
-u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp,
-                   struct ib_qp_init_attr *qp_init_attr);
-
-int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
-
-int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
-
-struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
-                                       struct ib_udata *udata);
-
-int ehca_dealloc_ucontext(struct ib_ucontext *context);
-
-int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-
-int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
-                    const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-                    const struct ib_mad_hdr *in, size_t in_mad_size,
-                    struct ib_mad_hdr *out, size_t *out_mad_size,
-                    u16 *out_mad_pkey_index);
-
-void ehca_poll_eqs(unsigned long data);
-
-int ehca_calc_ipd(struct ehca_shca *shca, int port,
-                 enum ib_rate path_rate, u32 *ipd);
-
-void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq);
-
-#ifdef CONFIG_PPC_64K_PAGES
-void *ehca_alloc_fw_ctrlblock(gfp_t flags);
-void ehca_free_fw_ctrlblock(void *ptr);
-#else
-#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags))
-#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr))
-#endif
-
-void ehca_recover_sqp(struct ib_qp *sqp);
-
-#endif
diff --git a/drivers/staging/rdma/ehca/ehca_main.c b/drivers/staging/rdma/ehca/ehca_main.c
deleted file mode 100644 (file)
index 860b974..0000000
+++ /dev/null
@@ -1,1122 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  module start stop, hca detection
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifdef CONFIG_PPC_64K_PAGES
-#include <linux/slab.h>
-#endif
-
-#include <linux/notifier.h>
-#include <linux/memory.h>
-#include <rdma/ib_mad.h>
-#include "ehca_classes.h"
-#include "ehca_iverbs.h"
-#include "ehca_mrmw.h"
-#include "ehca_tools.h"
-#include "hcp_if.h"
-
-#define HCAD_VERSION "0029"
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
-MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
-MODULE_VERSION(HCAD_VERSION);
-
-static bool ehca_open_aqp1    = 0;
-static int ehca_hw_level      = 0;
-static bool ehca_poll_all_eqs = 1;
-
-int ehca_debug_level   = 0;
-int ehca_nr_ports      = -1;
-bool ehca_use_hp_mr    = 0;
-int ehca_port_act_time = 30;
-int ehca_static_rate   = -1;
-bool ehca_scaling_code = 0;
-int ehca_lock_hcalls   = -1;
-int ehca_max_cq        = -1;
-int ehca_max_qp        = -1;
-
-module_param_named(open_aqp1,     ehca_open_aqp1,     bool, S_IRUGO);
-module_param_named(debug_level,   ehca_debug_level,   int,  S_IRUGO);
-module_param_named(hw_level,      ehca_hw_level,      int,  S_IRUGO);
-module_param_named(nr_ports,      ehca_nr_ports,      int,  S_IRUGO);
-module_param_named(use_hp_mr,     ehca_use_hp_mr,     bool, S_IRUGO);
-module_param_named(port_act_time, ehca_port_act_time, int,  S_IRUGO);
-module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  bool, S_IRUGO);
-module_param_named(static_rate,   ehca_static_rate,   int,  S_IRUGO);
-module_param_named(scaling_code,  ehca_scaling_code,  bool, S_IRUGO);
-module_param_named(lock_hcalls,   ehca_lock_hcalls,   bint, S_IRUGO);
-module_param_named(number_of_cqs, ehca_max_cq,        int,  S_IRUGO);
-module_param_named(number_of_qps, ehca_max_qp,        int,  S_IRUGO);
-
-MODULE_PARM_DESC(open_aqp1,
-                "Open AQP1 on startup (default: no)");
-MODULE_PARM_DESC(debug_level,
-                "Amount of debug output (0: none (default), 1: traces, "
-                "2: some dumps, 3: lots)");
-MODULE_PARM_DESC(hw_level,
-                "Hardware level (0: autosensing (default), "
-                "0x10..0x14: eHCA, 0x20..0x23: eHCA2)");
-MODULE_PARM_DESC(nr_ports,
-                "number of connected ports (-1: autodetect (default), "
-                "1: port one only, 2: two ports)");
-MODULE_PARM_DESC(use_hp_mr,
-                "Use high performance MRs (default: no)");
-MODULE_PARM_DESC(port_act_time,
-                "Time to wait for port activation (default: 30 sec)");
-MODULE_PARM_DESC(poll_all_eqs,
-                "Poll all event queues periodically (default: yes)");
-MODULE_PARM_DESC(static_rate,
-                "Set permanent static rate (default: no static rate)");
-MODULE_PARM_DESC(scaling_code,
-                "Enable scaling code (default: no)");
-MODULE_PARM_DESC(lock_hcalls,
-                "Serialize all hCalls made by the driver "
-                "(default: autodetect)");
-MODULE_PARM_DESC(number_of_cqs,
-               "Max number of CQs which can be allocated "
-               "(default: autodetect)");
-MODULE_PARM_DESC(number_of_qps,
-               "Max number of QPs which can be allocated "
-               "(default: autodetect)");
-
-DEFINE_RWLOCK(ehca_qp_idr_lock);
-DEFINE_RWLOCK(ehca_cq_idr_lock);
-DEFINE_IDR(ehca_qp_idr);
-DEFINE_IDR(ehca_cq_idr);
-
-static LIST_HEAD(shca_list); /* list of all registered ehcas */
-DEFINE_SPINLOCK(shca_list_lock);
-
-static struct timer_list poll_eqs_timer;
-
-#ifdef CONFIG_PPC_64K_PAGES
-static struct kmem_cache *ctblk_cache;
-
-void *ehca_alloc_fw_ctrlblock(gfp_t flags)
-{
-       void *ret = kmem_cache_zalloc(ctblk_cache, flags);
-       if (!ret)
-               ehca_gen_err("Out of memory for ctblk");
-       return ret;
-}
-
-void ehca_free_fw_ctrlblock(void *ptr)
-{
-       if (ptr)
-               kmem_cache_free(ctblk_cache, ptr);
-
-}
-#endif
-
-int ehca2ib_return_code(u64 ehca_rc)
-{
-       switch (ehca_rc) {
-       case H_SUCCESS:
-               return 0;
-       case H_RESOURCE:             /* Resource in use */
-       case H_BUSY:
-               return -EBUSY;
-       case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
-       case H_CONSTRAINED:          /* resource constraint */
-       case H_NO_MEM:
-               return -ENOMEM;
-       default:
-               return -EINVAL;
-       }
-}
-
-static int ehca_create_slab_caches(void)
-{
-       int ret;
-
-       ret = ehca_init_pd_cache();
-       if (ret) {
-               ehca_gen_err("Cannot create PD SLAB cache.");
-               return ret;
-       }
-
-       ret = ehca_init_cq_cache();
-       if (ret) {
-               ehca_gen_err("Cannot create CQ SLAB cache.");
-               goto create_slab_caches2;
-       }
-
-       ret = ehca_init_qp_cache();
-       if (ret) {
-               ehca_gen_err("Cannot create QP SLAB cache.");
-               goto create_slab_caches3;
-       }
-
-       ret = ehca_init_av_cache();
-       if (ret) {
-               ehca_gen_err("Cannot create AV SLAB cache.");
-               goto create_slab_caches4;
-       }
-
-       ret = ehca_init_mrmw_cache();
-       if (ret) {
-               ehca_gen_err("Cannot create MR&MW SLAB cache.");
-               goto create_slab_caches5;
-       }
-
-       ret = ehca_init_small_qp_cache();
-       if (ret) {
-               ehca_gen_err("Cannot create small queue SLAB cache.");
-               goto create_slab_caches6;
-       }
-
-#ifdef CONFIG_PPC_64K_PAGES
-       ctblk_cache = kmem_cache_create("ehca_cache_ctblk",
-                                       EHCA_PAGESIZE, H_CB_ALIGNMENT,
-                                       SLAB_HWCACHE_ALIGN,
-                                       NULL);
-       if (!ctblk_cache) {
-               ehca_gen_err("Cannot create ctblk SLAB cache.");
-               ehca_cleanup_small_qp_cache();
-               ret = -ENOMEM;
-               goto create_slab_caches6;
-       }
-#endif
-       return 0;
-
-create_slab_caches6:
-       ehca_cleanup_mrmw_cache();
-
-create_slab_caches5:
-       ehca_cleanup_av_cache();
-
-create_slab_caches4:
-       ehca_cleanup_qp_cache();
-
-create_slab_caches3:
-       ehca_cleanup_cq_cache();
-
-create_slab_caches2:
-       ehca_cleanup_pd_cache();
-
-       return ret;
-}
-
-static void ehca_destroy_slab_caches(void)
-{
-       ehca_cleanup_small_qp_cache();
-       ehca_cleanup_mrmw_cache();
-       ehca_cleanup_av_cache();
-       ehca_cleanup_qp_cache();
-       ehca_cleanup_cq_cache();
-       ehca_cleanup_pd_cache();
-#ifdef CONFIG_PPC_64K_PAGES
-       kmem_cache_destroy(ctblk_cache);
-#endif
-}
-
-#define EHCA_HCAAVER  EHCA_BMASK_IBM(32, 39)
-#define EHCA_REVID    EHCA_BMASK_IBM(40, 63)
-
-static struct cap_descr {
-       u64 mask;
-       char *descr;
-} hca_cap_descr[] = {
-       { HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" },
-       { HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" },
-       { HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" },
-       { HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" },
-       { HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" },
-       { HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" },
-       { HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" },
-       { HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" },
-       { HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" },
-       { HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" },
-       { HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" },
-       { HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" },
-       { HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" },
-       { HCA_CAP_SRQ, "HCA_CAP_SRQ" },
-       { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" },
-       { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" },
-       { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" },
-       { HCA_CAP_H_ALLOC_RES_SYNC, "HCA_CAP_H_ALLOC_RES_SYNC" },
-};
-
-static int ehca_sense_attributes(struct ehca_shca *shca)
-{
-       int i, ret = 0;
-       u64 h_ret;
-       struct hipz_query_hca *rblock;
-       struct hipz_query_port *port;
-       const char *loc_code;
-
-       static const u32 pgsize_map[] = {
-               HCA_CAP_MR_PGSIZE_4K,  0x1000,
-               HCA_CAP_MR_PGSIZE_64K, 0x10000,
-               HCA_CAP_MR_PGSIZE_1M,  0x100000,
-               HCA_CAP_MR_PGSIZE_16M, 0x1000000,
-       };
-
-       ehca_gen_dbg("Probing adapter %s...",
-                    shca->ofdev->dev.of_node->full_name);
-       loc_code = of_get_property(shca->ofdev->dev.of_node, "ibm,loc-code",
-                                  NULL);
-       if (loc_code)
-               ehca_gen_dbg(" ... location lode=%s", loc_code);
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_gen_err("Cannot allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
-       if (h_ret != H_SUCCESS) {
-               ehca_gen_err("Cannot query device properties. h_ret=%lli",
-                            h_ret);
-               ret = -EPERM;
-               goto sense_attributes1;
-       }
-
-       if (ehca_nr_ports == 1)
-               shca->num_ports = 1;
-       else
-               shca->num_ports = (u8)rblock->num_ports;
-
-       ehca_gen_dbg(" ... found %x ports", rblock->num_ports);
-
-       if (ehca_hw_level == 0) {
-               u32 hcaaver;
-               u32 revid;
-
-               hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver);
-               revid   = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver);
-
-               ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);
-
-               if (hcaaver == 1) {
-                       if (revid <= 3)
-                               shca->hw_level = 0x10 | (revid + 1);
-                       else
-                               shca->hw_level = 0x14;
-               } else if (hcaaver == 2) {
-                       if (revid == 0)
-                               shca->hw_level = 0x21;
-                       else if (revid == 0x10)
-                               shca->hw_level = 0x22;
-                       else if (revid == 0x20 || revid == 0x21)
-                               shca->hw_level = 0x23;
-               }
-
-               if (!shca->hw_level) {
-                       ehca_gen_warn("unknown hardware version"
-                                     " - assuming default level");
-                       shca->hw_level = 0x22;
-               }
-       } else
-               shca->hw_level = ehca_hw_level;
-       ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);
-
-       shca->hca_cap = rblock->hca_cap_indicators;
-       ehca_gen_dbg(" ... HCA capabilities:");
-       for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++)
-               if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
-                       ehca_gen_dbg("   %s", hca_cap_descr[i].descr);
-
-       /* Autodetect hCall locking -- the "H_ALLOC_RESOURCE synced" flag is
-        * a firmware property, so it's valid across all adapters
-        */
-       if (ehca_lock_hcalls == -1)
-               ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC,
-                                       shca->hca_cap);
-
-       /* translate supported MR page sizes; always support 4K */
-       shca->hca_cap_mr_pgsize = EHCA_PAGESIZE;
-       for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2)
-               if (rblock->memory_page_size_supported & pgsize_map[i])
-                       shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
-
-       /* Set maximum number of CQs and QPs to calculate EQ size */
-       if (shca->max_num_qps == -1)
-               shca->max_num_qps = min_t(int, rblock->max_qp,
-                                         EHCA_MAX_NUM_QUEUES);
-       else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) {
-               ehca_gen_warn("The requested number of QPs is out of range "
-                             "(1 - %i) specified by HW. Value is set to %i",
-                             rblock->max_qp, rblock->max_qp);
-               shca->max_num_qps = rblock->max_qp;
-       }
-
-       if (shca->max_num_cqs == -1)
-               shca->max_num_cqs = min_t(int, rblock->max_cq,
-                                         EHCA_MAX_NUM_QUEUES);
-       else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) {
-               ehca_gen_warn("The requested number of CQs is out of range "
-                             "(1 - %i) specified by HW. Value is set to %i",
-                             rblock->max_cq, rblock->max_cq);
-       }
-
-       /* query max MTU from first port -- it's the same for all ports */
-       port = (struct hipz_query_port *)rblock;
-       h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
-       if (h_ret != H_SUCCESS) {
-               ehca_gen_err("Cannot query port properties. h_ret=%lli",
-                            h_ret);
-               ret = -EPERM;
-               goto sense_attributes1;
-       }
-
-       shca->max_mtu = port->max_mtu;
-
-sense_attributes1:
-       ehca_free_fw_ctrlblock(rblock);
-       return ret;
-}
-
-static int init_node_guid(struct ehca_shca *shca)
-{
-       int ret = 0;
-       struct hipz_query_hca *rblock;
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query device properties");
-               ret = -EINVAL;
-               goto init_node_guid1;
-       }
-
-       memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64));
-
-init_node_guid1:
-       ehca_free_fw_ctrlblock(rblock);
-       return ret;
-}
-
-static int ehca_port_immutable(struct ib_device *ibdev, u8 port_num,
-                              struct ib_port_immutable *immutable)
-{
-       struct ib_port_attr attr;
-       int err;
-
-       err = ehca_query_port(ibdev, port_num, &attr);
-       if (err)
-               return err;
-
-       immutable->pkey_tbl_len = attr.pkey_tbl_len;
-       immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
-       immutable->max_mad_size = IB_MGMT_MAD_SIZE;
-
-       return 0;
-}
-
-static int ehca_init_device(struct ehca_shca *shca)
-{
-       int ret;
-
-       ret = init_node_guid(shca);
-       if (ret)
-               return ret;
-
-       strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
-       shca->ib_device.owner               = THIS_MODULE;
-
-       shca->ib_device.uverbs_abi_ver      = 8;
-       shca->ib_device.uverbs_cmd_mask     =
-               (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
-               (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
-               (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
-               (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
-               (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
-               (1ull << IB_USER_VERBS_CMD_REG_MR)              |
-               (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
-               (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-               (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
-               (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
-               (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
-               (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
-               (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
-               (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
-               (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
-               (1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
-
-       shca->ib_device.node_type           = RDMA_NODE_IB_CA;
-       shca->ib_device.phys_port_cnt       = shca->num_ports;
-       shca->ib_device.num_comp_vectors    = 1;
-       shca->ib_device.dma_device          = &shca->ofdev->dev;
-       shca->ib_device.query_device        = ehca_query_device;
-       shca->ib_device.query_port          = ehca_query_port;
-       shca->ib_device.query_gid           = ehca_query_gid;
-       shca->ib_device.query_pkey          = ehca_query_pkey;
-       /* shca->in_device.modify_device    = ehca_modify_device    */
-       shca->ib_device.modify_port         = ehca_modify_port;
-       shca->ib_device.alloc_ucontext      = ehca_alloc_ucontext;
-       shca->ib_device.dealloc_ucontext    = ehca_dealloc_ucontext;
-       shca->ib_device.alloc_pd            = ehca_alloc_pd;
-       shca->ib_device.dealloc_pd          = ehca_dealloc_pd;
-       shca->ib_device.create_ah           = ehca_create_ah;
-       /* shca->ib_device.modify_ah        = ehca_modify_ah;       */
-       shca->ib_device.query_ah            = ehca_query_ah;
-       shca->ib_device.destroy_ah          = ehca_destroy_ah;
-       shca->ib_device.create_qp           = ehca_create_qp;
-       shca->ib_device.modify_qp           = ehca_modify_qp;
-       shca->ib_device.query_qp            = ehca_query_qp;
-       shca->ib_device.destroy_qp          = ehca_destroy_qp;
-       shca->ib_device.post_send           = ehca_post_send;
-       shca->ib_device.post_recv           = ehca_post_recv;
-       shca->ib_device.create_cq           = ehca_create_cq;
-       shca->ib_device.destroy_cq          = ehca_destroy_cq;
-       shca->ib_device.resize_cq           = ehca_resize_cq;
-       shca->ib_device.poll_cq             = ehca_poll_cq;
-       /* shca->ib_device.peek_cq          = ehca_peek_cq;         */
-       shca->ib_device.req_notify_cq       = ehca_req_notify_cq;
-       /* shca->ib_device.req_ncomp_notif  = ehca_req_ncomp_notif; */
-       shca->ib_device.get_dma_mr          = ehca_get_dma_mr;
-       shca->ib_device.reg_phys_mr         = ehca_reg_phys_mr;
-       shca->ib_device.reg_user_mr         = ehca_reg_user_mr;
-       shca->ib_device.query_mr            = ehca_query_mr;
-       shca->ib_device.dereg_mr            = ehca_dereg_mr;
-       shca->ib_device.rereg_phys_mr       = ehca_rereg_phys_mr;
-       shca->ib_device.alloc_mw            = ehca_alloc_mw;
-       shca->ib_device.bind_mw             = ehca_bind_mw;
-       shca->ib_device.dealloc_mw          = ehca_dealloc_mw;
-       shca->ib_device.alloc_fmr           = ehca_alloc_fmr;
-       shca->ib_device.map_phys_fmr        = ehca_map_phys_fmr;
-       shca->ib_device.unmap_fmr           = ehca_unmap_fmr;
-       shca->ib_device.dealloc_fmr         = ehca_dealloc_fmr;
-       shca->ib_device.attach_mcast        = ehca_attach_mcast;
-       shca->ib_device.detach_mcast        = ehca_detach_mcast;
-       shca->ib_device.process_mad         = ehca_process_mad;
-       shca->ib_device.mmap                = ehca_mmap;
-       shca->ib_device.dma_ops             = &ehca_dma_mapping_ops;
-       shca->ib_device.get_port_immutable  = ehca_port_immutable;
-
-       if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
-               shca->ib_device.uverbs_cmd_mask |=
-                       (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
-                       (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
-                       (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
-                       (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
-
-               shca->ib_device.create_srq          = ehca_create_srq;
-               shca->ib_device.modify_srq          = ehca_modify_srq;
-               shca->ib_device.query_srq           = ehca_query_srq;
-               shca->ib_device.destroy_srq         = ehca_destroy_srq;
-               shca->ib_device.post_srq_recv       = ehca_post_srq_recv;
-       }
-
-       return ret;
-}
-
-static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
-{
-       struct ehca_sport *sport = &shca->sport[port - 1];
-       struct ib_cq *ibcq;
-       struct ib_qp *ibqp;
-       struct ib_qp_init_attr qp_init_attr;
-       struct ib_cq_init_attr cq_attr = {};
-       int ret;
-
-       if (sport->ibcq_aqp1) {
-               ehca_err(&shca->ib_device, "AQP1 CQ is already created.");
-               return -EPERM;
-       }
-
-       cq_attr.cqe = 10;
-       ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1),
-                           &cq_attr);
-       if (IS_ERR(ibcq)) {
-               ehca_err(&shca->ib_device, "Cannot create AQP1 CQ.");
-               return PTR_ERR(ibcq);
-       }
-       sport->ibcq_aqp1 = ibcq;
-
-       if (sport->ibqp_sqp[IB_QPT_GSI]) {
-               ehca_err(&shca->ib_device, "AQP1 QP is already created.");
-               ret = -EPERM;
-               goto create_aqp1;
-       }
-
-       memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr));
-       qp_init_attr.send_cq          = ibcq;
-       qp_init_attr.recv_cq          = ibcq;
-       qp_init_attr.sq_sig_type      = IB_SIGNAL_ALL_WR;
-       qp_init_attr.cap.max_send_wr  = 100;
-       qp_init_attr.cap.max_recv_wr  = 100;
-       qp_init_attr.cap.max_send_sge = 2;
-       qp_init_attr.cap.max_recv_sge = 1;
-       qp_init_attr.qp_type          = IB_QPT_GSI;
-       qp_init_attr.port_num         = port;
-       qp_init_attr.qp_context       = NULL;
-       qp_init_attr.event_handler    = NULL;
-       qp_init_attr.srq              = NULL;
-
-       ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr);
-       if (IS_ERR(ibqp)) {
-               ehca_err(&shca->ib_device, "Cannot create AQP1 QP.");
-               ret = PTR_ERR(ibqp);
-               goto create_aqp1;
-       }
-       sport->ibqp_sqp[IB_QPT_GSI] = ibqp;
-
-       return 0;
-
-create_aqp1:
-       ib_destroy_cq(sport->ibcq_aqp1);
-       return ret;
-}
-
-static int ehca_destroy_aqp1(struct ehca_sport *sport)
-{
-       int ret;
-
-       ret = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]);
-       if (ret) {
-               ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret);
-               return ret;
-       }
-
-       ret = ib_destroy_cq(sport->ibcq_aqp1);
-       if (ret)
-               ehca_gen_err("Cannot destroy AQP1 CQ. ret=%i", ret);
-
-       return ret;
-}
-
-static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
-{
-       return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level);
-}
-
-static ssize_t ehca_store_debug_level(struct device_driver *ddp,
-                                     const char *buf, size_t count)
-{
-       int value = (*buf) - '0';
-       if (value >= 0 && value <= 9)
-               ehca_debug_level = value;
-       return 1;
-}
-
-static DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
-                  ehca_show_debug_level, ehca_store_debug_level);
-
-static struct attribute *ehca_drv_attrs[] = {
-       &driver_attr_debug_level.attr,
-       NULL
-};
-
-static struct attribute_group ehca_drv_attr_grp = {
-       .attrs = ehca_drv_attrs
-};
-
-static const struct attribute_group *ehca_drv_attr_groups[] = {
-       &ehca_drv_attr_grp,
-       NULL,
-};
-
-#define EHCA_RESOURCE_ATTR(name)                                           \
-static ssize_t  ehca_show_##name(struct device *dev,                       \
-                                struct device_attribute *attr,            \
-                                char *buf)                                \
-{                                                                         \
-       struct ehca_shca *shca;                                            \
-       struct hipz_query_hca *rblock;                                     \
-       int data;                                                          \
-                                                                          \
-       shca = dev_get_drvdata(dev);                                       \
-                                                                          \
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);                      \
-       if (!rblock) {                                                     \
-               dev_err(dev, "Can't allocate rblock memory.\n");           \
-               return 0;                                                  \
-       }                                                                  \
-                                                                          \
-       if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \
-               dev_err(dev, "Can't query device properties\n");           \
-               ehca_free_fw_ctrlblock(rblock);                            \
-               return 0;                                                  \
-       }                                                                  \
-                                                                          \
-       data = rblock->name;                                               \
-       ehca_free_fw_ctrlblock(rblock);                                    \
-                                                                          \
-       if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1))     \
-               return snprintf(buf, 256, "1\n");                          \
-       else                                                               \
-               return snprintf(buf, 256, "%d\n", data);                   \
-                                                                          \
-}                                                                         \
-static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL);
-
-EHCA_RESOURCE_ATTR(num_ports);
-EHCA_RESOURCE_ATTR(hw_ver);
-EHCA_RESOURCE_ATTR(max_eq);
-EHCA_RESOURCE_ATTR(cur_eq);
-EHCA_RESOURCE_ATTR(max_cq);
-EHCA_RESOURCE_ATTR(cur_cq);
-EHCA_RESOURCE_ATTR(max_qp);
-EHCA_RESOURCE_ATTR(cur_qp);
-EHCA_RESOURCE_ATTR(max_mr);
-EHCA_RESOURCE_ATTR(cur_mr);
-EHCA_RESOURCE_ATTR(max_mw);
-EHCA_RESOURCE_ATTR(cur_mw);
-EHCA_RESOURCE_ATTR(max_pd);
-EHCA_RESOURCE_ATTR(max_ah);
-
-static ssize_t ehca_show_adapter_handle(struct device *dev,
-                                       struct device_attribute *attr,
-                                       char *buf)
-{
-       struct ehca_shca *shca = dev_get_drvdata(dev);
-
-       return sprintf(buf, "%llx\n", shca->ipz_hca_handle.handle);
-
-}
-static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
-
-static struct attribute *ehca_dev_attrs[] = {
-       &dev_attr_adapter_handle.attr,
-       &dev_attr_num_ports.attr,
-       &dev_attr_hw_ver.attr,
-       &dev_attr_max_eq.attr,
-       &dev_attr_cur_eq.attr,
-       &dev_attr_max_cq.attr,
-       &dev_attr_cur_cq.attr,
-       &dev_attr_max_qp.attr,
-       &dev_attr_cur_qp.attr,
-       &dev_attr_max_mr.attr,
-       &dev_attr_cur_mr.attr,
-       &dev_attr_max_mw.attr,
-       &dev_attr_cur_mw.attr,
-       &dev_attr_max_pd.attr,
-       &dev_attr_max_ah.attr,
-       NULL
-};
-
-static struct attribute_group ehca_dev_attr_grp = {
-       .attrs = ehca_dev_attrs
-};
-
-static int ehca_probe(struct platform_device *dev)
-{
-       struct ehca_shca *shca;
-       const u64 *handle;
-       struct ib_pd *ibpd;
-       int ret, i, eq_size;
-       unsigned long flags;
-
-       handle = of_get_property(dev->dev.of_node, "ibm,hca-handle", NULL);
-       if (!handle) {
-               ehca_gen_err("Cannot get eHCA handle for adapter: %s.",
-                            dev->dev.of_node->full_name);
-               return -ENODEV;
-       }
-
-       if (!(*handle)) {
-               ehca_gen_err("Wrong eHCA handle for adapter: %s.",
-                            dev->dev.of_node->full_name);
-               return -ENODEV;
-       }
-
-       shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca));
-       if (!shca) {
-               ehca_gen_err("Cannot allocate shca memory.");
-               return -ENOMEM;
-       }
-
-       mutex_init(&shca->modify_mutex);
-       atomic_set(&shca->num_cqs, 0);
-       atomic_set(&shca->num_qps, 0);
-       shca->max_num_qps = ehca_max_qp;
-       shca->max_num_cqs = ehca_max_cq;
-
-       for (i = 0; i < ARRAY_SIZE(shca->sport); i++)
-               spin_lock_init(&shca->sport[i].mod_sqp_lock);
-
-       shca->ofdev = dev;
-       shca->ipz_hca_handle.handle = *handle;
-       dev_set_drvdata(&dev->dev, shca);
-
-       ret = ehca_sense_attributes(shca);
-       if (ret < 0) {
-               ehca_gen_err("Cannot sense eHCA attributes.");
-               goto probe1;
-       }
-
-       ret = ehca_init_device(shca);
-       if (ret) {
-               ehca_gen_err("Cannot init ehca  device struct");
-               goto probe1;
-       }
-
-       eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps;
-       /* create event queues */
-       ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size);
-       if (ret) {
-               ehca_err(&shca->ib_device, "Cannot create EQ.");
-               goto probe1;
-       }
-
-       ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513);
-       if (ret) {
-               ehca_err(&shca->ib_device, "Cannot create NEQ.");
-               goto probe3;
-       }
-
-       /* create internal protection domain */
-       ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL);
-       if (IS_ERR(ibpd)) {
-               ehca_err(&shca->ib_device, "Cannot create internal PD.");
-               ret = PTR_ERR(ibpd);
-               goto probe4;
-       }
-
-       shca->pd = container_of(ibpd, struct ehca_pd, ib_pd);
-       shca->pd->ib_pd.device = &shca->ib_device;
-
-       /* create internal max MR */
-       ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr);
-
-       if (ret) {
-               ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i",
-                        ret);
-               goto probe5;
-       }
-
-       ret = ib_register_device(&shca->ib_device, NULL);
-       if (ret) {
-               ehca_err(&shca->ib_device,
-                        "ib_register_device() failed ret=%i", ret);
-               goto probe6;
-       }
-
-       /* create AQP1 for port 1 */
-       if (ehca_open_aqp1 == 1) {
-               shca->sport[0].port_state = IB_PORT_DOWN;
-               ret = ehca_create_aqp1(shca, 1);
-               if (ret) {
-                       ehca_err(&shca->ib_device,
-                                "Cannot create AQP1 for port 1.");
-                       goto probe7;
-               }
-       }
-
-       /* create AQP1 for port 2 */
-       if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) {
-               shca->sport[1].port_state = IB_PORT_DOWN;
-               ret = ehca_create_aqp1(shca, 2);
-               if (ret) {
-                       ehca_err(&shca->ib_device,
-                                "Cannot create AQP1 for port 2.");
-                       goto probe8;
-               }
-       }
-
-       ret = sysfs_create_group(&dev->dev.kobj, &ehca_dev_attr_grp);
-       if (ret) /* only complain; we can live without attributes */
-               ehca_err(&shca->ib_device,
-                        "Cannot create device attributes  ret=%d", ret);
-
-       spin_lock_irqsave(&shca_list_lock, flags);
-       list_add(&shca->shca_list, &shca_list);
-       spin_unlock_irqrestore(&shca_list_lock, flags);
-
-       return 0;
-
-probe8:
-       ret = ehca_destroy_aqp1(&shca->sport[0]);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy AQP1 for port 1. ret=%i", ret);
-
-probe7:
-       ib_unregister_device(&shca->ib_device);
-
-probe6:
-       ret = ehca_dereg_internal_maxmr(shca);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy internal MR. ret=%x", ret);
-
-probe5:
-       ret = ehca_dealloc_pd(&shca->pd->ib_pd);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy internal PD. ret=%x", ret);
-
-probe4:
-       ret = ehca_destroy_eq(shca, &shca->neq);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy NEQ. ret=%x", ret);
-
-probe3:
-       ret = ehca_destroy_eq(shca, &shca->eq);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy EQ. ret=%x", ret);
-
-probe1:
-       ib_dealloc_device(&shca->ib_device);
-
-       return -EINVAL;
-}
-
-static int ehca_remove(struct platform_device *dev)
-{
-       struct ehca_shca *shca = dev_get_drvdata(&dev->dev);
-       unsigned long flags;
-       int ret;
-
-       sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp);
-
-       if (ehca_open_aqp1 == 1) {
-               int i;
-               for (i = 0; i < shca->num_ports; i++) {
-                       ret = ehca_destroy_aqp1(&shca->sport[i]);
-                       if (ret)
-                               ehca_err(&shca->ib_device,
-                                        "Cannot destroy AQP1 for port %x "
-                                        "ret=%i", ret, i);
-               }
-       }
-
-       ib_unregister_device(&shca->ib_device);
-
-       ret = ehca_dereg_internal_maxmr(shca);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy internal MR. ret=%i", ret);
-
-       ret = ehca_dealloc_pd(&shca->pd->ib_pd);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy internal PD. ret=%i", ret);
-
-       ret = ehca_destroy_eq(shca, &shca->eq);
-       if (ret)
-               ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret);
-
-       ret = ehca_destroy_eq(shca, &shca->neq);
-       if (ret)
-               ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%i", ret);
-
-       ib_dealloc_device(&shca->ib_device);
-
-       spin_lock_irqsave(&shca_list_lock, flags);
-       list_del(&shca->shca_list);
-       spin_unlock_irqrestore(&shca_list_lock, flags);
-
-       return ret;
-}
-
-static struct of_device_id ehca_device_table[] =
-{
-       {
-               .name       = "lhca",
-               .compatible = "IBM,lhca",
-       },
-       {},
-};
-MODULE_DEVICE_TABLE(of, ehca_device_table);
-
-static struct platform_driver ehca_driver = {
-       .probe       = ehca_probe,
-       .remove      = ehca_remove,
-       .driver = {
-               .name = "ehca",
-               .owner = THIS_MODULE,
-               .groups = ehca_drv_attr_groups,
-               .of_match_table = ehca_device_table,
-       },
-};
-
-void ehca_poll_eqs(unsigned long data)
-{
-       struct ehca_shca *shca;
-
-       spin_lock(&shca_list_lock);
-       list_for_each_entry(shca, &shca_list, shca_list) {
-               if (shca->eq.is_initialized) {
-                       /* call deadman proc only if eq ptr does not change */
-                       struct ehca_eq *eq = &shca->eq;
-                       int max = 3;
-                       volatile u64 q_ofs, q_ofs2;
-                       unsigned long flags;
-                       spin_lock_irqsave(&eq->spinlock, flags);
-                       q_ofs = eq->ipz_queue.current_q_offset;
-                       spin_unlock_irqrestore(&eq->spinlock, flags);
-                       do {
-                               spin_lock_irqsave(&eq->spinlock, flags);
-                               q_ofs2 = eq->ipz_queue.current_q_offset;
-                               spin_unlock_irqrestore(&eq->spinlock, flags);
-                               max--;
-                       } while (q_ofs == q_ofs2 && max > 0);
-                       if (q_ofs == q_ofs2)
-                               ehca_process_eq(shca, 0);
-               }
-       }
-       mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ));
-       spin_unlock(&shca_list_lock);
-}
-
-static int ehca_mem_notifier(struct notifier_block *nb,
-                            unsigned long action, void *data)
-{
-       static unsigned long ehca_dmem_warn_time;
-       unsigned long flags;
-
-       switch (action) {
-       case MEM_CANCEL_OFFLINE:
-       case MEM_CANCEL_ONLINE:
-       case MEM_ONLINE:
-       case MEM_OFFLINE:
-               return NOTIFY_OK;
-       case MEM_GOING_ONLINE:
-       case MEM_GOING_OFFLINE:
-               /* only ok if no hca is attached to the lpar */
-               spin_lock_irqsave(&shca_list_lock, flags);
-               if (list_empty(&shca_list)) {
-                       spin_unlock_irqrestore(&shca_list_lock, flags);
-                       return NOTIFY_OK;
-               } else {
-                       spin_unlock_irqrestore(&shca_list_lock, flags);
-                       if (printk_timed_ratelimit(&ehca_dmem_warn_time,
-                                                  30 * 1000))
-                               ehca_gen_err("DMEM operations are not allowed"
-                                            "in conjunction with eHCA");
-                       return NOTIFY_BAD;
-               }
-       }
-       return NOTIFY_OK;
-}
-
-static struct notifier_block ehca_mem_nb = {
-       .notifier_call = ehca_mem_notifier,
-};
-
-static int __init ehca_module_init(void)
-{
-       int ret;
-
-       printk(KERN_INFO "eHCA Infiniband Device Driver "
-              "(Version " HCAD_VERSION ")\n");
-
-       ret = ehca_create_comp_pool();
-       if (ret) {
-               ehca_gen_err("Cannot create comp pool.");
-               return ret;
-       }
-
-       ret = ehca_create_slab_caches();
-       if (ret) {
-               ehca_gen_err("Cannot create SLAB caches");
-               ret = -ENOMEM;
-               goto module_init1;
-       }
-
-       ret = ehca_create_busmap();
-       if (ret) {
-               ehca_gen_err("Cannot create busmap.");
-               goto module_init2;
-       }
-
-       ret = ibmebus_register_driver(&ehca_driver);
-       if (ret) {
-               ehca_gen_err("Cannot register eHCA device driver");
-               ret = -EINVAL;
-               goto module_init3;
-       }
-
-       ret = register_memory_notifier(&ehca_mem_nb);
-       if (ret) {
-               ehca_gen_err("Failed registering memory add/remove notifier");
-               goto module_init4;
-       }
-
-       if (ehca_poll_all_eqs != 1) {
-               ehca_gen_err("WARNING!!!");
-               ehca_gen_err("It is possible to lose interrupts.");
-       } else {
-               init_timer(&poll_eqs_timer);
-               poll_eqs_timer.function = ehca_poll_eqs;
-               poll_eqs_timer.expires = jiffies + HZ;
-               add_timer(&poll_eqs_timer);
-       }
-
-       return 0;
-
-module_init4:
-       ibmebus_unregister_driver(&ehca_driver);
-
-module_init3:
-       ehca_destroy_busmap();
-
-module_init2:
-       ehca_destroy_slab_caches();
-
-module_init1:
-       ehca_destroy_comp_pool();
-       return ret;
-};
-
-static void __exit ehca_module_exit(void)
-{
-       if (ehca_poll_all_eqs == 1)
-               del_timer_sync(&poll_eqs_timer);
-
-       ibmebus_unregister_driver(&ehca_driver);
-
-       unregister_memory_notifier(&ehca_mem_nb);
-
-       ehca_destroy_busmap();
-
-       ehca_destroy_slab_caches();
-
-       ehca_destroy_comp_pool();
-
-       idr_destroy(&ehca_cq_idr);
-       idr_destroy(&ehca_qp_idr);
-};
-
-module_init(ehca_module_init);
-module_exit(ehca_module_exit);
diff --git a/drivers/staging/rdma/ehca/ehca_mcast.c b/drivers/staging/rdma/ehca/ehca_mcast.c
deleted file mode 100644 (file)
index cec1815..0000000
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  mcast  functions
- *
- *  Authors: Khadija Souissi <souissik@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/module.h>
-#include <linux/err.h>
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-#define MAX_MC_LID 0xFFFE
-#define MIN_MC_LID 0xC000      /* Multicast limits */
-#define EHCA_VALID_MULTICAST_GID(gid)  ((gid)[0] == 0xFF)
-#define EHCA_VALID_MULTICAST_LID(lid) \
-       (((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID))
-
-int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-       struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-       struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
-                                             ib_device);
-       union ib_gid my_gid;
-       u64 subnet_prefix, interface_id, h_ret;
-
-       if (ibqp->qp_type != IB_QPT_UD) {
-               ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type);
-               return -EINVAL;
-       }
-
-       if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
-               ehca_err(ibqp->device, "invalid mulitcast gid");
-               return -EINVAL;
-       } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
-               ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid);
-               return -EINVAL;
-       }
-
-       memcpy(&my_gid, gid->raw, sizeof(union ib_gid));
-
-       subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
-       interface_id = be64_to_cpu(my_gid.global.interface_id);
-       h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle,
-                                  my_qp->ipz_qp_handle,
-                                  my_qp->galpas.kernel,
-                                  lid, subnet_prefix, interface_id);
-       if (h_ret != H_SUCCESS)
-               ehca_err(ibqp->device,
-                        "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed "
-                        "h_ret=%lli", my_qp, ibqp->qp_num, h_ret);
-
-       return ehca2ib_return_code(h_ret);
-}
-
-int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-       struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-       struct ehca_shca *shca = container_of(ibqp->pd->device,
-                                             struct ehca_shca, ib_device);
-       union ib_gid my_gid;
-       u64 subnet_prefix, interface_id, h_ret;
-
-       if (ibqp->qp_type != IB_QPT_UD) {
-               ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type);
-               return -EINVAL;
-       }
-
-       if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
-               ehca_err(ibqp->device, "invalid mulitcast gid");
-               return -EINVAL;
-       } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
-               ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid);
-               return -EINVAL;
-       }
-
-       memcpy(&my_gid, gid->raw, sizeof(union ib_gid));
-
-       subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
-       interface_id = be64_to_cpu(my_gid.global.interface_id);
-       h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle,
-                                  my_qp->ipz_qp_handle,
-                                  my_qp->galpas.kernel,
-                                  lid, subnet_prefix, interface_id);
-       if (h_ret != H_SUCCESS)
-               ehca_err(ibqp->device,
-                        "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed "
-                        "h_ret=%lli", my_qp, ibqp->qp_num, h_ret);
-
-       return ehca2ib_return_code(h_ret);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.c b/drivers/staging/rdma/ehca/ehca_mrmw.c
deleted file mode 100644 (file)
index 553e883..0000000
+++ /dev/null
@@ -1,2591 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  MR/MW functions
- *
- *  Authors: Dietmar Decker <ddecker@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-#include <rdma/ib_umem.h>
-
-#include "ehca_iverbs.h"
-#include "ehca_mrmw.h"
-#include "hcp_if.h"
-#include "hipz_hw.h"
-
-#define NUM_CHUNKS(length, chunk_size) \
-       (((length) + (chunk_size - 1)) / (chunk_size))
-
-/* max number of rpages (per hcall register_rpages) */
-#define MAX_RPAGES 512
-
-/* DMEM toleration management */
-#define EHCA_SECTSHIFT        SECTION_SIZE_BITS
-#define EHCA_SECTSIZE          (1UL << EHCA_SECTSHIFT)
-#define EHCA_HUGEPAGESHIFT     34
-#define EHCA_HUGEPAGE_SIZE     (1UL << EHCA_HUGEPAGESHIFT)
-#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT)
-#define EHCA_INVAL_ADDR        0xFFFFFFFFFFFFFFFFULL
-#define EHCA_DIR_INDEX_SHIFT 13                   /* 8k Entries in 64k block */
-#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2)
-#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT)
-#define EHCA_TOP_MAP_SIZE (0x10000)               /* currently fixed map size */
-#define EHCA_DIR_MAP_SIZE (0x10000)
-#define EHCA_ENT_MAP_SIZE (0x10000)
-#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1)
-
-static unsigned long ehca_mr_len;
-
-/*
- * Memory map data structures
- */
-struct ehca_dir_bmap {
-       u64 ent[EHCA_MAP_ENTRIES];
-};
-struct ehca_top_bmap {
-       struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES];
-};
-struct ehca_bmap {
-       struct ehca_top_bmap *top[EHCA_MAP_ENTRIES];
-};
-
-static struct ehca_bmap *ehca_bmap;
-
-static struct kmem_cache *mr_cache;
-static struct kmem_cache *mw_cache;
-
-enum ehca_mr_pgsize {
-       EHCA_MR_PGSIZE4K  = 0x1000L,
-       EHCA_MR_PGSIZE64K = 0x10000L,
-       EHCA_MR_PGSIZE1M  = 0x100000L,
-       EHCA_MR_PGSIZE16M = 0x1000000L
-};
-
-#define EHCA_MR_PGSHIFT4K  12
-#define EHCA_MR_PGSHIFT64K 16
-#define EHCA_MR_PGSHIFT1M  20
-#define EHCA_MR_PGSHIFT16M 24
-
-static u64 ehca_map_vaddr(void *caddr);
-
-static u32 ehca_encode_hwpage_size(u32 pgsize)
-{
-       int log = ilog2(pgsize);
-       WARN_ON(log < 12 || log > 24 || log & 3);
-       return (log - 12) / 4;
-}
-
-static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
-{
-       return rounddown_pow_of_two(shca->hca_cap_mr_pgsize);
-}
-
-static struct ehca_mr *ehca_mr_new(void)
-{
-       struct ehca_mr *me;
-
-       me = kmem_cache_zalloc(mr_cache, GFP_KERNEL);
-       if (me)
-               spin_lock_init(&me->mrlock);
-       else
-               ehca_gen_err("alloc failed");
-
-       return me;
-}
-
-static void ehca_mr_delete(struct ehca_mr *me)
-{
-       kmem_cache_free(mr_cache, me);
-}
-
-static struct ehca_mw *ehca_mw_new(void)
-{
-       struct ehca_mw *me;
-
-       me = kmem_cache_zalloc(mw_cache, GFP_KERNEL);
-       if (me)
-               spin_lock_init(&me->mwlock);
-       else
-               ehca_gen_err("alloc failed");
-
-       return me;
-}
-
-static void ehca_mw_delete(struct ehca_mw *me)
-{
-       kmem_cache_free(mw_cache, me);
-}
-
-/*----------------------------------------------------------------------*/
-
-struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
-{
-       struct ib_mr *ib_mr;
-       int ret;
-       struct ehca_mr *e_maxmr;
-       struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-       struct ehca_shca *shca =
-               container_of(pd->device, struct ehca_shca, ib_device);
-
-       if (shca->maxmr) {
-               e_maxmr = ehca_mr_new();
-               if (!e_maxmr) {
-                       ehca_err(&shca->ib_device, "out of memory");
-                       ib_mr = ERR_PTR(-ENOMEM);
-                       goto get_dma_mr_exit0;
-               }
-
-               ret = ehca_reg_maxmr(shca, e_maxmr,
-                                    (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)),
-                                    mr_access_flags, e_pd,
-                                    &e_maxmr->ib.ib_mr.lkey,
-                                    &e_maxmr->ib.ib_mr.rkey);
-               if (ret) {
-                       ehca_mr_delete(e_maxmr);
-                       ib_mr = ERR_PTR(ret);
-                       goto get_dma_mr_exit0;
-               }
-               ib_mr = &e_maxmr->ib.ib_mr;
-       } else {
-               ehca_err(&shca->ib_device, "no internal max-MR exist!");
-               ib_mr = ERR_PTR(-EINVAL);
-               goto get_dma_mr_exit0;
-       }
-
-get_dma_mr_exit0:
-       if (IS_ERR(ib_mr))
-               ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x",
-                        PTR_ERR(ib_mr), pd, mr_access_flags);
-       return ib_mr;
-} /* end ehca_get_dma_mr() */
-
-/*----------------------------------------------------------------------*/
-
-struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
-                              struct ib_phys_buf *phys_buf_array,
-                              int num_phys_buf,
-                              int mr_access_flags,
-                              u64 *iova_start)
-{
-       struct ib_mr *ib_mr;
-       int ret;
-       struct ehca_mr *e_mr;
-       struct ehca_shca *shca =
-               container_of(pd->device, struct ehca_shca, ib_device);
-       struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-
-       u64 size;
-
-       if ((num_phys_buf <= 0) || !phys_buf_array) {
-               ehca_err(pd->device, "bad input values: num_phys_buf=%x "
-                        "phys_buf_array=%p", num_phys_buf, phys_buf_array);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_phys_mr_exit0;
-       }
-       if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
-            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
-           ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
-            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
-               /*
-                * Remote Write Access requires Local Write Access
-                * Remote Atomic Access requires Local Write Access
-                */
-               ehca_err(pd->device, "bad input values: mr_access_flags=%x",
-                        mr_access_flags);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_phys_mr_exit0;
-       }
-
-       /* check physical buffer list and calculate size */
-       ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf,
-                                           iova_start, &size);
-       if (ret) {
-               ib_mr = ERR_PTR(ret);
-               goto reg_phys_mr_exit0;
-       }
-       if ((size == 0) ||
-           (((u64)iova_start + size) < (u64)iova_start)) {
-               ehca_err(pd->device, "bad input values: size=%llx iova_start=%p",
-                        size, iova_start);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_phys_mr_exit0;
-       }
-
-       e_mr = ehca_mr_new();
-       if (!e_mr) {
-               ehca_err(pd->device, "out of memory");
-               ib_mr = ERR_PTR(-ENOMEM);
-               goto reg_phys_mr_exit0;
-       }
-
-       /* register MR on HCA */
-       if (ehca_mr_is_maxmr(size, iova_start)) {
-               e_mr->flags |= EHCA_MR_FLAG_MAXMR;
-               ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags,
-                                    e_pd, &e_mr->ib.ib_mr.lkey,
-                                    &e_mr->ib.ib_mr.rkey);
-               if (ret) {
-                       ib_mr = ERR_PTR(ret);
-                       goto reg_phys_mr_exit1;
-               }
-       } else {
-               struct ehca_mr_pginfo pginfo;
-               u32 num_kpages;
-               u32 num_hwpages;
-               u64 hw_pgsize;
-
-               num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size,
-                                       PAGE_SIZE);
-               /* for kernel space we try most possible pgsize */
-               hw_pgsize = ehca_get_max_hwpage_size(shca);
-               num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size,
-                                        hw_pgsize);
-               memset(&pginfo, 0, sizeof(pginfo));
-               pginfo.type = EHCA_MR_PGI_PHYS;
-               pginfo.num_kpages = num_kpages;
-               pginfo.hwpage_size = hw_pgsize;
-               pginfo.num_hwpages = num_hwpages;
-               pginfo.u.phy.num_phys_buf = num_phys_buf;
-               pginfo.u.phy.phys_buf_array = phys_buf_array;
-               pginfo.next_hwpage =
-                       ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
-
-               ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
-                                 e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
-                                 &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
-               if (ret) {
-                       ib_mr = ERR_PTR(ret);
-                       goto reg_phys_mr_exit1;
-               }
-       }
-
-       /* successful registration of all pages */
-       return &e_mr->ib.ib_mr;
-
-reg_phys_mr_exit1:
-       ehca_mr_delete(e_mr);
-reg_phys_mr_exit0:
-       if (IS_ERR(ib_mr))
-               ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p "
-                        "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
-                        PTR_ERR(ib_mr), pd, phys_buf_array,
-                        num_phys_buf, mr_access_flags, iova_start);
-       return ib_mr;
-} /* end ehca_reg_phys_mr() */
-
-/*----------------------------------------------------------------------*/
-
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-                              u64 virt, int mr_access_flags,
-                              struct ib_udata *udata)
-{
-       struct ib_mr *ib_mr;
-       struct ehca_mr *e_mr;
-       struct ehca_shca *shca =
-               container_of(pd->device, struct ehca_shca, ib_device);
-       struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-       struct ehca_mr_pginfo pginfo;
-       int ret, page_shift;
-       u32 num_kpages;
-       u32 num_hwpages;
-       u64 hwpage_size;
-
-       if (!pd) {
-               ehca_gen_err("bad pd=%p", pd);
-               return ERR_PTR(-EFAULT);
-       }
-
-       if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
-            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
-           ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
-            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
-               /*
-                * Remote Write Access requires Local Write Access
-                * Remote Atomic Access requires Local Write Access
-                */
-               ehca_err(pd->device, "bad input values: mr_access_flags=%x",
-                        mr_access_flags);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_user_mr_exit0;
-       }
-
-       if (length == 0 || virt + length < virt) {
-               ehca_err(pd->device, "bad input values: length=%llx "
-                        "virt_base=%llx", length, virt);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_user_mr_exit0;
-       }
-
-       e_mr = ehca_mr_new();
-       if (!e_mr) {
-               ehca_err(pd->device, "out of memory");
-               ib_mr = ERR_PTR(-ENOMEM);
-               goto reg_user_mr_exit0;
-       }
-
-       e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
-                                mr_access_flags, 0);
-       if (IS_ERR(e_mr->umem)) {
-               ib_mr = (void *)e_mr->umem;
-               goto reg_user_mr_exit1;
-       }
-
-       if (e_mr->umem->page_size != PAGE_SIZE) {
-               ehca_err(pd->device, "page size not supported, "
-                        "e_mr->umem->page_size=%x", e_mr->umem->page_size);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_user_mr_exit2;
-       }
-
-       /* determine number of MR pages */
-       num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
-       /* select proper hw_pgsize */
-       page_shift = PAGE_SHIFT;
-       if (e_mr->umem->hugetlb) {
-               /* determine page_shift, clamp between 4K and 16M */
-               page_shift = (fls64(length - 1) + 3) & ~3;
-               page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
-                                EHCA_MR_PGSHIFT16M);
-       }
-       hwpage_size = 1UL << page_shift;
-
-       /* now that we have the desired page size, shift until it's
-        * supported, too. 4K is always supported, so this terminates.
-        */
-       while (!(hwpage_size & shca->hca_cap_mr_pgsize))
-               hwpage_size >>= 4;
-
-reg_user_mr_fallback:
-       num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
-       /* register MR on HCA */
-       memset(&pginfo, 0, sizeof(pginfo));
-       pginfo.type = EHCA_MR_PGI_USER;
-       pginfo.hwpage_size = hwpage_size;
-       pginfo.num_kpages = num_kpages;
-       pginfo.num_hwpages = num_hwpages;
-       pginfo.u.usr.region = e_mr->umem;
-       pginfo.next_hwpage = ib_umem_offset(e_mr->umem) / hwpage_size;
-       pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl;
-       ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
-                         e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
-                         &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
-       if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) {
-               ehca_warn(pd->device, "failed to register mr "
-                         "with hwpage_size=%llx", hwpage_size);
-               ehca_info(pd->device, "try to register mr with "
-                         "kpage_size=%lx", PAGE_SIZE);
-               /*
-                * this means kpages are not contiguous for a hw page
-                * try kernel page size as fallback solution
-                */
-               hwpage_size = PAGE_SIZE;
-               goto reg_user_mr_fallback;
-       }
-       if (ret) {
-               ib_mr = ERR_PTR(ret);
-               goto reg_user_mr_exit2;
-       }
-
-       /* successful registration of all pages */
-       return &e_mr->ib.ib_mr;
-
-reg_user_mr_exit2:
-       ib_umem_release(e_mr->umem);
-reg_user_mr_exit1:
-       ehca_mr_delete(e_mr);
-reg_user_mr_exit0:
-       if (IS_ERR(ib_mr))
-               ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p",
-                        PTR_ERR(ib_mr), pd, mr_access_flags, udata);
-       return ib_mr;
-} /* end ehca_reg_user_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_rereg_phys_mr(struct ib_mr *mr,
-                      int mr_rereg_mask,
-                      struct ib_pd *pd,
-                      struct ib_phys_buf *phys_buf_array,
-                      int num_phys_buf,
-                      int mr_access_flags,
-                      u64 *iova_start)
-{
-       int ret;
-
-       struct ehca_shca *shca =
-               container_of(mr->device, struct ehca_shca, ib_device);
-       struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
-       u64 new_size;
-       u64 *new_start;
-       u32 new_acl;
-       struct ehca_pd *new_pd;
-       u32 tmp_lkey, tmp_rkey;
-       unsigned long sl_flags;
-       u32 num_kpages = 0;
-       u32 num_hwpages = 0;
-       struct ehca_mr_pginfo pginfo;
-
-       if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
-               /* TODO not supported, because PHYP rereg hCall needs pages */
-               ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
-                        "supported yet, mr_rereg_mask=%x", mr_rereg_mask);
-               ret = -EINVAL;
-               goto rereg_phys_mr_exit0;
-       }
-
-       if (mr_rereg_mask & IB_MR_REREG_PD) {
-               if (!pd) {
-                       ehca_err(mr->device, "rereg with bad pd, pd=%p "
-                                "mr_rereg_mask=%x", pd, mr_rereg_mask);
-                       ret = -EINVAL;
-                       goto rereg_phys_mr_exit0;
-               }
-       }
-
-       if ((mr_rereg_mask &
-            ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) ||
-           (mr_rereg_mask == 0)) {
-               ret = -EINVAL;
-               goto rereg_phys_mr_exit0;
-       }
-
-       /* check other parameters */
-       if (e_mr == shca->maxmr) {
-               /* should be impossible, however reject to be sure */
-               ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p "
-                        "shca->maxmr=%p mr->lkey=%x",
-                        mr, shca->maxmr, mr->lkey);
-               ret = -EINVAL;
-               goto rereg_phys_mr_exit0;
-       }
-       if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */
-               if (e_mr->flags & EHCA_MR_FLAG_FMR) {
-                       ehca_err(mr->device, "not supported for FMR, mr=%p "
-                                "flags=%x", mr, e_mr->flags);
-                       ret = -EINVAL;
-                       goto rereg_phys_mr_exit0;
-               }
-               if (!phys_buf_array || num_phys_buf <= 0) {
-                       ehca_err(mr->device, "bad input values mr_rereg_mask=%x"
-                                " phys_buf_array=%p num_phys_buf=%x",
-                                mr_rereg_mask, phys_buf_array, num_phys_buf);
-                       ret = -EINVAL;
-                       goto rereg_phys_mr_exit0;
-               }
-       }
-       if ((mr_rereg_mask & IB_MR_REREG_ACCESS) &&     /* change ACL */
-           (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
-             !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
-            ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
-             !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) {
-               /*
-                * Remote Write Access requires Local Write Access
-                * Remote Atomic Access requires Local Write Access
-                */
-               ehca_err(mr->device, "bad input values: mr_rereg_mask=%x "
-                        "mr_access_flags=%x", mr_rereg_mask, mr_access_flags);
-               ret = -EINVAL;
-               goto rereg_phys_mr_exit0;
-       }
-
-       /* set requested values dependent on rereg request */
-       spin_lock_irqsave(&e_mr->mrlock, sl_flags);
-       new_start = e_mr->start;
-       new_size = e_mr->size;
-       new_acl = e_mr->acl;
-       new_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
-
-       if (mr_rereg_mask & IB_MR_REREG_TRANS) {
-               u64 hw_pgsize = ehca_get_max_hwpage_size(shca);
-
-               new_start = iova_start; /* change address */
-               /* check physical buffer list and calculate size */
-               ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
-                                                   num_phys_buf, iova_start,
-                                                   &new_size);
-               if (ret)
-                       goto rereg_phys_mr_exit1;
-               if ((new_size == 0) ||
-                   (((u64)iova_start + new_size) < (u64)iova_start)) {
-                       ehca_err(mr->device, "bad input values: new_size=%llx "
-                                "iova_start=%p", new_size, iova_start);
-                       ret = -EINVAL;
-                       goto rereg_phys_mr_exit1;
-               }
-               num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) +
-                                       new_size, PAGE_SIZE);
-               num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) +
-                                        new_size, hw_pgsize);
-               memset(&pginfo, 0, sizeof(pginfo));
-               pginfo.type = EHCA_MR_PGI_PHYS;
-               pginfo.num_kpages = num_kpages;
-               pginfo.hwpage_size = hw_pgsize;
-               pginfo.num_hwpages = num_hwpages;
-               pginfo.u.phy.num_phys_buf = num_phys_buf;
-               pginfo.u.phy.phys_buf_array = phys_buf_array;
-               pginfo.next_hwpage =
-                       ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
-       }
-       if (mr_rereg_mask & IB_MR_REREG_ACCESS)
-               new_acl = mr_access_flags;
-       if (mr_rereg_mask & IB_MR_REREG_PD)
-               new_pd = container_of(pd, struct ehca_pd, ib_pd);
-
-       ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl,
-                           new_pd, &pginfo, &tmp_lkey, &tmp_rkey);
-       if (ret)
-               goto rereg_phys_mr_exit1;
-
-       /* successful reregistration */
-       if (mr_rereg_mask & IB_MR_REREG_PD)
-               mr->pd = pd;
-       mr->lkey = tmp_lkey;
-       mr->rkey = tmp_rkey;
-
-rereg_phys_mr_exit1:
-       spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
-rereg_phys_mr_exit0:
-       if (ret)
-               ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p "
-                        "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
-                        "iova_start=%p",
-                        ret, mr, mr_rereg_mask, pd, phys_buf_array,
-                        num_phys_buf, mr_access_flags, iova_start);
-       return ret;
-} /* end ehca_rereg_phys_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct ehca_shca *shca =
-               container_of(mr->device, struct ehca_shca, ib_device);
-       struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
-       unsigned long sl_flags;
-       struct ehca_mr_hipzout_parms hipzout;
-
-       if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
-               ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
-                        "e_mr->flags=%x", mr, e_mr, e_mr->flags);
-               ret = -EINVAL;
-               goto query_mr_exit0;
-       }
-
-       memset(mr_attr, 0, sizeof(struct ib_mr_attr));
-       spin_lock_irqsave(&e_mr->mrlock, sl_flags);
-
-       h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p "
-                        "hca_hndl=%llx mr_hndl=%llx lkey=%x",
-                        h_ret, mr, shca->ipz_hca_handle.handle,
-                        e_mr->ipz_mr_handle.handle, mr->lkey);
-               ret = ehca2ib_return_code(h_ret);
-               goto query_mr_exit1;
-       }
-       mr_attr->pd = mr->pd;
-       mr_attr->device_virt_addr = hipzout.vaddr;
-       mr_attr->size = hipzout.len;
-       mr_attr->lkey = hipzout.lkey;
-       mr_attr->rkey = hipzout.rkey;
-       ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);
-
-query_mr_exit1:
-       spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
-query_mr_exit0:
-       if (ret)
-               ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p",
-                        ret, mr, mr_attr);
-       return ret;
-} /* end ehca_query_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dereg_mr(struct ib_mr *mr)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct ehca_shca *shca =
-               container_of(mr->device, struct ehca_shca, ib_device);
-       struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
-
-       if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
-               ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
-                        "e_mr->flags=%x", mr, e_mr, e_mr->flags);
-               ret = -EINVAL;
-               goto dereg_mr_exit0;
-       } else if (e_mr == shca->maxmr) {
-               /* should be impossible, however reject to be sure */
-               ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p "
-                        "shca->maxmr=%p mr->lkey=%x",
-                        mr, shca->maxmr, mr->lkey);
-               ret = -EINVAL;
-               goto dereg_mr_exit0;
-       }
-
-       /* TODO: BUSY: MR still has bound window(s) */
-       h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p "
-                        "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x",
-                        h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
-                        e_mr->ipz_mr_handle.handle, mr->lkey);
-               ret = ehca2ib_return_code(h_ret);
-               goto dereg_mr_exit0;
-       }
-
-       if (e_mr->umem)
-               ib_umem_release(e_mr->umem);
-
-       /* successful deregistration */
-       ehca_mr_delete(e_mr);
-
-dereg_mr_exit0:
-       if (ret)
-               ehca_err(mr->device, "ret=%i mr=%p", ret, mr);
-       return ret;
-} /* end ehca_dereg_mr() */
-
-/*----------------------------------------------------------------------*/
-
-struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
-{
-       struct ib_mw *ib_mw;
-       u64 h_ret;
-       struct ehca_mw *e_mw;
-       struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-       struct ehca_shca *shca =
-               container_of(pd->device, struct ehca_shca, ib_device);
-       struct ehca_mw_hipzout_parms hipzout;
-
-       if (type != IB_MW_TYPE_1)
-               return ERR_PTR(-EINVAL);
-
-       e_mw = ehca_mw_new();
-       if (!e_mw) {
-               ib_mw = ERR_PTR(-ENOMEM);
-               goto alloc_mw_exit0;
-       }
-
-       h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
-                                        e_pd->fw_pd, &hipzout);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli "
-                        "shca=%p hca_hndl=%llx mw=%p",
-                        h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
-               ib_mw = ERR_PTR(ehca2ib_return_code(h_ret));
-               goto alloc_mw_exit1;
-       }
-       /* successful MW allocation */
-       e_mw->ipz_mw_handle = hipzout.handle;
-       e_mw->ib_mw.rkey    = hipzout.rkey;
-       return &e_mw->ib_mw;
-
-alloc_mw_exit1:
-       ehca_mw_delete(e_mw);
-alloc_mw_exit0:
-       if (IS_ERR(ib_mw))
-               ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd);
-       return ib_mw;
-} /* end ehca_alloc_mw() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_bind_mw(struct ib_qp *qp,
-                struct ib_mw *mw,
-                struct ib_mw_bind *mw_bind)
-{
-       /* TODO: not supported up to now */
-       ehca_gen_err("bind MW currently not supported by HCAD");
-
-       return -EPERM;
-} /* end ehca_bind_mw() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dealloc_mw(struct ib_mw *mw)
-{
-       u64 h_ret;
-       struct ehca_shca *shca =
-               container_of(mw->device, struct ehca_shca, ib_device);
-       struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw);
-
-       h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p "
-                        "mw=%p rkey=%x hca_hndl=%llx mw_hndl=%llx",
-                        h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
-                        e_mw->ipz_mw_handle.handle);
-               return ehca2ib_return_code(h_ret);
-       }
-       /* successful deallocation */
-       ehca_mw_delete(e_mw);
-       return 0;
-} /* end ehca_dealloc_mw() */
-
-/*----------------------------------------------------------------------*/
-
-struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
-                             int mr_access_flags,
-                             struct ib_fmr_attr *fmr_attr)
-{
-       struct ib_fmr *ib_fmr;
-       struct ehca_shca *shca =
-               container_of(pd->device, struct ehca_shca, ib_device);
-       struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-       struct ehca_mr *e_fmr;
-       int ret;
-       u32 tmp_lkey, tmp_rkey;
-       struct ehca_mr_pginfo pginfo;
-       u64 hw_pgsize;
-
-       /* check other parameters */
-       if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
-            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
-           ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
-            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
-               /*
-                * Remote Write Access requires Local Write Access
-                * Remote Atomic Access requires Local Write Access
-                */
-               ehca_err(pd->device, "bad input values: mr_access_flags=%x",
-                        mr_access_flags);
-               ib_fmr = ERR_PTR(-EINVAL);
-               goto alloc_fmr_exit0;
-       }
-       if (mr_access_flags & IB_ACCESS_MW_BIND) {
-               ehca_err(pd->device, "bad input values: mr_access_flags=%x",
-                        mr_access_flags);
-               ib_fmr = ERR_PTR(-EINVAL);
-               goto alloc_fmr_exit0;
-       }
-       if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) {
-               ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x "
-                        "fmr_attr->max_maps=%x fmr_attr->page_shift=%x",
-                        fmr_attr->max_pages, fmr_attr->max_maps,
-                        fmr_attr->page_shift);
-               ib_fmr = ERR_PTR(-EINVAL);
-               goto alloc_fmr_exit0;
-       }
-
-       hw_pgsize = 1 << fmr_attr->page_shift;
-       if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) {
-               ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
-                        fmr_attr->page_shift);
-               ib_fmr = ERR_PTR(-EINVAL);
-               goto alloc_fmr_exit0;
-       }
-
-       e_fmr = ehca_mr_new();
-       if (!e_fmr) {
-               ib_fmr = ERR_PTR(-ENOMEM);
-               goto alloc_fmr_exit0;
-       }
-       e_fmr->flags |= EHCA_MR_FLAG_FMR;
-
-       /* register MR on HCA */
-       memset(&pginfo, 0, sizeof(pginfo));
-       pginfo.hwpage_size = hw_pgsize;
-       /*
-        * pginfo.num_hwpages==0, ie register_rpages() will not be called
-        * but deferred to map_phys_fmr()
-        */
-       ret = ehca_reg_mr(shca, e_fmr, NULL,
-                         fmr_attr->max_pages * (1 << fmr_attr->page_shift),
-                         mr_access_flags, e_pd, &pginfo,
-                         &tmp_lkey, &tmp_rkey, EHCA_REG_MR);
-       if (ret) {
-               ib_fmr = ERR_PTR(ret);
-               goto alloc_fmr_exit1;
-       }
-
-       /* successful */
-       e_fmr->hwpage_size = hw_pgsize;
-       e_fmr->fmr_page_size = 1 << fmr_attr->page_shift;
-       e_fmr->fmr_max_pages = fmr_attr->max_pages;
-       e_fmr->fmr_max_maps = fmr_attr->max_maps;
-       e_fmr->fmr_map_cnt = 0;
-       return &e_fmr->ib.ib_fmr;
-
-alloc_fmr_exit1:
-       ehca_mr_delete(e_fmr);
-alloc_fmr_exit0:
-       return ib_fmr;
-} /* end ehca_alloc_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_map_phys_fmr(struct ib_fmr *fmr,
-                     u64 *page_list,
-                     int list_len,
-                     u64 iova)
-{
-       int ret;
-       struct ehca_shca *shca =
-               container_of(fmr->device, struct ehca_shca, ib_device);
-       struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
-       struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
-       struct ehca_mr_pginfo pginfo;
-       u32 tmp_lkey, tmp_rkey;
-
-       if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
-               ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
-                        e_fmr, e_fmr->flags);
-               ret = -EINVAL;
-               goto map_phys_fmr_exit0;
-       }
-       ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len);
-       if (ret)
-               goto map_phys_fmr_exit0;
-       if (iova % e_fmr->fmr_page_size) {
-               /* only whole-numbered pages */
-               ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x",
-                        iova, e_fmr->fmr_page_size);
-               ret = -EINVAL;
-               goto map_phys_fmr_exit0;
-       }
-       if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) {
-               /* HCAD does not limit the maps, however trace this anyway */
-               ehca_info(fmr->device, "map limit exceeded, fmr=%p "
-                         "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x",
-                         fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
-       }
-
-       memset(&pginfo, 0, sizeof(pginfo));
-       pginfo.type = EHCA_MR_PGI_FMR;
-       pginfo.num_kpages = list_len;
-       pginfo.hwpage_size = e_fmr->hwpage_size;
-       pginfo.num_hwpages =
-               list_len * e_fmr->fmr_page_size / pginfo.hwpage_size;
-       pginfo.u.fmr.page_list = page_list;
-       pginfo.next_hwpage =
-               (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size;
-       pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size;
-
-       ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova,
-                           list_len * e_fmr->fmr_page_size,
-                           e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
-       if (ret)
-               goto map_phys_fmr_exit0;
-
-       /* successful reregistration */
-       e_fmr->fmr_map_cnt++;
-       e_fmr->ib.ib_fmr.lkey = tmp_lkey;
-       e_fmr->ib.ib_fmr.rkey = tmp_rkey;
-       return 0;
-
-map_phys_fmr_exit0:
-       if (ret)
-               ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x "
-                        "iova=%llx", ret, fmr, page_list, list_len, iova);
-       return ret;
-} /* end ehca_map_phys_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_unmap_fmr(struct list_head *fmr_list)
-{
-       int ret = 0;
-       struct ib_fmr *ib_fmr;
-       struct ehca_shca *shca = NULL;
-       struct ehca_shca *prev_shca;
-       struct ehca_mr *e_fmr;
-       u32 num_fmr = 0;
-       u32 unmap_fmr_cnt = 0;
-
-       /* check all FMR belong to same SHCA, and check internal flag */
-       list_for_each_entry(ib_fmr, fmr_list, list) {
-               prev_shca = shca;
-               shca = container_of(ib_fmr->device, struct ehca_shca,
-                                   ib_device);
-               e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
-               if ((shca != prev_shca) && prev_shca) {
-                       ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p "
-                                "prev_shca=%p e_fmr=%p",
-                                shca, prev_shca, e_fmr);
-                       ret = -EINVAL;
-                       goto unmap_fmr_exit0;
-               }
-               if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
-                       ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p "
-                                "e_fmr->flags=%x", e_fmr, e_fmr->flags);
-                       ret = -EINVAL;
-                       goto unmap_fmr_exit0;
-               }
-               num_fmr++;
-       }
-
-       /* loop over all FMRs to unmap */
-       list_for_each_entry(ib_fmr, fmr_list, list) {
-               unmap_fmr_cnt++;
-               e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
-               shca = container_of(ib_fmr->device, struct ehca_shca,
-                                   ib_device);
-               ret = ehca_unmap_one_fmr(shca, e_fmr);
-               if (ret) {
-                       /* unmap failed, stop unmapping of rest of FMRs */
-                       ehca_err(&shca->ib_device, "unmap of one FMR failed, "
-                                "stop rest, e_fmr=%p num_fmr=%x "
-                                "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr,
-                                unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey);
-                       goto unmap_fmr_exit0;
-               }
-       }
-
-unmap_fmr_exit0:
-       if (ret)
-               ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
-                            ret, fmr_list, num_fmr, unmap_fmr_cnt);
-       return ret;
-} /* end ehca_unmap_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dealloc_fmr(struct ib_fmr *fmr)
-{
-       int ret;
-       u64 h_ret;
-       struct ehca_shca *shca =
-               container_of(fmr->device, struct ehca_shca, ib_device);
-       struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
-
-       if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
-               ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
-                        e_fmr, e_fmr->flags);
-               ret = -EINVAL;
-               goto free_fmr_exit0;
-       }
-
-       h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p "
-                        "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x",
-                        h_ret, e_fmr, shca->ipz_hca_handle.handle,
-                        e_fmr->ipz_mr_handle.handle, fmr->lkey);
-               ret = ehca2ib_return_code(h_ret);
-               goto free_fmr_exit0;
-       }
-       /* successful deregistration */
-       ehca_mr_delete(e_fmr);
-       return 0;
-
-free_fmr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr);
-       return ret;
-} /* end ehca_dealloc_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
-                                  struct ehca_mr *e_mr,
-                                  struct ehca_mr_pginfo *pginfo);
-
-int ehca_reg_mr(struct ehca_shca *shca,
-               struct ehca_mr *e_mr,
-               u64 *iova_start,
-               u64 size,
-               int acl,
-               struct ehca_pd *e_pd,
-               struct ehca_mr_pginfo *pginfo,
-               u32 *lkey, /*OUT*/
-               u32 *rkey, /*OUT*/
-               enum ehca_reg_type reg_type)
-{
-       int ret;
-       u64 h_ret;
-       u32 hipz_acl;
-       struct ehca_mr_hipzout_parms hipzout;
-
-       ehca_mrmw_map_acl(acl, &hipz_acl);
-       ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
-       if (ehca_use_hp_mr == 1)
-               hipz_acl |= 0x00000001;
-
-       h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
-                                        (u64)iova_start, size, hipz_acl,
-                                        e_pd->fw_pd, &hipzout);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli "
-                        "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle);
-               ret = ehca2ib_return_code(h_ret);
-               goto ehca_reg_mr_exit0;
-       }
-
-       e_mr->ipz_mr_handle = hipzout.handle;
-
-       if (reg_type == EHCA_REG_BUSMAP_MR)
-               ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo);
-       else if (reg_type == EHCA_REG_MR)
-               ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
-       else
-               ret = -EINVAL;
-
-       if (ret)
-               goto ehca_reg_mr_exit1;
-
-       /* successful registration */
-       e_mr->num_kpages = pginfo->num_kpages;
-       e_mr->num_hwpages = pginfo->num_hwpages;
-       e_mr->hwpage_size = pginfo->hwpage_size;
-       e_mr->start = iova_start;
-       e_mr->size = size;
-       e_mr->acl = acl;
-       *lkey = hipzout.lkey;
-       *rkey = hipzout.rkey;
-       return 0;
-
-ehca_reg_mr_exit1:
-       h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p "
-                        "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x "
-                        "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i",
-                        h_ret, shca, e_mr, iova_start, size, acl, e_pd,
-                        hipzout.lkey, pginfo, pginfo->num_kpages,
-                        pginfo->num_hwpages, ret);
-               ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
-                        "not recoverable");
-       }
-ehca_reg_mr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
-                        "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
-                        "num_kpages=%llx num_hwpages=%llx",
-                        ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
-                        pginfo->num_kpages, pginfo->num_hwpages);
-       return ret;
-} /* end ehca_reg_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_reg_mr_rpages(struct ehca_shca *shca,
-                      struct ehca_mr *e_mr,
-                      struct ehca_mr_pginfo *pginfo)
-{
-       int ret = 0;
-       u64 h_ret;
-       u32 rnum;
-       u64 rpage;
-       u32 i;
-       u64 *kpage;
-
-       if (!pginfo->num_hwpages) /* in case of fmr */
-               return 0;
-
-       kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!kpage) {
-               ehca_err(&shca->ib_device, "kpage alloc failed");
-               ret = -ENOMEM;
-               goto ehca_reg_mr_rpages_exit0;
-       }
-
-       /* max MAX_RPAGES ehca mr pages per register call */
-       for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) {
-
-               if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
-                       rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */
-                       if (rnum == 0)
-                               rnum = MAX_RPAGES;      /* last shot is full */
-               } else
-                       rnum = MAX_RPAGES;
-
-               ret = ehca_set_pagebuf(pginfo, rnum, kpage);
-               if (ret) {
-                       ehca_err(&shca->ib_device, "ehca_set_pagebuf "
-                                "bad rc, ret=%i rnum=%x kpage=%p",
-                                ret, rnum, kpage);
-                       goto ehca_reg_mr_rpages_exit1;
-               }
-
-               if (rnum > 1) {
-                       rpage = __pa(kpage);
-                       if (!rpage) {
-                               ehca_err(&shca->ib_device, "kpage=%p i=%x",
-                                        kpage, i);
-                               ret = -EFAULT;
-                               goto ehca_reg_mr_rpages_exit1;
-                       }
-               } else
-                       rpage = *kpage;
-
-               h_ret = hipz_h_register_rpage_mr(
-                       shca->ipz_hca_handle, e_mr,
-                       ehca_encode_hwpage_size(pginfo->hwpage_size),
-                       0, rpage, rnum);
-
-               if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
-                       /*
-                        * check for 'registration complete'==H_SUCCESS
-                        * and for 'page registered'==H_PAGE_REGISTERED
-                        */
-                       if (h_ret != H_SUCCESS) {
-                               ehca_err(&shca->ib_device, "last "
-                                        "hipz_reg_rpage_mr failed, h_ret=%lli "
-                                        "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx"
-                                        " lkey=%x", h_ret, e_mr, i,
-                                        shca->ipz_hca_handle.handle,
-                                        e_mr->ipz_mr_handle.handle,
-                                        e_mr->ib.ib_mr.lkey);
-                               ret = ehca2ib_return_code(h_ret);
-                               break;
-                       } else
-                               ret = 0;
-               } else if (h_ret != H_PAGE_REGISTERED) {
-                       ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
-                                "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx "
-                                "mr_hndl=%llx", h_ret, e_mr, i,
-                                e_mr->ib.ib_mr.lkey,
-                                shca->ipz_hca_handle.handle,
-                                e_mr->ipz_mr_handle.handle);
-                       ret = ehca2ib_return_code(h_ret);
-                       break;
-               } else
-                       ret = 0;
-       } /* end for(i) */
-
-
-ehca_reg_mr_rpages_exit1:
-       ehca_free_fw_ctrlblock(kpage);
-ehca_reg_mr_rpages_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p "
-                        "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr,
-                        pginfo, pginfo->num_kpages, pginfo->num_hwpages);
-       return ret;
-} /* end ehca_reg_mr_rpages() */
-
-/*----------------------------------------------------------------------*/
-
-inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
-                               struct ehca_mr *e_mr,
-                               u64 *iova_start,
-                               u64 size,
-                               u32 acl,
-                               struct ehca_pd *e_pd,
-                               struct ehca_mr_pginfo *pginfo,
-                               u32 *lkey, /*OUT*/
-                               u32 *rkey) /*OUT*/
-{
-       int ret;
-       u64 h_ret;
-       u32 hipz_acl;
-       u64 *kpage;
-       u64 rpage;
-       struct ehca_mr_pginfo pginfo_save;
-       struct ehca_mr_hipzout_parms hipzout;
-
-       ehca_mrmw_map_acl(acl, &hipz_acl);
-       ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
-
-       kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!kpage) {
-               ehca_err(&shca->ib_device, "kpage alloc failed");
-               ret = -ENOMEM;
-               goto ehca_rereg_mr_rereg1_exit0;
-       }
-
-       pginfo_save = *pginfo;
-       ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage);
-       if (ret) {
-               ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
-                        "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx "
-                        "kpage=%p", e_mr, pginfo, pginfo->type,
-                        pginfo->num_kpages, pginfo->num_hwpages, kpage);
-               goto ehca_rereg_mr_rereg1_exit1;
-       }
-       rpage = __pa(kpage);
-       if (!rpage) {
-               ehca_err(&shca->ib_device, "kpage=%p", kpage);
-               ret = -EFAULT;
-               goto ehca_rereg_mr_rereg1_exit1;
-       }
-       h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr,
-                                     (u64)iova_start, size, hipz_acl,
-                                     e_pd->fw_pd, rpage, &hipzout);
-       if (h_ret != H_SUCCESS) {
-               /*
-                * reregistration unsuccessful, try it again with the 3 hCalls,
-                * e.g. this is required in case H_MR_CONDITION
-                * (MW bound or MR is shared)
-                */
-               ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
-                         "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr);
-               *pginfo = pginfo_save;
-               ret = -EAGAIN;
-       } else if ((u64 *)hipzout.vaddr != iova_start) {
-               ehca_err(&shca->ib_device, "PHYP changed iova_start in "
-                        "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p "
-                        "mr_handle=%llx lkey=%x lkey_out=%x", iova_start,
-                        hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
-                        e_mr->ib.ib_mr.lkey, hipzout.lkey);
-               ret = -EFAULT;
-       } else {
-               /*
-                * successful reregistration
-                * note: start and start_out are identical for eServer HCAs
-                */
-               e_mr->num_kpages = pginfo->num_kpages;
-               e_mr->num_hwpages = pginfo->num_hwpages;
-               e_mr->hwpage_size = pginfo->hwpage_size;
-               e_mr->start = iova_start;
-               e_mr->size = size;
-               e_mr->acl = acl;
-               *lkey = hipzout.lkey;
-               *rkey = hipzout.rkey;
-       }
-
-ehca_rereg_mr_rereg1_exit1:
-       ehca_free_fw_ctrlblock(kpage);
-ehca_rereg_mr_rereg1_exit0:
-       if ( ret && (ret != -EAGAIN) )
-               ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x "
-                        "pginfo=%p num_kpages=%llx num_hwpages=%llx",
-                        ret, *lkey, *rkey, pginfo, pginfo->num_kpages,
-                        pginfo->num_hwpages);
-       return ret;
-} /* end ehca_rereg_mr_rereg1() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_rereg_mr(struct ehca_shca *shca,
-                 struct ehca_mr *e_mr,
-                 u64 *iova_start,
-                 u64 size,
-                 int acl,
-                 struct ehca_pd *e_pd,
-                 struct ehca_mr_pginfo *pginfo,
-                 u32 *lkey,
-                 u32 *rkey)
-{
-       int ret = 0;
-       u64 h_ret;
-       int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */
-       int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */
-
-       /* first determine reregistration hCall(s) */
-       if ((pginfo->num_hwpages > MAX_RPAGES) ||
-           (e_mr->num_hwpages > MAX_RPAGES) ||
-           (pginfo->num_hwpages > e_mr->num_hwpages)) {
-               ehca_dbg(&shca->ib_device, "Rereg3 case, "
-                        "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x",
-                        pginfo->num_hwpages, e_mr->num_hwpages);
-               rereg_1_hcall = 0;
-               rereg_3_hcall = 1;
-       }
-
-       if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */
-               rereg_1_hcall = 0;
-               rereg_3_hcall = 1;
-               e_mr->flags &= ~EHCA_MR_FLAG_MAXMR;
-               ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p",
-                        e_mr);
-       }
-
-       if (rereg_1_hcall) {
-               ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size,
-                                          acl, e_pd, pginfo, lkey, rkey);
-               if (ret) {
-                       if (ret == -EAGAIN)
-                               rereg_3_hcall = 1;
-                       else
-                               goto ehca_rereg_mr_exit0;
-               }
-       }
-
-       if (rereg_3_hcall) {
-               struct ehca_mr save_mr;
-
-               /* first deregister old MR */
-               h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
-               if (h_ret != H_SUCCESS) {
-                       ehca_err(&shca->ib_device, "hipz_free_mr failed, "
-                                "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx "
-                                "mr->lkey=%x",
-                                h_ret, e_mr, shca->ipz_hca_handle.handle,
-                                e_mr->ipz_mr_handle.handle,
-                                e_mr->ib.ib_mr.lkey);
-                       ret = ehca2ib_return_code(h_ret);
-                       goto ehca_rereg_mr_exit0;
-               }
-               /* clean ehca_mr_t, without changing struct ib_mr and lock */
-               save_mr = *e_mr;
-               ehca_mr_deletenew(e_mr);
-
-               /* set some MR values */
-               e_mr->flags = save_mr.flags;
-               e_mr->hwpage_size = save_mr.hwpage_size;
-               e_mr->fmr_page_size = save_mr.fmr_page_size;
-               e_mr->fmr_max_pages = save_mr.fmr_max_pages;
-               e_mr->fmr_max_maps = save_mr.fmr_max_maps;
-               e_mr->fmr_map_cnt = save_mr.fmr_map_cnt;
-
-               ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
-                                 e_pd, pginfo, lkey, rkey, EHCA_REG_MR);
-               if (ret) {
-                       u32 offset = (u64)(&e_mr->flags) - (u64)e_mr;
-                       memcpy(&e_mr->flags, &(save_mr.flags),
-                              sizeof(struct ehca_mr) - offset);
-                       goto ehca_rereg_mr_exit0;
-               }
-       }
-
-ehca_rereg_mr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
-                        "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
-                        "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x "
-                        "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
-                        acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey,
-                        rereg_1_hcall, rereg_3_hcall);
-       return ret;
-} /* end ehca_rereg_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_unmap_one_fmr(struct ehca_shca *shca,
-                      struct ehca_mr *e_fmr)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct ehca_pd *e_pd =
-               container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
-       struct ehca_mr save_fmr;
-       u32 tmp_lkey, tmp_rkey;
-       struct ehca_mr_pginfo pginfo;
-       struct ehca_mr_hipzout_parms hipzout;
-       struct ehca_mr save_mr;
-
-       if (e_fmr->fmr_max_pages <= MAX_RPAGES) {
-               /*
-                * note: after using rereg hcall with len=0,
-                * rereg hcall must be used again for registering pages
-                */
-               h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
-                                             0, 0, e_pd->fw_pd, 0, &hipzout);
-               if (h_ret == H_SUCCESS) {
-                       /* successful reregistration */
-                       e_fmr->start = NULL;
-                       e_fmr->size = 0;
-                       tmp_lkey = hipzout.lkey;
-                       tmp_rkey = hipzout.rkey;
-                       return 0;
-               }
-               /*
-                * should not happen, because length checked above,
-                * FMRs are not shared and no MW bound to FMRs
-                */
-               ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
-                        "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx "
-                        "mr_hndl=%llx lkey=%x lkey_out=%x",
-                        h_ret, e_fmr, shca->ipz_hca_handle.handle,
-                        e_fmr->ipz_mr_handle.handle,
-                        e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
-               /* try free and rereg */
-       }
-
-       /* first free old FMR */
-       h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "hipz_free_mr failed, "
-                        "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx "
-                        "lkey=%x",
-                        h_ret, e_fmr, shca->ipz_hca_handle.handle,
-                        e_fmr->ipz_mr_handle.handle,
-                        e_fmr->ib.ib_fmr.lkey);
-               ret = ehca2ib_return_code(h_ret);
-               goto ehca_unmap_one_fmr_exit0;
-       }
-       /* clean ehca_mr_t, without changing lock */
-       save_fmr = *e_fmr;
-       ehca_mr_deletenew(e_fmr);
-
-       /* set some MR values */
-       e_fmr->flags = save_fmr.flags;
-       e_fmr->hwpage_size = save_fmr.hwpage_size;
-       e_fmr->fmr_page_size = save_fmr.fmr_page_size;
-       e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
-       e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
-       e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
-       e_fmr->acl = save_fmr.acl;
-
-       memset(&pginfo, 0, sizeof(pginfo));
-       pginfo.type = EHCA_MR_PGI_FMR;
-       ret = ehca_reg_mr(shca, e_fmr, NULL,
-                         (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
-                         e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
-                         &tmp_rkey, EHCA_REG_MR);
-       if (ret) {
-               u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
-               memcpy(&e_fmr->flags, &(save_mr.flags),
-                      sizeof(struct ehca_mr) - offset);
-       }
-
-ehca_unmap_one_fmr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x "
-                        "fmr_max_pages=%x",
-                        ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages);
-       return ret;
-} /* end ehca_unmap_one_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_reg_smr(struct ehca_shca *shca,
-                struct ehca_mr *e_origmr,
-                struct ehca_mr *e_newmr,
-                u64 *iova_start,
-                int acl,
-                struct ehca_pd *e_pd,
-                u32 *lkey, /*OUT*/
-                u32 *rkey) /*OUT*/
-{
-       int ret = 0;
-       u64 h_ret;
-       u32 hipz_acl;
-       struct ehca_mr_hipzout_parms hipzout;
-
-       ehca_mrmw_map_acl(acl, &hipz_acl);
-       ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
-
-       h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
-                                   (u64)iova_start, hipz_acl, e_pd->fw_pd,
-                                   &hipzout);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
-                        "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
-                        "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
-                        h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
-                        shca->ipz_hca_handle.handle,
-                        e_origmr->ipz_mr_handle.handle,
-                        e_origmr->ib.ib_mr.lkey);
-               ret = ehca2ib_return_code(h_ret);
-               goto ehca_reg_smr_exit0;
-       }
-       /* successful registration */
-       e_newmr->num_kpages = e_origmr->num_kpages;
-       e_newmr->num_hwpages = e_origmr->num_hwpages;
-       e_newmr->hwpage_size   = e_origmr->hwpage_size;
-       e_newmr->start = iova_start;
-       e_newmr->size = e_origmr->size;
-       e_newmr->acl = acl;
-       e_newmr->ipz_mr_handle = hipzout.handle;
-       *lkey = hipzout.lkey;
-       *rkey = hipzout.rkey;
-       return 0;
-
-ehca_reg_smr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p "
-                        "e_newmr=%p iova_start=%p acl=%x e_pd=%p",
-                        ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
-       return ret;
-} /* end ehca_reg_smr() */
-
-/*----------------------------------------------------------------------*/
-static inline void *ehca_calc_sectbase(int top, int dir, int idx)
-{
-       unsigned long ret = idx;
-       ret |= dir << EHCA_DIR_INDEX_SHIFT;
-       ret |= top << EHCA_TOP_INDEX_SHIFT;
-       return __va(ret << SECTION_SIZE_BITS);
-}
-
-#define ehca_bmap_valid(entry) \
-       ((u64)entry != (u64)EHCA_INVAL_ADDR)
-
-static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage,
-                              struct ehca_shca *shca, struct ehca_mr *mr,
-                              struct ehca_mr_pginfo *pginfo)
-{
-       u64 h_ret = 0;
-       unsigned long page = 0;
-       u64 rpage = __pa(kpage);
-       int page_count;
-
-       void *sectbase = ehca_calc_sectbase(top, dir, idx);
-       if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) {
-               ehca_err(&shca->ib_device, "reg_mr_section will probably fail:"
-                                          "hwpage_size does not fit to "
-                                          "section start address");
-       }
-       page_count = EHCA_SECTSIZE / pginfo->hwpage_size;
-
-       while (page < page_count) {
-               u64 rnum;
-               for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count);
-                    rnum++) {
-                       void *pg = sectbase + ((page++) * pginfo->hwpage_size);
-                       kpage[rnum] = __pa(pg);
-               }
-
-               h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr,
-                       ehca_encode_hwpage_size(pginfo->hwpage_size),
-                       0, rpage, rnum);
-
-               if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) {
-                       ehca_err(&shca->ib_device, "register_rpage_mr failed");
-                       return h_ret;
-               }
-       }
-       return h_ret;
-}
-
-static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage,
-                               struct ehca_shca *shca, struct ehca_mr *mr,
-                               struct ehca_mr_pginfo *pginfo)
-{
-       u64 hret = H_SUCCESS;
-       int idx;
-
-       for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) {
-               if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx]))
-                       continue;
-
-               hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr,
-                                          pginfo);
-               if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
-                               return hret;
-       }
-       return hret;
-}
-
-static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca,
-                                   struct ehca_mr *mr,
-                                   struct ehca_mr_pginfo *pginfo)
-{
-       u64 hret = H_SUCCESS;
-       int dir;
-
-       for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
-               if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
-                       continue;
-
-               hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo);
-               if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
-                               return hret;
-       }
-       return hret;
-}
-
-/* register internal max-MR to internal SHCA */
-int ehca_reg_internal_maxmr(
-       struct ehca_shca *shca,
-       struct ehca_pd *e_pd,
-       struct ehca_mr **e_maxmr)  /*OUT*/
-{
-       int ret;
-       struct ehca_mr *e_mr;
-       u64 *iova_start;
-       u64 size_maxmr;
-       struct ehca_mr_pginfo pginfo;
-       struct ib_phys_buf ib_pbuf;
-       u32 num_kpages;
-       u32 num_hwpages;
-       u64 hw_pgsize;
-
-       if (!ehca_bmap) {
-               ret = -EFAULT;
-               goto ehca_reg_internal_maxmr_exit0;
-       }
-
-       e_mr = ehca_mr_new();
-       if (!e_mr) {
-               ehca_err(&shca->ib_device, "out of memory");
-               ret = -ENOMEM;
-               goto ehca_reg_internal_maxmr_exit0;
-       }
-       e_mr->flags |= EHCA_MR_FLAG_MAXMR;
-
-       /* register internal max-MR on HCA */
-       size_maxmr = ehca_mr_len;
-       iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START));
-       ib_pbuf.addr = 0;
-       ib_pbuf.size = size_maxmr;
-       num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
-                               PAGE_SIZE);
-       hw_pgsize = ehca_get_max_hwpage_size(shca);
-       num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr,
-                                hw_pgsize);
-
-       memset(&pginfo, 0, sizeof(pginfo));
-       pginfo.type = EHCA_MR_PGI_PHYS;
-       pginfo.num_kpages = num_kpages;
-       pginfo.num_hwpages = num_hwpages;
-       pginfo.hwpage_size = hw_pgsize;
-       pginfo.u.phy.num_phys_buf = 1;
-       pginfo.u.phy.phys_buf_array = &ib_pbuf;
-
-       ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
-                         &pginfo, &e_mr->ib.ib_mr.lkey,
-                         &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR);
-       if (ret) {
-               ehca_err(&shca->ib_device, "reg of internal max MR failed, "
-                        "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x "
-                        "num_hwpages=%x", e_mr, iova_start, size_maxmr,
-                        num_kpages, num_hwpages);
-               goto ehca_reg_internal_maxmr_exit1;
-       }
-
-       /* successful registration of all pages */
-       e_mr->ib.ib_mr.device = e_pd->ib_pd.device;
-       e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
-       e_mr->ib.ib_mr.uobject = NULL;
-       atomic_inc(&(e_pd->ib_pd.usecnt));
-       atomic_set(&(e_mr->ib.ib_mr.usecnt), 0);
-       *e_maxmr = e_mr;
-       return 0;
-
-ehca_reg_internal_maxmr_exit1:
-       ehca_mr_delete(e_mr);
-ehca_reg_internal_maxmr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p",
-                        ret, shca, e_pd, e_maxmr);
-       return ret;
-} /* end ehca_reg_internal_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_reg_maxmr(struct ehca_shca *shca,
-                  struct ehca_mr *e_newmr,
-                  u64 *iova_start,
-                  int acl,
-                  struct ehca_pd *e_pd,
-                  u32 *lkey,
-                  u32 *rkey)
-{
-       u64 h_ret;
-       struct ehca_mr *e_origmr = shca->maxmr;
-       u32 hipz_acl;
-       struct ehca_mr_hipzout_parms hipzout;
-
-       ehca_mrmw_map_acl(acl, &hipz_acl);
-       ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
-
-       h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
-                                   (u64)iova_start, hipz_acl, e_pd->fw_pd,
-                                   &hipzout);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
-                        "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
-                        h_ret, e_origmr, shca->ipz_hca_handle.handle,
-                        e_origmr->ipz_mr_handle.handle,
-                        e_origmr->ib.ib_mr.lkey);
-               return ehca2ib_return_code(h_ret);
-       }
-       /* successful registration */
-       e_newmr->num_kpages = e_origmr->num_kpages;
-       e_newmr->num_hwpages = e_origmr->num_hwpages;
-       e_newmr->hwpage_size = e_origmr->hwpage_size;
-       e_newmr->start = iova_start;
-       e_newmr->size = e_origmr->size;
-       e_newmr->acl = acl;
-       e_newmr->ipz_mr_handle = hipzout.handle;
-       *lkey = hipzout.lkey;
-       *rkey = hipzout.rkey;
-       return 0;
-} /* end ehca_reg_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
-{
-       int ret;
-       struct ehca_mr *e_maxmr;
-       struct ib_pd *ib_pd;
-
-       if (!shca->maxmr) {
-               ehca_err(&shca->ib_device, "bad call, shca=%p", shca);
-               ret = -EINVAL;
-               goto ehca_dereg_internal_maxmr_exit0;
-       }
-
-       e_maxmr = shca->maxmr;
-       ib_pd = e_maxmr->ib.ib_mr.pd;
-       shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */
-
-       ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
-       if (ret) {
-               ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
-                        "ret=%i e_maxmr=%p shca=%p lkey=%x",
-                        ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
-               shca->maxmr = e_maxmr;
-               goto ehca_dereg_internal_maxmr_exit0;
-       }
-
-       atomic_dec(&ib_pd->usecnt);
-
-ehca_dereg_internal_maxmr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p",
-                        ret, shca, shca->maxmr);
-       return ret;
-} /* end ehca_dereg_internal_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-/*
- * check physical buffer array of MR verbs for validness and
- * calculates MR size
- */
-int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
-                                 int num_phys_buf,
-                                 u64 *iova_start,
-                                 u64 *size)
-{
-       struct ib_phys_buf *pbuf = phys_buf_array;
-       u64 size_count = 0;
-       u32 i;
-
-       if (num_phys_buf == 0) {
-               ehca_gen_err("bad phys buf array len, num_phys_buf=0");
-               return -EINVAL;
-       }
-       /* check first buffer */
-       if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
-               ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
-                            "pbuf->addr=%llx pbuf->size=%llx",
-                            iova_start, pbuf->addr, pbuf->size);
-               return -EINVAL;
-       }
-       if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
-           (num_phys_buf > 1)) {
-               ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx "
-                            "pbuf->size=%llx", pbuf->addr, pbuf->size);
-               return -EINVAL;
-       }
-
-       for (i = 0; i < num_phys_buf; i++) {
-               if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
-                       ehca_gen_err("bad address, i=%x pbuf->addr=%llx "
-                                    "pbuf->size=%llx",
-                                    i, pbuf->addr, pbuf->size);
-                       return -EINVAL;
-               }
-               if (((i > 0) && /* not 1st */
-                    (i < (num_phys_buf - 1)) &&        /* not last */
-                    (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
-                       ehca_gen_err("bad size, i=%x pbuf->size=%llx",
-                                    i, pbuf->size);
-                       return -EINVAL;
-               }
-               size_count += pbuf->size;
-               pbuf++;
-       }
-
-       *size = size_count;
-       return 0;
-} /* end ehca_mr_chk_buf_and_calc_size() */
-
-/*----------------------------------------------------------------------*/
-
-/* check page list of map FMR verb for validness */
-int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
-                            u64 *page_list,
-                            int list_len)
-{
-       u32 i;
-       u64 *page;
-
-       if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) {
-               ehca_gen_err("bad list_len, list_len=%x "
-                            "e_fmr->fmr_max_pages=%x fmr=%p",
-                            list_len, e_fmr->fmr_max_pages, e_fmr);
-               return -EINVAL;
-       }
-
-       /* each page must be aligned */
-       page = page_list;
-       for (i = 0; i < list_len; i++) {
-               if (*page % e_fmr->fmr_page_size) {
-                       ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p "
-                                    "fmr_page_size=%x", i, *page, page, e_fmr,
-                                    e_fmr->fmr_page_size);
-                       return -EINVAL;
-               }
-               page++;
-       }
-
-       return 0;
-} /* end ehca_fmr_check_page_list() */
-
-/*----------------------------------------------------------------------*/
-
-/* PAGE_SIZE >= pginfo->hwpage_size */
-static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
-                                 u32 number,
-                                 u64 *kpage)
-{
-       int ret = 0;
-       u64 pgaddr;
-       u32 j = 0;
-       int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size;
-       struct scatterlist **sg = &pginfo->u.usr.next_sg;
-
-       while (*sg != NULL) {
-               pgaddr = page_to_pfn(sg_page(*sg))
-                       << PAGE_SHIFT;
-               *kpage = pgaddr + (pginfo->next_hwpage *
-                                  pginfo->hwpage_size);
-               if (!(*kpage)) {
-                       ehca_gen_err("pgaddr=%llx "
-                                    "sg_dma_address=%llx "
-                                    "entry=%llx next_hwpage=%llx",
-                                    pgaddr, (u64)sg_dma_address(*sg),
-                                    pginfo->u.usr.next_nmap,
-                                    pginfo->next_hwpage);
-                       return -EFAULT;
-               }
-               (pginfo->hwpage_cnt)++;
-               (pginfo->next_hwpage)++;
-               kpage++;
-               if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
-                       (pginfo->kpage_cnt)++;
-                       (pginfo->u.usr.next_nmap)++;
-                       pginfo->next_hwpage = 0;
-                       *sg = sg_next(*sg);
-               }
-               j++;
-               if (j >= number)
-                       break;
-       }
-
-       return ret;
-}
-
-/*
- * check given pages for contiguous layout
- * last page addr is returned in prev_pgaddr for further check
- */
-static int ehca_check_kpages_per_ate(struct scatterlist **sg,
-                                    int num_pages,
-                                    u64 *prev_pgaddr)
-{
-       for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) {
-               u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT;
-               if (ehca_debug_level >= 3)
-                       ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr,
-                                    *(u64 *)__va(pgaddr));
-               if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
-                       ehca_gen_err("uncontiguous page found pgaddr=%llx "
-                                    "prev_pgaddr=%llx entries_left_in_hwpage=%x",
-                                    pgaddr, *prev_pgaddr, num_pages);
-                       return -EINVAL;
-               }
-               *prev_pgaddr = pgaddr;
-       }
-       return 0;
-}
-
-/* PAGE_SIZE < pginfo->hwpage_size */
-static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
-                                 u32 number,
-                                 u64 *kpage)
-{
-       int ret = 0;
-       u64 pgaddr, prev_pgaddr;
-       u32 j = 0;
-       int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE;
-       int nr_kpages = kpages_per_hwpage;
-       struct scatterlist **sg = &pginfo->u.usr.next_sg;
-
-       while (*sg != NULL) {
-
-               if (nr_kpages == kpages_per_hwpage) {
-                       pgaddr = (page_to_pfn(sg_page(*sg))
-                                  << PAGE_SHIFT);
-                       *kpage = pgaddr;
-                       if (!(*kpage)) {
-                               ehca_gen_err("pgaddr=%llx entry=%llx",
-                                            pgaddr, pginfo->u.usr.next_nmap);
-                               ret = -EFAULT;
-                               return ret;
-                       }
-                       /*
-                        * The first page in a hwpage must be aligned;
-                        * the first MR page is exempt from this rule.
-                        */
-                       if (pgaddr & (pginfo->hwpage_size - 1)) {
-                               if (pginfo->hwpage_cnt) {
-                                       ehca_gen_err(
-                                               "invalid alignment "
-                                               "pgaddr=%llx entry=%llx "
-                                               "mr_pgsize=%llx",
-                                               pgaddr, pginfo->u.usr.next_nmap,
-                                               pginfo->hwpage_size);
-                                       ret = -EFAULT;
-                                       return ret;
-                               }
-                               /* first MR page */
-                               pginfo->kpage_cnt =
-                                       (pgaddr &
-                                        (pginfo->hwpage_size - 1)) >>
-                                       PAGE_SHIFT;
-                               nr_kpages -= pginfo->kpage_cnt;
-                               *kpage = pgaddr &
-                                        ~(pginfo->hwpage_size - 1);
-                       }
-                       if (ehca_debug_level >= 3) {
-                               u64 val = *(u64 *)__va(pgaddr);
-                               ehca_gen_dbg("kpage=%llx page=%llx "
-                                            "value=%016llx",
-                                            *kpage, pgaddr, val);
-                       }
-                       prev_pgaddr = pgaddr;
-                       *sg = sg_next(*sg);
-                       pginfo->kpage_cnt++;
-                       pginfo->u.usr.next_nmap++;
-                       nr_kpages--;
-                       if (!nr_kpages)
-                               goto next_kpage;
-                       continue;
-               }
-
-               ret = ehca_check_kpages_per_ate(sg, nr_kpages,
-                                               &prev_pgaddr);
-               if (ret)
-                       return ret;
-               pginfo->kpage_cnt += nr_kpages;
-               pginfo->u.usr.next_nmap += nr_kpages;
-
-next_kpage:
-               nr_kpages = kpages_per_hwpage;
-               (pginfo->hwpage_cnt)++;
-               kpage++;
-               j++;
-               if (j >= number)
-                       break;
-       }
-
-       return ret;
-}
-
-static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
-                                u32 number, u64 *kpage)
-{
-       int ret = 0;
-       struct ib_phys_buf *pbuf;
-       u64 num_hw, offs_hw;
-       u32 i = 0;
-
-       /* loop over desired phys_buf_array entries */
-       while (i < number) {
-               pbuf   = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf;
-               num_hw  = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) +
-                                    pbuf->size, pginfo->hwpage_size);
-               offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) /
-                       pginfo->hwpage_size;
-               while (pginfo->next_hwpage < offs_hw + num_hw) {
-                       /* sanity check */
-                       if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
-                           (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
-                               ehca_gen_err("kpage_cnt >= num_kpages, "
-                                            "kpage_cnt=%llx num_kpages=%llx "
-                                            "hwpage_cnt=%llx "
-                                            "num_hwpages=%llx i=%x",
-                                            pginfo->kpage_cnt,
-                                            pginfo->num_kpages,
-                                            pginfo->hwpage_cnt,
-                                            pginfo->num_hwpages, i);
-                               return -EFAULT;
-                       }
-                       *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
-                                (pginfo->next_hwpage * pginfo->hwpage_size);
-                       if ( !(*kpage) && pbuf->addr ) {
-                               ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx "
-                                            "next_hwpage=%llx", pbuf->addr,
-                                            pbuf->size, pginfo->next_hwpage);
-                               return -EFAULT;
-                       }
-                       (pginfo->hwpage_cnt)++;
-                       (pginfo->next_hwpage)++;
-                       if (PAGE_SIZE >= pginfo->hwpage_size) {
-                               if (pginfo->next_hwpage %
-                                   (PAGE_SIZE / pginfo->hwpage_size) == 0)
-                                       (pginfo->kpage_cnt)++;
-                       } else
-                               pginfo->kpage_cnt += pginfo->hwpage_size /
-                                       PAGE_SIZE;
-                       kpage++;
-                       i++;
-                       if (i >= number) break;
-               }
-               if (pginfo->next_hwpage >= offs_hw + num_hw) {
-                       (pginfo->u.phy.next_buf)++;
-                       pginfo->next_hwpage = 0;
-               }
-       }
-       return ret;
-}
-
-static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
-                               u32 number, u64 *kpage)
-{
-       int ret = 0;
-       u64 *fmrlist;
-       u32 i;
-
-       /* loop over desired page_list entries */
-       fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem;
-       for (i = 0; i < number; i++) {
-               *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) +
-                          pginfo->next_hwpage * pginfo->hwpage_size;
-               if ( !(*kpage) ) {
-                       ehca_gen_err("*fmrlist=%llx fmrlist=%p "
-                                    "next_listelem=%llx next_hwpage=%llx",
-                                    *fmrlist, fmrlist,
-                                    pginfo->u.fmr.next_listelem,
-                                    pginfo->next_hwpage);
-                       return -EFAULT;
-               }
-               (pginfo->hwpage_cnt)++;
-               if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) {
-                       if (pginfo->next_hwpage %
-                           (pginfo->u.fmr.fmr_pgsize /
-                            pginfo->hwpage_size) == 0) {
-                               (pginfo->kpage_cnt)++;
-                               (pginfo->u.fmr.next_listelem)++;
-                               fmrlist++;
-                               pginfo->next_hwpage = 0;
-                       } else
-                               (pginfo->next_hwpage)++;
-               } else {
-                       unsigned int cnt_per_hwpage = pginfo->hwpage_size /
-                               pginfo->u.fmr.fmr_pgsize;
-                       unsigned int j;
-                       u64 prev = *kpage;
-                       /* check if adrs are contiguous */
-                       for (j = 1; j < cnt_per_hwpage; j++) {
-                               u64 p = fmrlist[j] & ~(pginfo->hwpage_size - 1);
-                               if (prev + pginfo->u.fmr.fmr_pgsize != p) {
-                                       ehca_gen_err("uncontiguous fmr pages "
-                                                    "found prev=%llx p=%llx "
-                                                    "idx=%x", prev, p, i + j);
-                                       return -EINVAL;
-                               }
-                               prev = p;
-                       }
-                       pginfo->kpage_cnt += cnt_per_hwpage;
-                       pginfo->u.fmr.next_listelem += cnt_per_hwpage;
-                       fmrlist += cnt_per_hwpage;
-               }
-               kpage++;
-       }
-       return ret;
-}
-
-/* setup page buffer from page info */
-int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
-                    u32 number,
-                    u64 *kpage)
-{
-       int ret;
-
-       switch (pginfo->type) {
-       case EHCA_MR_PGI_PHYS:
-               ret = ehca_set_pagebuf_phys(pginfo, number, kpage);
-               break;
-       case EHCA_MR_PGI_USER:
-               ret = PAGE_SIZE >= pginfo->hwpage_size ?
-                       ehca_set_pagebuf_user1(pginfo, number, kpage) :
-                       ehca_set_pagebuf_user2(pginfo, number, kpage);
-               break;
-       case EHCA_MR_PGI_FMR:
-               ret = ehca_set_pagebuf_fmr(pginfo, number, kpage);
-               break;
-       default:
-               ehca_gen_err("bad pginfo->type=%x", pginfo->type);
-               ret = -EFAULT;
-               break;
-       }
-       return ret;
-} /* end ehca_set_pagebuf() */
-
-/*----------------------------------------------------------------------*/
-
-/*
- * check MR if it is a max-MR, i.e. uses whole memory
- * in case it's a max-MR 1 is returned, else 0
- */
-int ehca_mr_is_maxmr(u64 size,
-                    u64 *iova_start)
-{
-       /* a MR is treated as max-MR only if it fits following: */
-       if ((size == ehca_mr_len) &&
-           (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) {
-               ehca_gen_dbg("this is a max-MR");
-               return 1;
-       } else
-               return 0;
-} /* end ehca_mr_is_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-/* map access control for MR/MW. This routine is used for MR and MW. */
-void ehca_mrmw_map_acl(int ib_acl,
-                      u32 *hipz_acl)
-{
-       *hipz_acl = 0;
-       if (ib_acl & IB_ACCESS_REMOTE_READ)
-               *hipz_acl |= HIPZ_ACCESSCTRL_R_READ;
-       if (ib_acl & IB_ACCESS_REMOTE_WRITE)
-               *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE;
-       if (ib_acl & IB_ACCESS_REMOTE_ATOMIC)
-               *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC;
-       if (ib_acl & IB_ACCESS_LOCAL_WRITE)
-               *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE;
-       if (ib_acl & IB_ACCESS_MW_BIND)
-               *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND;
-} /* end ehca_mrmw_map_acl() */
-
-/*----------------------------------------------------------------------*/
-
-/* sets page size in hipz access control for MR/MW. */
-void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/
-{
-       *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24);
-} /* end ehca_mrmw_set_pgsize_hipz_acl() */
-
-/*----------------------------------------------------------------------*/
-
-/*
- * reverse map access control for MR/MW.
- * This routine is used for MR and MW.
- */
-void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
-                              int *ib_acl) /*OUT*/
-{
-       *ib_acl = 0;
-       if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ)
-               *ib_acl |= IB_ACCESS_REMOTE_READ;
-       if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE)
-               *ib_acl |= IB_ACCESS_REMOTE_WRITE;
-       if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC)
-               *ib_acl |= IB_ACCESS_REMOTE_ATOMIC;
-       if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE)
-               *ib_acl |= IB_ACCESS_LOCAL_WRITE;
-       if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND)
-               *ib_acl |= IB_ACCESS_MW_BIND;
-} /* end ehca_mrmw_reverse_map_acl() */
-
-
-/*----------------------------------------------------------------------*/
-
-/*
- * MR destructor and constructor
- * used in Reregister MR verb, sets all fields in ehca_mr_t to 0,
- * except struct ib_mr and spinlock
- */
-void ehca_mr_deletenew(struct ehca_mr *mr)
-{
-       mr->flags = 0;
-       mr->num_kpages = 0;
-       mr->num_hwpages = 0;
-       mr->acl = 0;
-       mr->start = NULL;
-       mr->fmr_page_size = 0;
-       mr->fmr_max_pages = 0;
-       mr->fmr_max_maps = 0;
-       mr->fmr_map_cnt = 0;
-       memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
-       memset(&mr->galpas, 0, sizeof(mr->galpas));
-} /* end ehca_mr_deletenew() */
-
-int ehca_init_mrmw_cache(void)
-{
-       mr_cache = kmem_cache_create("ehca_cache_mr",
-                                    sizeof(struct ehca_mr), 0,
-                                    SLAB_HWCACHE_ALIGN,
-                                    NULL);
-       if (!mr_cache)
-               return -ENOMEM;
-       mw_cache = kmem_cache_create("ehca_cache_mw",
-                                    sizeof(struct ehca_mw), 0,
-                                    SLAB_HWCACHE_ALIGN,
-                                    NULL);
-       if (!mw_cache) {
-               kmem_cache_destroy(mr_cache);
-               mr_cache = NULL;
-               return -ENOMEM;
-       }
-       return 0;
-}
-
-void ehca_cleanup_mrmw_cache(void)
-{
-       kmem_cache_destroy(mr_cache);
-       kmem_cache_destroy(mw_cache);
-}
-
-static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap,
-                                    int dir)
-{
-       if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) {
-               ehca_top_bmap->dir[dir] =
-                       kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL);
-               if (!ehca_top_bmap->dir[dir])
-                       return -ENOMEM;
-               /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
-               memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE);
-       }
-       return 0;
-}
-
-static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir)
-{
-       if (!ehca_bmap_valid(ehca_bmap->top[top])) {
-               ehca_bmap->top[top] =
-                       kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL);
-               if (!ehca_bmap->top[top])
-                       return -ENOMEM;
-               /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
-               memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE);
-       }
-       return ehca_init_top_bmap(ehca_bmap->top[top], dir);
-}
-
-static inline int ehca_calc_index(unsigned long i, unsigned long s)
-{
-       return (i >> s) & EHCA_INDEX_MASK;
-}
-
-void ehca_destroy_busmap(void)
-{
-       int top, dir;
-
-       if (!ehca_bmap)
-               return;
-
-       for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
-               if (!ehca_bmap_valid(ehca_bmap->top[top]))
-                       continue;
-               for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
-                       if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
-                               continue;
-
-                       kfree(ehca_bmap->top[top]->dir[dir]);
-               }
-
-               kfree(ehca_bmap->top[top]);
-       }
-
-       kfree(ehca_bmap);
-       ehca_bmap = NULL;
-}
-
-static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages)
-{
-       unsigned long i, start_section, end_section;
-       int top, dir, idx;
-
-       if (!nr_pages)
-               return 0;
-
-       if (!ehca_bmap) {
-               ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL);
-               if (!ehca_bmap)
-                       return -ENOMEM;
-               /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
-               memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE);
-       }
-
-       start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE;
-       end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE;
-       for (i = start_section; i < end_section; i++) {
-               int ret;
-               top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT);
-               dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT);
-               idx = i & EHCA_INDEX_MASK;
-
-               ret = ehca_init_bmap(ehca_bmap, top, dir);
-               if (ret) {
-                       ehca_destroy_busmap();
-                       return ret;
-               }
-               ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len;
-               ehca_mr_len += EHCA_SECTSIZE;
-       }
-       return 0;
-}
-
-static int ehca_is_hugepage(unsigned long pfn)
-{
-       int page_order;
-
-       if (pfn & EHCA_HUGEPAGE_PFN_MASK)
-               return 0;
-
-       page_order = compound_order(pfn_to_page(pfn));
-       if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT)
-               return 0;
-
-       return 1;
-}
-
-static int ehca_create_busmap_callback(unsigned long initial_pfn,
-                                      unsigned long total_nr_pages, void *arg)
-{
-       int ret;
-       unsigned long pfn, start_pfn, end_pfn, nr_pages;
-
-       if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE)
-               return ehca_update_busmap(initial_pfn, total_nr_pages);
-
-       /* Given chunk is >= 16GB -> check for hugepages */
-       start_pfn = initial_pfn;
-       end_pfn = initial_pfn + total_nr_pages;
-       pfn = start_pfn;
-
-       while (pfn < end_pfn) {
-               if (ehca_is_hugepage(pfn)) {
-                       /* Add mem found in front of the hugepage */
-                       nr_pages = pfn - start_pfn;
-                       ret = ehca_update_busmap(start_pfn, nr_pages);
-                       if (ret)
-                               return ret;
-                       /* Skip the hugepage */
-                       pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE);
-                       start_pfn = pfn;
-               } else
-                       pfn += (EHCA_SECTSIZE / PAGE_SIZE);
-       }
-
-       /* Add mem found behind the hugepage(s)  */
-       nr_pages = pfn - start_pfn;
-       return ehca_update_busmap(start_pfn, nr_pages);
-}
-
-int ehca_create_busmap(void)
-{
-       int ret;
-
-       ehca_mr_len = 0;
-       ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL,
-                                  ehca_create_busmap_callback);
-       return ret;
-}
-
-static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
-                                  struct ehca_mr *e_mr,
-                                  struct ehca_mr_pginfo *pginfo)
-{
-       int top;
-       u64 hret, *kpage;
-
-       kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!kpage) {
-               ehca_err(&shca->ib_device, "kpage alloc failed");
-               return -ENOMEM;
-       }
-       for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
-               if (!ehca_bmap_valid(ehca_bmap->top[top]))
-                       continue;
-               hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo);
-               if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS))
-                       break;
-       }
-
-       ehca_free_fw_ctrlblock(kpage);
-
-       if (hret == H_SUCCESS)
-               return 0; /* Everything is fine */
-       else {
-               ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, "
-                                "h_ret=%lli e_mr=%p top=%x lkey=%x "
-                                "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top,
-                                e_mr->ib.ib_mr.lkey,
-                                shca->ipz_hca_handle.handle,
-                                e_mr->ipz_mr_handle.handle);
-               return ehca2ib_return_code(hret);
-       }
-}
-
-static u64 ehca_map_vaddr(void *caddr)
-{
-       int top, dir, idx;
-       unsigned long abs_addr, offset;
-       u64 entry;
-
-       if (!ehca_bmap)
-               return EHCA_INVAL_ADDR;
-
-       abs_addr = __pa(caddr);
-       top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT);
-       if (!ehca_bmap_valid(ehca_bmap->top[top]))
-               return EHCA_INVAL_ADDR;
-
-       dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT);
-       if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
-               return EHCA_INVAL_ADDR;
-
-       idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT);
-
-       entry = ehca_bmap->top[top]->dir[dir]->ent[idx];
-       if (ehca_bmap_valid(entry)) {
-               offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1);
-               return entry | offset;
-       } else
-               return EHCA_INVAL_ADDR;
-}
-
-static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
-       return dma_addr == EHCA_INVAL_ADDR;
-}
-
-static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr,
-                              size_t size, enum dma_data_direction direction)
-{
-       if (cpu_addr)
-               return ehca_map_vaddr(cpu_addr);
-       else
-               return EHCA_INVAL_ADDR;
-}
-
-static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
-                                 enum dma_data_direction direction)
-{
-       /* This is only a stub; nothing to be done here */
-}
-
-static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page,
-                            unsigned long offset, size_t size,
-                            enum dma_data_direction direction)
-{
-       u64 addr;
-
-       if (offset + size > PAGE_SIZE)
-               return EHCA_INVAL_ADDR;
-
-       addr = ehca_map_vaddr(page_address(page));
-       if (!ehca_dma_mapping_error(dev, addr))
-               addr += offset;
-
-       return addr;
-}
-
-static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
-                               enum dma_data_direction direction)
-{
-       /* This is only a stub; nothing to be done here */
-}
-
-static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl,
-                          int nents, enum dma_data_direction direction)
-{
-       struct scatterlist *sg;
-       int i;
-
-       for_each_sg(sgl, sg, nents, i) {
-               u64 addr;
-               addr = ehca_map_vaddr(sg_virt(sg));
-               if (ehca_dma_mapping_error(dev, addr))
-                       return 0;
-
-               sg->dma_address = addr;
-               sg->dma_length = sg->length;
-       }
-       return nents;
-}
-
-static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
-                             int nents, enum dma_data_direction direction)
-{
-       /* This is only a stub; nothing to be done here */
-}
-
-static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr,
-                                        size_t size,
-                                        enum dma_data_direction dir)
-{
-       dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
-}
-
-static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr,
-                                           size_t size,
-                                           enum dma_data_direction dir)
-{
-       dma_sync_single_for_device(dev->dma_device, addr, size, dir);
-}
-
-static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size,
-                                    u64 *dma_handle, gfp_t flag)
-{
-       struct page *p;
-       void *addr = NULL;
-       u64 dma_addr;
-
-       p = alloc_pages(flag, get_order(size));
-       if (p) {
-               addr = page_address(p);
-               dma_addr = ehca_map_vaddr(addr);
-               if (ehca_dma_mapping_error(dev, dma_addr)) {
-                       free_pages((unsigned long)addr, get_order(size));
-                       return NULL;
-               }
-               if (dma_handle)
-                       *dma_handle = dma_addr;
-               return addr;
-       }
-       return NULL;
-}
-
-static void ehca_dma_free_coherent(struct ib_device *dev, size_t size,
-                                  void *cpu_addr, u64 dma_handle)
-{
-       if (cpu_addr && size)
-               free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-
-struct ib_dma_mapping_ops ehca_dma_mapping_ops = {
-       .mapping_error          = ehca_dma_mapping_error,
-       .map_single             = ehca_dma_map_single,
-       .unmap_single           = ehca_dma_unmap_single,
-       .map_page               = ehca_dma_map_page,
-       .unmap_page             = ehca_dma_unmap_page,
-       .map_sg                 = ehca_dma_map_sg,
-       .unmap_sg               = ehca_dma_unmap_sg,
-       .sync_single_for_cpu    = ehca_dma_sync_single_for_cpu,
-       .sync_single_for_device = ehca_dma_sync_single_for_device,
-       .alloc_coherent         = ehca_dma_alloc_coherent,
-       .free_coherent          = ehca_dma_free_coherent,
-};
diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.h b/drivers/staging/rdma/ehca/ehca_mrmw.h
deleted file mode 100644 (file)
index 50d8b51..0000000
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  MR/MW declarations and inline functions
- *
- *  Authors: Dietmar Decker <ddecker@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _EHCA_MRMW_H_
-#define _EHCA_MRMW_H_
-
-enum ehca_reg_type {
-       EHCA_REG_MR,
-       EHCA_REG_BUSMAP_MR
-};
-
-int ehca_reg_mr(struct ehca_shca *shca,
-               struct ehca_mr *e_mr,
-               u64 *iova_start,
-               u64 size,
-               int acl,
-               struct ehca_pd *e_pd,
-               struct ehca_mr_pginfo *pginfo,
-               u32 *lkey,
-               u32 *rkey,
-               enum ehca_reg_type reg_type);
-
-int ehca_reg_mr_rpages(struct ehca_shca *shca,
-                      struct ehca_mr *e_mr,
-                      struct ehca_mr_pginfo *pginfo);
-
-int ehca_rereg_mr(struct ehca_shca *shca,
-                 struct ehca_mr *e_mr,
-                 u64 *iova_start,
-                 u64 size,
-                 int mr_access_flags,
-                 struct ehca_pd *e_pd,
-                 struct ehca_mr_pginfo *pginfo,
-                 u32 *lkey,
-                 u32 *rkey);
-
-int ehca_unmap_one_fmr(struct ehca_shca *shca,
-                      struct ehca_mr *e_fmr);
-
-int ehca_reg_smr(struct ehca_shca *shca,
-                struct ehca_mr *e_origmr,
-                struct ehca_mr *e_newmr,
-                u64 *iova_start,
-                int acl,
-                struct ehca_pd *e_pd,
-                u32 *lkey,
-                u32 *rkey);
-
-int ehca_reg_internal_maxmr(struct ehca_shca *shca,
-                           struct ehca_pd *e_pd,
-                           struct ehca_mr **maxmr);
-
-int ehca_reg_maxmr(struct ehca_shca *shca,
-                  struct ehca_mr *e_newmr,
-                  u64 *iova_start,
-                  int acl,
-                  struct ehca_pd *e_pd,
-                  u32 *lkey,
-                  u32 *rkey);
-
-int ehca_dereg_internal_maxmr(struct ehca_shca *shca);
-
-int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
-                                 int num_phys_buf,
-                                 u64 *iova_start,
-                                 u64 *size);
-
-int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
-                            u64 *page_list,
-                            int list_len);
-
-int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
-                    u32 number,
-                    u64 *kpage);
-
-int ehca_mr_is_maxmr(u64 size,
-                    u64 *iova_start);
-
-void ehca_mrmw_map_acl(int ib_acl,
-                      u32 *hipz_acl);
-
-void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl);
-
-void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
-                              int *ib_acl);
-
-void ehca_mr_deletenew(struct ehca_mr *mr);
-
-int ehca_create_busmap(void);
-
-void ehca_destroy_busmap(void);
-
-extern struct ib_dma_mapping_ops ehca_dma_mapping_ops;
-#endif  /*_EHCA_MRMW_H_*/
diff --git a/drivers/staging/rdma/ehca/ehca_pd.c b/drivers/staging/rdma/ehca/ehca_pd.c
deleted file mode 100644 (file)
index 2a8aae4..0000000
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  PD functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-
-static struct kmem_cache *pd_cache;
-
-struct ib_pd *ehca_alloc_pd(struct ib_device *device,
-                           struct ib_ucontext *context, struct ib_udata *udata)
-{
-       struct ehca_pd *pd;
-       int i;
-
-       pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL);
-       if (!pd) {
-               ehca_err(device, "device=%p context=%p out of memory",
-                        device, context);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       for (i = 0; i < 2; i++) {
-               INIT_LIST_HEAD(&pd->free[i]);
-               INIT_LIST_HEAD(&pd->full[i]);
-       }
-       mutex_init(&pd->lock);
-
-       /*
-        * Kernel PD: when device = -1, 0
-        * User   PD: when context != -1
-        */
-       if (!context) {
-               /*
-                * Kernel PDs after init reuses always
-                * the one created in ehca_shca_reopen()
-                */
-               struct ehca_shca *shca = container_of(device, struct ehca_shca,
-                                                     ib_device);
-               pd->fw_pd.value = shca->pd->fw_pd.value;
-       } else
-               pd->fw_pd.value = (u64)pd;
-
-       return &pd->ib_pd;
-}
-
-int ehca_dealloc_pd(struct ib_pd *pd)
-{
-       struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
-       int i, leftovers = 0;
-       struct ipz_small_queue_page *page, *tmp;
-
-       for (i = 0; i < 2; i++) {
-               list_splice(&my_pd->full[i], &my_pd->free[i]);
-               list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) {
-                       leftovers = 1;
-                       free_page(page->page);
-                       kmem_cache_free(small_qp_cache, page);
-               }
-       }
-
-       if (leftovers)
-               ehca_warn(pd->device,
-                         "Some small queue pages were not freed");
-
-       kmem_cache_free(pd_cache, my_pd);
-
-       return 0;
-}
-
-int ehca_init_pd_cache(void)
-{
-       pd_cache = kmem_cache_create("ehca_cache_pd",
-                                    sizeof(struct ehca_pd), 0,
-                                    SLAB_HWCACHE_ALIGN,
-                                    NULL);
-       if (!pd_cache)
-               return -ENOMEM;
-       return 0;
-}
-
-void ehca_cleanup_pd_cache(void)
-{
-       kmem_cache_destroy(pd_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_qes.h b/drivers/staging/rdma/ehca/ehca_qes.h
deleted file mode 100644 (file)
index 90c4efa..0000000
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Hardware request structures
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#ifndef _EHCA_QES_H_
-#define _EHCA_QES_H_
-
-#include "ehca_tools.h"
-
-/* virtual scatter gather entry to specify remote addresses with length */
-struct ehca_vsgentry {
-       u64 vaddr;
-       u32 lkey;
-       u32 length;
-};
-
-#define GRH_FLAG_MASK        EHCA_BMASK_IBM( 7,  7)
-#define GRH_IPVERSION_MASK   EHCA_BMASK_IBM( 0,  3)
-#define GRH_TCLASS_MASK      EHCA_BMASK_IBM( 4, 12)
-#define GRH_FLOWLABEL_MASK   EHCA_BMASK_IBM(13, 31)
-#define GRH_PAYLEN_MASK      EHCA_BMASK_IBM(32, 47)
-#define GRH_NEXTHEADER_MASK  EHCA_BMASK_IBM(48, 55)
-#define GRH_HOPLIMIT_MASK    EHCA_BMASK_IBM(56, 63)
-
-/*
- * Unreliable Datagram Address Vector Format
- * see IBTA Vol1 chapter 8.3 Global Routing Header
- */
-struct ehca_ud_av {
-       u8 sl;
-       u8 lnh;
-       u16 dlid;
-       u8 reserved1;
-       u8 reserved2;
-       u8 reserved3;
-       u8 slid_path_bits;
-       u8 reserved4;
-       u8 ipd;
-       u8 reserved5;
-       u8 pmtu;
-       u32 reserved6;
-       u64 reserved7;
-       union {
-               struct {
-                       u64 word_0; /* always set to 6  */
-                       /*should be 0x1B for IB transport */
-                       u64 word_1;
-                       u64 word_2;
-                       u64 word_3;
-                       u64 word_4;
-               } grh;
-               struct {
-                       u32 wd_0;
-                       u32 wd_1;
-                       /* DWord_1 --> SGID */
-
-                       u32 sgid_wd3;
-                       u32 sgid_wd2;
-
-                       u32 sgid_wd1;
-                       u32 sgid_wd0;
-                       /* DWord_3 --> DGID */
-
-                       u32 dgid_wd3;
-                       u32 dgid_wd2;
-
-                       u32 dgid_wd1;
-                       u32 dgid_wd0;
-               } grh_l;
-       };
-};
-
-/* maximum number of sg entries allowed in a WQE */
-#define MAX_WQE_SG_ENTRIES 252
-
-#define WQE_OPTYPE_SEND             0x80
-#define WQE_OPTYPE_RDMAREAD         0x40
-#define WQE_OPTYPE_RDMAWRITE        0x20
-#define WQE_OPTYPE_CMPSWAP          0x10
-#define WQE_OPTYPE_FETCHADD         0x08
-#define WQE_OPTYPE_BIND             0x04
-
-#define WQE_WRFLAG_REQ_SIGNAL_COM   0x80
-#define WQE_WRFLAG_FENCE            0x40
-#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20
-#define WQE_WRFLAG_SOLIC_EVENT      0x10
-
-#define WQEF_CACHE_HINT             0x80
-#define WQEF_CACHE_HINT_RD_WR       0x40
-#define WQEF_TIMED_WQE              0x20
-#define WQEF_PURGE                  0x08
-#define WQEF_HIGH_NIBBLE            0xF0
-
-#define MW_BIND_ACCESSCTRL_R_WRITE   0x40
-#define MW_BIND_ACCESSCTRL_R_READ    0x20
-#define MW_BIND_ACCESSCTRL_R_ATOMIC  0x10
-
-struct ehca_wqe {
-       u64 work_request_id;
-       u8 optype;
-       u8 wr_flag;
-       u16 pkeyi;
-       u8 wqef;
-       u8 nr_of_data_seg;
-       u16 wqe_provided_slid;
-       u32 destination_qp_number;
-       u32 resync_psn_sqp;
-       u32 local_ee_context_qkey;
-       u32 immediate_data;
-       union {
-               struct {
-                       u64 remote_virtual_address;
-                       u32 rkey;
-                       u32 reserved;
-                       u64 atomic_1st_op_dma_len;
-                       u64 atomic_2nd_op;
-                       struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
-
-               } nud;
-               struct {
-                       u64 ehca_ud_av_ptr;
-                       u64 reserved1;
-                       u64 reserved2;
-                       u64 reserved3;
-                       struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
-               } ud_avp;
-               struct {
-                       struct ehca_ud_av ud_av;
-                       struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES -
-                                                    2];
-               } ud_av;
-               struct {
-                       u64 reserved0;
-                       u64 reserved1;
-                       u64 reserved2;
-                       u64 reserved3;
-                       struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
-               } all_rcv;
-
-               struct {
-                       u64 reserved;
-                       u32 rkey;
-                       u32 old_rkey;
-                       u64 reserved1;
-                       u64 reserved2;
-                       u64 virtual_address;
-                       u32 reserved3;
-                       u32 length;
-                       u32 reserved4;
-                       u16 reserved5;
-                       u8 reserved6;
-                       u8 lr_ctl;
-                       u32 lkey;
-                       u32 reserved7;
-                       u64 reserved8;
-                       u64 reserved9;
-                       u64 reserved10;
-                       u64 reserved11;
-               } bind;
-               struct {
-                       u64 reserved12;
-                       u64 reserved13;
-                       u32 size;
-                       u32 start;
-               } inline_data;
-       } u;
-
-};
-
-#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0)
-#define WC_IMM_DATA     EHCA_BMASK_IBM(1, 1)
-#define WC_GRH_PRESENT  EHCA_BMASK_IBM(2, 2)
-#define WC_SE_BIT       EHCA_BMASK_IBM(3, 3)
-#define WC_STATUS_ERROR_BIT 0x80000000
-#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800
-#define WC_STATUS_PURGE_BIT 0x10
-#define WC_SEND_RECEIVE_BIT 0x80
-
-struct ehca_cqe {
-       u64 work_request_id;
-       u8 optype;
-       u8 w_completion_flags;
-       u16 reserved1;
-       u32 nr_bytes_transferred;
-       u32 immediate_data;
-       u32 local_qp_number;
-       u8 freed_resource_count;
-       u8 service_level;
-       u16 wqe_count;
-       u32 qp_token;
-       u32 qkey_ee_token;
-       u32 remote_qp_number;
-       u16 dlid;
-       u16 rlid;
-       u16 reserved2;
-       u16 pkey_index;
-       u32 cqe_timestamp;
-       u32 wqe_timestamp;
-       u8 wqe_timestamp_valid;
-       u8 reserved3;
-       u8 reserved4;
-       u8 cqe_flags;
-       u32 status;
-};
-
-struct ehca_eqe {
-       u64 entry;
-};
-
-struct ehca_mrte {
-       u64 starting_va;
-       u64 length; /* length of memory region in bytes*/
-       u32 pd;
-       u8 key_instance;
-       u8 pagesize;
-       u8 mr_control;
-       u8 local_remote_access_ctrl;
-       u8 reserved[0x20 - 0x18];
-       u64 at_pointer[4];
-};
-#endif /*_EHCA_QES_H_*/
diff --git a/drivers/staging/rdma/ehca/ehca_qp.c b/drivers/staging/rdma/ehca/ehca_qp.c
deleted file mode 100644 (file)
index 896c01f..0000000
+++ /dev/null
@@ -1,2256 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  QP functions
- *
- *  Authors: Joachim Fenkes <fenkes@de.ibm.com>
- *           Stefan Roscher <stefan.roscher@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-#include "hipz_fns.h"
-
-static struct kmem_cache *qp_cache;
-
-/*
- * attributes not supported by query qp
- */
-#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS       | \
-                                    IB_QP_EN_SQD_ASYNC_NOTIFY)
-
-/*
- * ehca (internal) qp state values
- */
-enum ehca_qp_state {
-       EHCA_QPS_RESET = 1,
-       EHCA_QPS_INIT = 2,
-       EHCA_QPS_RTR = 3,
-       EHCA_QPS_RTS = 5,
-       EHCA_QPS_SQD = 6,
-       EHCA_QPS_SQE = 8,
-       EHCA_QPS_ERR = 128
-};
-
-/*
- * qp state transitions as defined by IB Arch Rel 1.1 page 431
- */
-enum ib_qp_statetrans {
-       IB_QPST_ANY2RESET,
-       IB_QPST_ANY2ERR,
-       IB_QPST_RESET2INIT,
-       IB_QPST_INIT2RTR,
-       IB_QPST_INIT2INIT,
-       IB_QPST_RTR2RTS,
-       IB_QPST_RTS2SQD,
-       IB_QPST_RTS2RTS,
-       IB_QPST_SQD2RTS,
-       IB_QPST_SQE2RTS,
-       IB_QPST_SQD2SQD,
-       IB_QPST_MAX     /* nr of transitions, this must be last!!! */
-};
-
-/*
- * ib2ehca_qp_state maps IB to ehca qp_state
- * returns ehca qp state corresponding to given ib qp state
- */
-static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state)
-{
-       switch (ib_qp_state) {
-       case IB_QPS_RESET:
-               return EHCA_QPS_RESET;
-       case IB_QPS_INIT:
-               return EHCA_QPS_INIT;
-       case IB_QPS_RTR:
-               return EHCA_QPS_RTR;
-       case IB_QPS_RTS:
-               return EHCA_QPS_RTS;
-       case IB_QPS_SQD:
-               return EHCA_QPS_SQD;
-       case IB_QPS_SQE:
-               return EHCA_QPS_SQE;
-       case IB_QPS_ERR:
-               return EHCA_QPS_ERR;
-       default:
-               ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state);
-               return -EINVAL;
-       }
-}
-
-/*
- * ehca2ib_qp_state maps ehca to IB qp_state
- * returns ib qp state corresponding to given ehca qp state
- */
-static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state
-                                               ehca_qp_state)
-{
-       switch (ehca_qp_state) {
-       case EHCA_QPS_RESET:
-               return IB_QPS_RESET;
-       case EHCA_QPS_INIT:
-               return IB_QPS_INIT;
-       case EHCA_QPS_RTR:
-               return IB_QPS_RTR;
-       case EHCA_QPS_RTS:
-               return IB_QPS_RTS;
-       case EHCA_QPS_SQD:
-               return IB_QPS_SQD;
-       case EHCA_QPS_SQE:
-               return IB_QPS_SQE;
-       case EHCA_QPS_ERR:
-               return IB_QPS_ERR;
-       default:
-               ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state);
-               return -EINVAL;
-       }
-}
-
-/*
- * ehca_qp_type used as index for req_attr and opt_attr of
- * struct ehca_modqp_statetrans
- */
-enum ehca_qp_type {
-       QPT_RC = 0,
-       QPT_UC = 1,
-       QPT_UD = 2,
-       QPT_SQP = 3,
-       QPT_MAX
-};
-
-/*
- * ib2ehcaqptype maps Ib to ehca qp_type
- * returns ehca qp type corresponding to ib qp type
- */
-static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype)
-{
-       switch (ibqptype) {
-       case IB_QPT_SMI:
-       case IB_QPT_GSI:
-               return QPT_SQP;
-       case IB_QPT_RC:
-               return QPT_RC;
-       case IB_QPT_UC:
-               return QPT_UC;
-       case IB_QPT_UD:
-               return QPT_UD;
-       default:
-               ehca_gen_err("Invalid ibqptype=%x", ibqptype);
-               return -EINVAL;
-       }
-}
-
-static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate,
-                                                        int ib_tostate)
-{
-       int index = -EINVAL;
-       switch (ib_tostate) {
-       case IB_QPS_RESET:
-               index = IB_QPST_ANY2RESET;
-               break;
-       case IB_QPS_INIT:
-               switch (ib_fromstate) {
-               case IB_QPS_RESET:
-                       index = IB_QPST_RESET2INIT;
-                       break;
-               case IB_QPS_INIT:
-                       index = IB_QPST_INIT2INIT;
-                       break;
-               }
-               break;
-       case IB_QPS_RTR:
-               if (ib_fromstate == IB_QPS_INIT)
-                       index = IB_QPST_INIT2RTR;
-               break;
-       case IB_QPS_RTS:
-               switch (ib_fromstate) {
-               case IB_QPS_RTR:
-                       index = IB_QPST_RTR2RTS;
-                       break;
-               case IB_QPS_RTS:
-                       index = IB_QPST_RTS2RTS;
-                       break;
-               case IB_QPS_SQD:
-                       index = IB_QPST_SQD2RTS;
-                       break;
-               case IB_QPS_SQE:
-                       index = IB_QPST_SQE2RTS;
-                       break;
-               }
-               break;
-       case IB_QPS_SQD:
-               if (ib_fromstate == IB_QPS_RTS)
-                       index = IB_QPST_RTS2SQD;
-               break;
-       case IB_QPS_SQE:
-               break;
-       case IB_QPS_ERR:
-               index = IB_QPST_ANY2ERR;
-               break;
-       default:
-               break;
-       }
-       return index;
-}
-
-/*
- * ibqptype2servicetype returns hcp service type corresponding to given
- * ib qp type used by create_qp()
- */
-static inline int ibqptype2servicetype(enum ib_qp_type ibqptype)
-{
-       switch (ibqptype) {
-       case IB_QPT_SMI:
-       case IB_QPT_GSI:
-               return ST_UD;
-       case IB_QPT_RC:
-               return ST_RC;
-       case IB_QPT_UC:
-               return ST_UC;
-       case IB_QPT_UD:
-               return ST_UD;
-       case IB_QPT_RAW_IPV6:
-               return -EINVAL;
-       case IB_QPT_RAW_ETHERTYPE:
-               return -EINVAL;
-       default:
-               ehca_gen_err("Invalid ibqptype=%x", ibqptype);
-               return -EINVAL;
-       }
-}
-
-/*
- * init userspace queue info from ipz_queue data
- */
-static inline void queue2resp(struct ipzu_queue_resp *resp,
-                             struct ipz_queue *queue)
-{
-       resp->qe_size = queue->qe_size;
-       resp->act_nr_of_sg = queue->act_nr_of_sg;
-       resp->queue_length = queue->queue_length;
-       resp->pagesize = queue->pagesize;
-       resp->toggle_state = queue->toggle_state;
-       resp->offset = queue->offset;
-}
-
-/*
- * init_qp_queue initializes/constructs r/squeue and registers queue pages.
- */
-static inline int init_qp_queue(struct ehca_shca *shca,
-                               struct ehca_pd *pd,
-                               struct ehca_qp *my_qp,
-                               struct ipz_queue *queue,
-                               int q_type,
-                               u64 expected_hret,
-                               struct ehca_alloc_queue_parms *parms,
-                               int wqe_size)
-{
-       int ret, cnt, ipz_rc, nr_q_pages;
-       void *vpage;
-       u64 rpage, h_ret;
-       struct ib_device *ib_dev = &shca->ib_device;
-       struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
-
-       if (!parms->queue_size)
-               return 0;
-
-       if (parms->is_small) {
-               nr_q_pages = 1;
-               ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
-                                       128 << parms->page_size,
-                                       wqe_size, parms->act_nr_sges, 1);
-       } else {
-               nr_q_pages = parms->queue_size;
-               ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
-                                       EHCA_PAGESIZE, wqe_size,
-                                       parms->act_nr_sges, 0);
-       }
-
-       if (!ipz_rc) {
-               ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%i",
-                        ipz_rc);
-               return -EBUSY;
-       }
-
-       /* register queue pages */
-       for (cnt = 0; cnt < nr_q_pages; cnt++) {
-               vpage = ipz_qpageit_get_inc(queue);
-               if (!vpage) {
-                       ehca_err(ib_dev, "ipz_qpageit_get_inc() "
-                                "failed p_vpage= %p", vpage);
-                       ret = -EINVAL;
-                       goto init_qp_queue1;
-               }
-               rpage = __pa(vpage);
-
-               h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
-                                                my_qp->ipz_qp_handle,
-                                                NULL, 0, q_type,
-                                                rpage, parms->is_small ? 0 : 1,
-                                                my_qp->galpas.kernel);
-               if (cnt == (nr_q_pages - 1)) {  /* last page! */
-                       if (h_ret != expected_hret) {
-                               ehca_err(ib_dev, "hipz_qp_register_rpage() "
-                                        "h_ret=%lli", h_ret);
-                               ret = ehca2ib_return_code(h_ret);
-                               goto init_qp_queue1;
-                       }
-                       vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
-                       if (vpage) {
-                               ehca_err(ib_dev, "ipz_qpageit_get_inc() "
-                                        "should not succeed vpage=%p", vpage);
-                               ret = -EINVAL;
-                               goto init_qp_queue1;
-                       }
-               } else {
-                       if (h_ret != H_PAGE_REGISTERED) {
-                               ehca_err(ib_dev, "hipz_qp_register_rpage() "
-                                        "h_ret=%lli", h_ret);
-                               ret = ehca2ib_return_code(h_ret);
-                               goto init_qp_queue1;
-                       }
-               }
-       }
-
-       ipz_qeit_reset(queue);
-
-       return 0;
-
-init_qp_queue1:
-       ipz_queue_dtor(pd, queue);
-       return ret;
-}
-
-static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp)
-{
-       if (is_llqp)
-               return 128 << act_nr_sge;
-       else
-               return offsetof(struct ehca_wqe,
-                               u.nud.sg_list[act_nr_sge]);
-}
-
-static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue,
-                                      int req_nr_sge, int is_llqp)
-{
-       u32 wqe_size, q_size;
-       int act_nr_sge = req_nr_sge;
-
-       if (!is_llqp)
-               /* round up #SGEs so WQE size is a power of 2 */
-               for (act_nr_sge = 4; act_nr_sge <= 252;
-                    act_nr_sge = 4 + 2 * act_nr_sge)
-                       if (act_nr_sge >= req_nr_sge)
-                               break;
-
-       wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp);
-       q_size = wqe_size * (queue->max_wr + 1);
-
-       if (q_size <= 512)
-               queue->page_size = 2;
-       else if (q_size <= 1024)
-               queue->page_size = 3;
-       else
-               queue->page_size = 0;
-
-       queue->is_small = (queue->page_size != 0);
-}
-
-/* needs to be called with cq->spinlock held */
-void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq)
-{
-       struct list_head *list, *node;
-
-       /* TODO: support low latency QPs */
-       if (qp->ext_type == EQPT_LLQP)
-               return;
-
-       if (on_sq) {
-               list = &qp->send_cq->sqp_err_list;
-               node = &qp->sq_err_node;
-       } else {
-               list = &qp->recv_cq->rqp_err_list;
-               node = &qp->rq_err_node;
-       }
-
-       if (list_empty(node))
-               list_add_tail(node, list);
-
-       return;
-}
-
-static void del_from_err_list(struct ehca_cq *cq, struct list_head *node)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&cq->spinlock, flags);
-
-       if (!list_empty(node))
-               list_del_init(node);
-
-       spin_unlock_irqrestore(&cq->spinlock, flags);
-}
-
-static void reset_queue_map(struct ehca_queue_map *qmap)
-{
-       int i;
-
-       qmap->tail = qmap->entries - 1;
-       qmap->left_to_poll = 0;
-       qmap->next_wqe_idx = 0;
-       for (i = 0; i < qmap->entries; i++) {
-               qmap->map[i].reported = 1;
-               qmap->map[i].cqe_req = 0;
-       }
-}
-
-/*
- * Create an ib_qp struct that is either a QP or an SRQ, depending on
- * the value of the is_srq parameter. If init_attr and srq_init_attr share
- * fields, the field out of init_attr is used.
- */
-static struct ehca_qp *internal_create_qp(
-       struct ib_pd *pd,
-       struct ib_qp_init_attr *init_attr,
-       struct ib_srq_init_attr *srq_init_attr,
-       struct ib_udata *udata, int is_srq)
-{
-       struct ehca_qp *my_qp, *my_srq = NULL;
-       struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
-       struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
-                                             ib_device);
-       struct ib_ucontext *context = NULL;
-       u64 h_ret;
-       int is_llqp = 0, has_srq = 0, is_user = 0;
-       int qp_type, max_send_sge, max_recv_sge, ret;
-
-       /* h_call's out parameters */
-       struct ehca_alloc_qp_parms parms;
-       u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
-       unsigned long flags;
-
-       if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) {
-               ehca_err(pd->device, "Unable to create QP, max number of %i "
-                        "QPs reached.", shca->max_num_qps);
-               ehca_err(pd->device, "To increase the maximum number of QPs "
-                        "use the number_of_qps module parameter.\n");
-               return ERR_PTR(-ENOSPC);
-       }
-
-       if (init_attr->create_flags) {
-               atomic_dec(&shca->num_qps);
-               return ERR_PTR(-EINVAL);
-       }
-
-       memset(&parms, 0, sizeof(parms));
-       qp_type = init_attr->qp_type;
-
-       if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR &&
-               init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
-               ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed",
-                        init_attr->sq_sig_type);
-               atomic_dec(&shca->num_qps);
-               return ERR_PTR(-EINVAL);
-       }
-
-       /* save LLQP info */
-       if (qp_type & 0x80) {
-               is_llqp = 1;
-               parms.ext_type = EQPT_LLQP;
-               parms.ll_comp_flags = qp_type & LLQP_COMP_MASK;
-       }
-       qp_type &= 0x1F;
-       init_attr->qp_type &= 0x1F;
-
-       /* handle SRQ base QPs */
-       if (init_attr->srq) {
-               my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq);
-
-               if (qp_type == IB_QPT_UC) {
-                       ehca_err(pd->device, "UC with SRQ not supported");
-                       atomic_dec(&shca->num_qps);
-                       return ERR_PTR(-EINVAL);
-               }
-
-               has_srq = 1;
-               parms.ext_type = EQPT_SRQBASE;
-               parms.srq_qpn = my_srq->real_qp_num;
-       }
-
-       if (is_llqp && has_srq) {
-               ehca_err(pd->device, "LLQPs can't have an SRQ");
-               atomic_dec(&shca->num_qps);
-               return ERR_PTR(-EINVAL);
-       }
-
-       /* handle SRQs */
-       if (is_srq) {
-               parms.ext_type = EQPT_SRQ;
-               parms.srq_limit = srq_init_attr->attr.srq_limit;
-               if (init_attr->cap.max_recv_sge > 3) {
-                       ehca_err(pd->device, "no more than three SGEs "
-                                "supported for SRQ  pd=%p  max_sge=%x",
-                                pd, init_attr->cap.max_recv_sge);
-                       atomic_dec(&shca->num_qps);
-                       return ERR_PTR(-EINVAL);
-               }
-       }
-
-       /* check QP type */
-       if (qp_type != IB_QPT_UD &&
-           qp_type != IB_QPT_UC &&
-           qp_type != IB_QPT_RC &&
-           qp_type != IB_QPT_SMI &&
-           qp_type != IB_QPT_GSI) {
-               ehca_err(pd->device, "wrong QP Type=%x", qp_type);
-               atomic_dec(&shca->num_qps);
-               return ERR_PTR(-EINVAL);
-       }
-
-       if (is_llqp) {
-               switch (qp_type) {
-               case IB_QPT_RC:
-                       if ((init_attr->cap.max_send_wr > 255) ||
-                           (init_attr->cap.max_recv_wr > 255)) {
-                               ehca_err(pd->device,
-                                        "Invalid Number of max_sq_wr=%x "
-                                        "or max_rq_wr=%x for RC LLQP",
-                                        init_attr->cap.max_send_wr,
-                                        init_attr->cap.max_recv_wr);
-                               atomic_dec(&shca->num_qps);
-                               return ERR_PTR(-EINVAL);
-                       }
-                       break;
-               case IB_QPT_UD:
-                       if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) {
-                               ehca_err(pd->device, "UD LLQP not supported "
-                                        "by this adapter");
-                               atomic_dec(&shca->num_qps);
-                               return ERR_PTR(-ENOSYS);
-                       }
-                       if (!(init_attr->cap.max_send_sge <= 5
-                           && init_attr->cap.max_send_sge >= 1
-                           && init_attr->cap.max_recv_sge <= 5
-                           && init_attr->cap.max_recv_sge >= 1)) {
-                               ehca_err(pd->device,
-                                        "Invalid Number of max_send_sge=%x "
-                                        "or max_recv_sge=%x for UD LLQP",
-                                        init_attr->cap.max_send_sge,
-                                        init_attr->cap.max_recv_sge);
-                               atomic_dec(&shca->num_qps);
-                               return ERR_PTR(-EINVAL);
-                       } else if (init_attr->cap.max_send_wr > 255) {
-                               ehca_err(pd->device,
-                                        "Invalid Number of "
-                                        "max_send_wr=%x for UD QP_TYPE=%x",
-                                        init_attr->cap.max_send_wr, qp_type);
-                               atomic_dec(&shca->num_qps);
-                               return ERR_PTR(-EINVAL);
-                       }
-                       break;
-               default:
-                       ehca_err(pd->device, "unsupported LL QP Type=%x",
-                                qp_type);
-                       atomic_dec(&shca->num_qps);
-                       return ERR_PTR(-EINVAL);
-               }
-       } else {
-               int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI
-                              || qp_type == IB_QPT_GSI) ? 250 : 252;
-
-               if (init_attr->cap.max_send_sge > max_sge
-                   || init_attr->cap.max_recv_sge > max_sge) {
-                       ehca_err(pd->device, "Invalid number of SGEs requested "
-                                "send_sge=%x recv_sge=%x max_sge=%x",
-                                init_attr->cap.max_send_sge,
-                                init_attr->cap.max_recv_sge, max_sge);
-                       atomic_dec(&shca->num_qps);
-                       return ERR_PTR(-EINVAL);
-               }
-       }
-
-       my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
-       if (!my_qp) {
-               ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
-               atomic_dec(&shca->num_qps);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       if (pd->uobject && udata) {
-               is_user = 1;
-               context = pd->uobject->context;
-       }
-
-       atomic_set(&my_qp->nr_events, 0);
-       init_waitqueue_head(&my_qp->wait_completion);
-       spin_lock_init(&my_qp->spinlock_s);
-       spin_lock_init(&my_qp->spinlock_r);
-       my_qp->qp_type = qp_type;
-       my_qp->ext_type = parms.ext_type;
-       my_qp->state = IB_QPS_RESET;
-
-       if (init_attr->recv_cq)
-               my_qp->recv_cq =
-                       container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
-       if (init_attr->send_cq)
-               my_qp->send_cq =
-                       container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
-
-       idr_preload(GFP_KERNEL);
-       write_lock_irqsave(&ehca_qp_idr_lock, flags);
-
-       ret = idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT);
-       if (ret >= 0)
-               my_qp->token = ret;
-
-       write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-       idr_preload_end();
-       if (ret < 0) {
-               if (ret == -ENOSPC) {
-                       ret = -EINVAL;
-                       ehca_err(pd->device, "Invalid number of qp");
-               } else {
-                       ret = -ENOMEM;
-                       ehca_err(pd->device, "Can't allocate new idr entry.");
-               }
-               goto create_qp_exit0;
-       }
-
-       if (has_srq)
-               parms.srq_token = my_qp->token;
-
-       parms.servicetype = ibqptype2servicetype(qp_type);
-       if (parms.servicetype < 0) {
-               ret = -EINVAL;
-               ehca_err(pd->device, "Invalid qp_type=%x", qp_type);
-               goto create_qp_exit1;
-       }
-
-       /* Always signal by WQE so we can hide circ. WQEs */
-       parms.sigtype = HCALL_SIGT_BY_WQE;
-
-       /* UD_AV CIRCUMVENTION */
-       max_send_sge = init_attr->cap.max_send_sge;
-       max_recv_sge = init_attr->cap.max_recv_sge;
-       if (parms.servicetype == ST_UD && !is_llqp) {
-               max_send_sge += 2;
-               max_recv_sge += 2;
-       }
-
-       parms.token = my_qp->token;
-       parms.eq_handle = shca->eq.ipz_eq_handle;
-       parms.pd = my_pd->fw_pd;
-       if (my_qp->send_cq)
-               parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle;
-       if (my_qp->recv_cq)
-               parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle;
-
-       parms.squeue.max_wr = init_attr->cap.max_send_wr;
-       parms.rqueue.max_wr = init_attr->cap.max_recv_wr;
-       parms.squeue.max_sge = max_send_sge;
-       parms.rqueue.max_sge = max_recv_sge;
-
-       /* RC QPs need one more SWQE for unsolicited ack circumvention */
-       if (qp_type == IB_QPT_RC)
-               parms.squeue.max_wr++;
-
-       if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) {
-               if (HAS_SQ(my_qp))
-                       ehca_determine_small_queue(
-                               &parms.squeue, max_send_sge, is_llqp);
-               if (HAS_RQ(my_qp))
-                       ehca_determine_small_queue(
-                               &parms.rqueue, max_recv_sge, is_llqp);
-               parms.qp_storage =
-                       (parms.squeue.is_small || parms.rqueue.is_small);
-       }
-
-       h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli",
-                        h_ret);
-               ret = ehca2ib_return_code(h_ret);
-               goto create_qp_exit1;
-       }
-
-       ib_qp_num = my_qp->real_qp_num = parms.real_qp_num;
-       my_qp->ipz_qp_handle = parms.qp_handle;
-       my_qp->galpas = parms.galpas;
-
-       swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp);
-       rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp);
-
-       switch (qp_type) {
-       case IB_QPT_RC:
-               if (is_llqp) {
-                       parms.squeue.act_nr_sges = 1;
-                       parms.rqueue.act_nr_sges = 1;
-               }
-               /* hide the extra WQE */
-               parms.squeue.act_nr_wqes--;
-               break;
-       case IB_QPT_UD:
-       case IB_QPT_GSI:
-       case IB_QPT_SMI:
-               /* UD circumvention */
-               if (is_llqp) {
-                       parms.squeue.act_nr_sges = 1;
-                       parms.rqueue.act_nr_sges = 1;
-               } else {
-                       parms.squeue.act_nr_sges -= 2;
-                       parms.rqueue.act_nr_sges -= 2;
-               }
-
-               if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) {
-                       parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr;
-                       parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr;
-                       parms.squeue.act_nr_sges = init_attr->cap.max_send_sge;
-                       parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge;
-                       ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1;
-               }
-
-               break;
-
-       default:
-               break;
-       }
-
-       /* initialize r/squeue and register queue pages */
-       if (HAS_SQ(my_qp)) {
-               ret = init_qp_queue(
-                       shca, my_pd, my_qp, &my_qp->ipz_squeue, 0,
-                       HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS,
-                       &parms.squeue, swqe_size);
-               if (ret) {
-                       ehca_err(pd->device, "Couldn't initialize squeue "
-                                "and pages ret=%i", ret);
-                       goto create_qp_exit2;
-               }
-
-               if (!is_user) {
-                       my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
-                               my_qp->ipz_squeue.qe_size;
-                       my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
-                                                   sizeof(struct ehca_qmap_entry));
-                       if (!my_qp->sq_map.map) {
-                               ehca_err(pd->device, "Couldn't allocate squeue "
-                                        "map ret=%i", ret);
-                               goto create_qp_exit3;
-                       }
-                       INIT_LIST_HEAD(&my_qp->sq_err_node);
-                       /* to avoid the generation of bogus flush CQEs */
-                       reset_queue_map(&my_qp->sq_map);
-               }
-       }
-
-       if (HAS_RQ(my_qp)) {
-               ret = init_qp_queue(
-                       shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1,
-                       H_SUCCESS, &parms.rqueue, rwqe_size);
-               if (ret) {
-                       ehca_err(pd->device, "Couldn't initialize rqueue "
-                                "and pages ret=%i", ret);
-                       goto create_qp_exit4;
-               }
-               if (!is_user) {
-                       my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
-                               my_qp->ipz_rqueue.qe_size;
-                       my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
-                                                   sizeof(struct ehca_qmap_entry));
-                       if (!my_qp->rq_map.map) {
-                               ehca_err(pd->device, "Couldn't allocate squeue "
-                                        "map ret=%i", ret);
-                               goto create_qp_exit5;
-                       }
-                       INIT_LIST_HEAD(&my_qp->rq_err_node);
-                       /* to avoid the generation of bogus flush CQEs */
-                       reset_queue_map(&my_qp->rq_map);
-               }
-       } else if (init_attr->srq && !is_user) {
-               /* this is a base QP, use the queue map of the SRQ */
-               my_qp->rq_map = my_srq->rq_map;
-               INIT_LIST_HEAD(&my_qp->rq_err_node);
-
-               my_qp->ipz_rqueue = my_srq->ipz_rqueue;
-       }
-
-       if (is_srq) {
-               my_qp->ib_srq.pd = &my_pd->ib_pd;
-               my_qp->ib_srq.device = my_pd->ib_pd.device;
-
-               my_qp->ib_srq.srq_context = init_attr->qp_context;
-               my_qp->ib_srq.event_handler = init_attr->event_handler;
-       } else {
-               my_qp->ib_qp.qp_num = ib_qp_num;
-               my_qp->ib_qp.pd = &my_pd->ib_pd;
-               my_qp->ib_qp.device = my_pd->ib_pd.device;
-
-               my_qp->ib_qp.recv_cq = init_attr->recv_cq;
-               my_qp->ib_qp.send_cq = init_attr->send_cq;
-
-               my_qp->ib_qp.qp_type = qp_type;
-               my_qp->ib_qp.srq = init_attr->srq;
-
-               my_qp->ib_qp.qp_context = init_attr->qp_context;
-               my_qp->ib_qp.event_handler = init_attr->event_handler;
-       }
-
-       init_attr->cap.max_inline_data = 0; /* not supported yet */
-       init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges;
-       init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes;
-       init_attr->cap.max_send_sge = parms.squeue.act_nr_sges;
-       init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes;
-       my_qp->init_attr = *init_attr;
-
-       if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
-               shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
-                       &my_qp->ib_qp;
-               if (ehca_nr_ports < 0) {
-                       /* alloc array to cache subsequent modify qp parms
-                        * for autodetect mode
-                        */
-                       my_qp->mod_qp_parm =
-                               kzalloc(EHCA_MOD_QP_PARM_MAX *
-                                       sizeof(*my_qp->mod_qp_parm),
-                                       GFP_KERNEL);
-                       if (!my_qp->mod_qp_parm) {
-                               ehca_err(pd->device,
-                                        "Could not alloc mod_qp_parm");
-                               goto create_qp_exit5;
-                       }
-               }
-       }
-
-       /* NOTE: define_apq0() not supported yet */
-       if (qp_type == IB_QPT_GSI) {
-               h_ret = ehca_define_sqp(shca, my_qp, init_attr);
-               if (h_ret != H_SUCCESS) {
-                       kfree(my_qp->mod_qp_parm);
-                       my_qp->mod_qp_parm = NULL;
-                       /* the QP pointer is no longer valid */
-                       shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
-                               NULL;
-                       ret = ehca2ib_return_code(h_ret);
-                       goto create_qp_exit6;
-               }
-       }
-
-       if (my_qp->send_cq) {
-               ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp);
-               if (ret) {
-                       ehca_err(pd->device,
-                                "Couldn't assign qp to send_cq ret=%i", ret);
-                       goto create_qp_exit7;
-               }
-       }
-
-       /* copy queues, galpa data to user space */
-       if (context && udata) {
-               struct ehca_create_qp_resp resp;
-               memset(&resp, 0, sizeof(resp));
-
-               resp.qp_num = my_qp->real_qp_num;
-               resp.token = my_qp->token;
-               resp.qp_type = my_qp->qp_type;
-               resp.ext_type = my_qp->ext_type;
-               resp.qkey = my_qp->qkey;
-               resp.real_qp_num = my_qp->real_qp_num;
-
-               if (HAS_SQ(my_qp))
-                       queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
-               if (HAS_RQ(my_qp))
-                       queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue);
-               resp.fw_handle_ofs = (u32)
-                       (my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1));
-
-               if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
-                       ehca_err(pd->device, "Copy to udata failed");
-                       ret = -EINVAL;
-                       goto create_qp_exit8;
-               }
-       }
-
-       return my_qp;
-
-create_qp_exit8:
-       ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
-
-create_qp_exit7:
-       kfree(my_qp->mod_qp_parm);
-
-create_qp_exit6:
-       if (HAS_RQ(my_qp) && !is_user)
-               vfree(my_qp->rq_map.map);
-
-create_qp_exit5:
-       if (HAS_RQ(my_qp))
-               ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
-
-create_qp_exit4:
-       if (HAS_SQ(my_qp) && !is_user)
-               vfree(my_qp->sq_map.map);
-
-create_qp_exit3:
-       if (HAS_SQ(my_qp))
-               ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
-
-create_qp_exit2:
-       hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
-
-create_qp_exit1:
-       write_lock_irqsave(&ehca_qp_idr_lock, flags);
-       idr_remove(&ehca_qp_idr, my_qp->token);
-       write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-
-create_qp_exit0:
-       kmem_cache_free(qp_cache, my_qp);
-       atomic_dec(&shca->num_qps);
-       return ERR_PTR(ret);
-}
-
-/*
- * ehca_create_qp() - create a queue pair and hand back its ib_qp.
- *
- * Thin wrapper around internal_create_qp(): the SRQ init-attr argument is
- * passed as NULL and the trailing flag as 0.  An error pointer coming back
- * from internal_create_qp() is passed through unchanged (only re-typed to
- * struct ib_qp *); on success the ib_qp embedded in the new ehca_qp is
- * returned.
- */
-struct ib_qp *ehca_create_qp(struct ib_pd *pd,
-                            struct ib_qp_init_attr *qp_init_attr,
-                            struct ib_udata *udata)
-{
-       struct ehca_qp *my_qp;
-
-       my_qp = internal_create_qp(pd, qp_init_attr, NULL, udata, 0);
-       if (IS_ERR(my_qp))
-               return (struct ib_qp *)my_qp;
-
-       return &my_qp->ib_qp;
-}
-
-/*
- * Forward declaration: ehca_create_srq() below calls internal_destroy_qp()
- * on its error path.  The definition is not part of this section of the
- * file and is expected further down.
- */
-static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
-                              struct ib_uobject *uobject);
-
-/*
- * ehca_create_srq() - create a shared receive queue.
- *
- * Only IB_SRQT_BASIC is accepted; any other srq_type yields
- * ERR_PTR(-ENOSYS).  The SRQ is built by internal_create_qp() from a
- * temporary ib_qp_init_attr that carries the event handler, context and
- * receive limits of srq_init_attr.  It is then stepped through three
- * hipz_h_modify_qp() calls: qp_state INIT, qp_enable, qp_state RTR.
- *
- * On success srq_init_attr->attr.max_wr is updated from the temporary
- * attr, attr.max_sge is set to 3, and the ib_srq embedded in the new
- * ehca_qp is returned.  If the control block cannot be allocated or one of
- * the modify steps fails, the object is torn down again with
- * internal_destroy_qp() and an ERR_PTR is returned.
- */
-struct ib_srq *ehca_create_srq(struct ib_pd *pd,
-                              struct ib_srq_init_attr *srq_init_attr,
-                              struct ib_udata *udata)
-{
-       struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
-                                             ib_device);
-       struct hcp_modify_qp_control_block *ctrl;
-       struct ib_qp_init_attr tmp_attr;
-       struct ehca_qp *srq_qp;
-       struct ib_srq *err;
-       u64 h_ret, mask;
-
-       if (srq_init_attr->srq_type != IB_SRQT_BASIC)
-               return ERR_PTR(-ENOSYS);
-
-       /* internal_create_qp() reads the common attributes from a
-        * qp_init_attr, so mirror the SRQ ones into a zeroed temporary.
-        */
-       memset(&tmp_attr, 0, sizeof(tmp_attr));
-       tmp_attr.qp_type = IB_QPT_RC;
-       tmp_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
-       tmp_attr.qp_context = srq_init_attr->srq_context;
-       tmp_attr.event_handler = srq_init_attr->event_handler;
-       tmp_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge;
-       tmp_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr;
-
-       srq_qp = internal_create_qp(pd, &tmp_attr, srq_init_attr, udata, 1);
-       if (IS_ERR(srq_qp))
-               return (struct ib_srq *)srq_qp;
-
-       /* report the resulting limits back to the caller */
-       srq_init_attr->attr.max_sge = 3;
-       srq_init_attr->attr.max_wr = tmp_attr.cap.max_recv_wr;
-
-       /* control block shared by the three modify calls below */
-       ctrl = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!ctrl) {
-               ehca_err(pd->device, "Could not get zeroed page for mqpcb "
-                        "ehca_qp=%p qp_num=%x ",
-                        srq_qp, srq_qp->real_qp_num);
-               err = ERR_PTR(-ENOMEM);
-               goto destroy_srq;
-       }
-
-       /* step 1: request qp_state INIT */
-       ctrl->prim_phys_port = 1;
-       ctrl->qp_state = EHCA_QPS_INIT;
-       mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
-       h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, srq_qp->ipz_qp_handle,
-                                &srq_qp->pf, mask, ctrl,
-                                srq_qp->galpas.kernel);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(pd->device, "Could not modify SRQ to INIT "
-                        "ehca_qp=%p qp_num=%x h_ret=%lli",
-                        srq_qp, srq_qp->real_qp_num, h_ret);
-               goto free_ctrl;
-       }
-
-       /* step 2: set qp_enable */
-       ctrl->qp_enable = 1;
-       mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
-       h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, srq_qp->ipz_qp_handle,
-                                &srq_qp->pf, mask, ctrl,
-                                srq_qp->galpas.kernel);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(pd->device, "Could not enable SRQ "
-                        "ehca_qp=%p qp_num=%x h_ret=%lli",
-                        srq_qp, srq_qp->real_qp_num, h_ret);
-               goto free_ctrl;
-       }
-
-       /* step 3: request qp_state RTR */
-       ctrl->qp_state = EHCA_QPS_RTR;
-       mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
-       h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, srq_qp->ipz_qp_handle,
-                                &srq_qp->pf, mask, ctrl,
-                                srq_qp->galpas.kernel);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(pd->device, "Could not modify SRQ to RTR "
-                        "ehca_qp=%p qp_num=%x h_ret=%lli",
-                        srq_qp, srq_qp->real_qp_num, h_ret);
-               goto free_ctrl;
-       }
-
-       ehca_free_fw_ctrlblock(ctrl);
-
-       return &srq_qp->ib_srq;
-
-free_ctrl:
-       /* a modify step failed: translate the hcall status, drop the block */
-       err = ERR_PTR(ehca2ib_return_code(h_ret));
-       ehca_free_fw_ctrlblock(ctrl);
-
-destroy_srq:
-       internal_destroy_qp(pd->device, srq_qp, srq_qp->ib_srq.uobject);
-
-       return err;
-}
-
-/*
- * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts
- * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe
- * returns total number of bad wqes in bad_wqe_cnt
- *
- * Return: 0 on success; the translated hcall status if
- * hipz_h_disable_and_get_wqe() fails; -EFAULT if the address reported for
- * the bad wqe cannot be converted to an offset in the send queue.
- * *bad_wqe_cnt is only written once the offset has been resolved.
- */
-static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
-                          int *bad_wqe_cnt)
-{
-       u64 h_ret;
-       struct ipz_queue *squeue;
-       void *bad_send_wqe_p, *bad_send_wqe_v;
-       u64 q_ofs;
-       struct ehca_wqe *wqe;
-       int qp_num = my_qp->ib_qp.qp_num;
-
-       /* get send wqe pointer */
-       h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
-                                          my_qp->ipz_qp_handle, &my_qp->pf,
-                                          &bad_send_wqe_p, NULL, 2);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed"
-                        " ehca_qp=%p qp_num=%x h_ret=%lli",
-                        my_qp, qp_num, h_ret);
-               return ehca2ib_return_code(h_ret);
-       }
-       /*
-        * Clear the top address bit.  The mask is built from an unsigned
-        * constant (as in calc_left_cqes()): shifting a signed 1L into the
-        * sign bit is undefined behaviour.
-        */
-       bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1UL << 63)));
-       ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p",
-                qp_num, bad_send_wqe_p);
-       /* convert wqe pointer to vadr */
-       bad_send_wqe_v = __va((u64)bad_send_wqe_p);
-       if (ehca_debug_level >= 2)
-               ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num);
-       squeue = &my_qp->ipz_squeue;
-       if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) {
-               ehca_err(&shca->ib_device, "failed to get wqe offset qp_num=%x"
-                        " bad_send_wqe_p=%p", qp_num, bad_send_wqe_p);
-               return -EFAULT;
-       }
-
-       /*
-        * loop sets wqe's purge bit; it stops at the first entry whose
-        * optype or wqef is 0xff (internal_modify_qp() marks the next free
-        * wqe of a kernel QP that way before calling us)
-        */
-       wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
-       *bad_wqe_cnt = 0;
-       while (wqe->optype != 0xff && wqe->wqef != 0xff) {
-               if (ehca_debug_level >= 2)
-                       ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num);
-               wqe->nr_of_data_seg = 0; /* suppress data access */
-               wqe->wqef = WQEF_PURGE; /* WQE to be purged */
-               q_ofs = ipz_queue_advance_offset(squeue, q_ofs);
-               wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
-               *bad_wqe_cnt = (*bad_wqe_cnt)+1;
-       }
-       /*
-        * bad wqe will be reprocessed and ignored when poll_cq() is called,
-        *  i.e. nr of wqes with flush error status is one less
-        */
-       ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x",
-                qp_num, (*bad_wqe_cnt)-1);
-       /* reset the flag field of the entry the loop stopped at */
-       wqe->wqef = 0;
-
-       return 0;
-}
-
-/*
- * calc_left_cqes() - count the entries between the queue map's tail and a
- * given wqe that still have a cqe requested.
- *
- * @wqe_p:     wqe address; its top bit is cleared before use
- * @ipz_queue: queue used to turn the address into an entry index
- * @qmap:      queue map whose left_to_poll and next_wqe_idx are updated
- *
- * Walks from the entry after qmap->tail up to (not including) the entry
- * that wqe_p maps to, bumping qmap->left_to_poll for every entry with
- * cqe_req set, then stores that entry's index in qmap->next_wqe_idx.
- *
- * Return: 0 on success, -EFAULT if wqe_p cannot be mapped to a queue offset.
- */
-static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
-                         struct ehca_queue_map *qmap)
-{
-       unsigned int idx;
-       u32 stop_idx;
-       u64 offset;
-       void *vaddr;
-
-       /* convert real to abs address */
-       wqe_p &= ~(1UL << 63);
-       vaddr = __va(wqe_p);
-
-       if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &offset)) {
-               ehca_gen_err("Invalid offset for calculating left cqes "
-                               "wqe_p=%#llx wqe_v=%p\n", wqe_p, vaddr);
-               return -EFAULT;
-       }
-
-       stop_idx = offset / ipz_queue->qe_size;
-
-       /* look at every processed wqe, whether it requested a cqe or not */
-       for (idx = next_index(qmap->tail, qmap->entries);
-            idx != stop_idx;
-            idx = next_index(idx, qmap->entries)) {
-               if (qmap->map[idx].cqe_req)
-                       qmap->left_to_poll++;
-       }
-
-       /* remember where flushing has to start */
-       qmap->next_wqe_idx = stop_idx;
-       return 0;
-}
-
-/*
- * check_for_left_cqes() - set up the send and receive queue maps of a QP
- * for flushing and, if nothing is left to poll, put the QP on the error
- * lists.
- *
- * For QPs whose ext_type is not EQPT_SRQBASE the current send and receive
- * wqe pointers are fetched with hipz_h_disable_and_get_wqe() and fed to
- * calc_left_cqes() for each queue.  For EQPT_SRQBASE QPs left_to_poll is
- * zeroed and next_wqe_idx is set to the entry after the map's tail.
- *
- * Return: 0 on success, the translated hcall status if the hcall fails, or
- * the error returned by calc_left_cqes().
- */
-static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
-{
-       u64 h_ret;
-       void *send_wqe_p, *recv_wqe_p;
-       int ret;
-       unsigned long flags;
-       int qp_num = my_qp->ib_qp.qp_num;
-
-       /* this hcall is not supported on base QPs */
-       if (my_qp->ext_type != EQPT_SRQBASE) {
-               /* get send and receive wqe pointer */
-               h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
-                               my_qp->ipz_qp_handle, &my_qp->pf,
-                               &send_wqe_p, &recv_wqe_p, 4);
-               if (h_ret != H_SUCCESS) {
-                       ehca_err(&shca->ib_device, "disable_and_get_wqe() "
-                                "failed ehca_qp=%p qp_num=%x h_ret=%lli",
-                                my_qp, qp_num, h_ret);
-                       return ehca2ib_return_code(h_ret);
-               }
-
-               /*
-                * acquire lock to ensure that nobody is polling the cq which
-                * could mean that the qmap->tail pointer is in an
-                * inconsistent state.
-                */
-               spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
-               ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue,
-                               &my_qp->sq_map);
-               spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
-               if (ret)
-                       return ret;
-
-
-               /* same computation for the receive side, under recv_cq's lock */
-               spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
-               ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue,
-                               &my_qp->rq_map);
-               spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
-               if (ret)
-                       return ret;
-       } else {
-               /* no hcall here: nothing left to poll, start after the tail */
-               spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
-               my_qp->sq_map.left_to_poll = 0;
-               my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
-                                                       my_qp->sq_map.entries);
-               spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
-
-               spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
-               my_qp->rq_map.left_to_poll = 0;
-               my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
-                                                       my_qp->rq_map.entries);
-               spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
-       }
-
-       /* this assures flush cqes being generated only for pending wqes */
-       if ((my_qp->sq_map.left_to_poll == 0) &&
-                               (my_qp->rq_map.left_to_poll == 0)) {
-               /*
-                * NOTE(review): the second argument is 1 under the send_cq
-                * lock and 0 under the recv_cq lock - presumably it selects
-                * the send vs. receive error list; confirm against
-                * ehca_add_to_err_list().
-                */
-               spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
-               ehca_add_to_err_list(my_qp, 1);
-               spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
-
-               if (HAS_RQ(my_qp)) {
-                       spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
-                       ehca_add_to_err_list(my_qp, 0);
-                       spin_unlock_irqrestore(&my_qp->recv_cq->spinlock,
-                                       flags);
-               }
-       }
-
-       return 0;
-}
-
-/*
- * internal_modify_qp with circumvention to handle aqp0 properly
- * smi_reset2init indicates if this is an internal reset-to-init-call for
- * smi. This flag must always be zero if called from ehca_modify_qp()!
- * This internal func was introduced to avoid recursion of ehca_modify_qp()!
- */
-static int internal_modify_qp(struct ib_qp *ibqp,
-                             struct ib_qp_attr *attr,
-                             int attr_mask, int smi_reset2init)
-{
-       enum ib_qp_state qp_cur_state, qp_new_state;
-       int cnt, qp_attr_idx, ret = 0;
-       enum ib_qp_statetrans statetrans;
-       struct hcp_modify_qp_control_block *mqpcb;
-       struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-       struct ehca_shca *shca =
-               container_of(ibqp->pd->device, struct ehca_shca, ib_device);
-       u64 update_mask;
-       u64 h_ret;
-       int bad_wqe_cnt = 0;
-       int is_user = 0;
-       int squeue_locked = 0;
-       unsigned long flags = 0;
-
-       /* do query_qp to obtain current attr values */
-       mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
-       if (!mqpcb) {
-               ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
-                        "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_qp(shca->ipz_hca_handle,
-                               my_qp->ipz_qp_handle,
-                               &my_qp->pf,
-                               mqpcb, my_qp->galpas.kernel);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(ibqp->device, "hipz_h_query_qp() failed "
-                        "ehca_qp=%p qp_num=%x h_ret=%lli",
-                        my_qp, ibqp->qp_num, h_ret);
-               ret = ehca2ib_return_code(h_ret);
-               goto modify_qp_exit1;
-       }
-       if (ibqp->uobject)
-               is_user = 1;
-
-       qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
-
-       if (qp_cur_state == -EINVAL) {  /* invalid qp state */
-               ret = -EINVAL;
-               ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x "
-                        "ehca_qp=%p qp_num=%x",
-                        mqpcb->qp_state, my_qp, ibqp->qp_num);
-               goto modify_qp_exit1;
-       }
-       /*
-        * circumvention to set aqp0 initial state to init
-        * as expected by IB spec
-        */
-       if (smi_reset2init == 0 &&
-           ibqp->qp_type == IB_QPT_SMI &&
-           qp_cur_state == IB_QPS_RESET &&
-           (attr_mask & IB_QP_STATE) &&
-           attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */
-               struct ib_qp_attr smiqp_attr = {
-                       .qp_state = IB_QPS_INIT,
-                       .port_num = my_qp->init_attr.port_num,
-                       .pkey_index = 0,
-                       .qkey = 0
-               };
-               int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT |
-                       IB_QP_PKEY_INDEX | IB_QP_QKEY;
-               int smirc = internal_modify_qp(
-                       ibqp, &smiqp_attr, smiqp_attr_mask, 1);
-               if (smirc) {
-                       ehca_err(ibqp->device, "SMI RESET -> INIT failed. "
-                                "ehca_modify_qp() rc=%i", smirc);
-                       ret = H_PARAMETER;
-                       goto modify_qp_exit1;
-               }
-               qp_cur_state = IB_QPS_INIT;
-               ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded");
-       }
-       /* is transmitted current state  equal to "real" current state */
-       if ((attr_mask & IB_QP_CUR_STATE) &&
-           qp_cur_state != attr->cur_qp_state) {
-               ret = -EINVAL;
-               ehca_err(ibqp->device,
-                        "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>"
-                        " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x",
-                        attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num);
-               goto modify_qp_exit1;
-       }
-
-       ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x "
-                "new qp_state=%x attribute_mask=%x",
-                my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask);
-
-       qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state;
-       if (!smi_reset2init &&
-           !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type,
-                               attr_mask, IB_LINK_LAYER_UNSPECIFIED)) {
-               ret = -EINVAL;
-               ehca_err(ibqp->device,
-                        "Invalid qp transition new_state=%x cur_state=%x "
-                        "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state,
-                        qp_cur_state, my_qp, ibqp->qp_num, attr_mask);
-               goto modify_qp_exit1;
-       }
-
-       mqpcb->qp_state = ib2ehca_qp_state(qp_new_state);
-       if (mqpcb->qp_state)
-               update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
-       else {
-               ret = -EINVAL;
-               ehca_err(ibqp->device, "Invalid new qp state=%x "
-                        "ehca_qp=%p qp_num=%x",
-                        qp_new_state, my_qp, ibqp->qp_num);
-               goto modify_qp_exit1;
-       }
-
-       /* retrieve state transition struct to get req and opt attrs */
-       statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state);
-       if (statetrans < 0) {
-               ret = -EINVAL;
-               ehca_err(ibqp->device, "<INVALID STATE CHANGE> qp_cur_state=%x "
-                        "new_qp_state=%x State_xsition=%x ehca_qp=%p "
-                        "qp_num=%x", qp_cur_state, qp_new_state,
-                        statetrans, my_qp, ibqp->qp_num);
-               goto modify_qp_exit1;
-       }
-
-       qp_attr_idx = ib2ehcaqptype(ibqp->qp_type);
-
-       if (qp_attr_idx < 0) {
-               ret = qp_attr_idx;
-               ehca_err(ibqp->device,
-                        "Invalid QP type=%x ehca_qp=%p qp_num=%x",
-                        ibqp->qp_type, my_qp, ibqp->qp_num);
-               goto modify_qp_exit1;
-       }
-
-       ehca_dbg(ibqp->device,
-                "ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x",
-                my_qp, ibqp->qp_num, statetrans);
-
-       /* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set
-        * in non-LL UD QPs.
-        */
-       if ((my_qp->qp_type == IB_QPT_UD) &&
-           (my_qp->ext_type != EQPT_LLQP) &&
-           (statetrans == IB_QPST_INIT2RTR) &&
-           (shca->hw_level >= 0x22)) {
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
-               mqpcb->send_grh_flag = 1;
-       }
-
-       /* sqe -> rts: set purge bit of bad wqe before actual trans */
-       if ((my_qp->qp_type == IB_QPT_UD ||
-            my_qp->qp_type == IB_QPT_GSI ||
-            my_qp->qp_type == IB_QPT_SMI) &&
-           statetrans == IB_QPST_SQE2RTS) {
-               /* mark next free wqe if kernel */
-               if (!ibqp->uobject) {
-                       struct ehca_wqe *wqe;
-                       /* lock send queue */
-                       spin_lock_irqsave(&my_qp->spinlock_s, flags);
-                       squeue_locked = 1;
-                       /* mark next free wqe */
-                       wqe = (struct ehca_wqe *)
-                               ipz_qeit_get(&my_qp->ipz_squeue);
-                       wqe->optype = wqe->wqef = 0xff;
-                       ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p",
-                                ibqp->qp_num, wqe);
-               }
-               ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt);
-               if (ret) {
-                       ehca_err(ibqp->device, "prepare_sqe_rts() failed "
-                                "ehca_qp=%p qp_num=%x ret=%i",
-                                my_qp, ibqp->qp_num, ret);
-                       goto modify_qp_exit2;
-               }
-       }
-
-       /*
-        * enable RDMA_Atomic_Control if reset->init and connected QP (RC or UC)
-        * this is necessary since gen2 does not provide that flag,
-        * but pHyp requires it
-        */
-       if (statetrans == IB_QPST_RESET2INIT &&
-           (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) {
-               mqpcb->rdma_atomic_ctrl = 3;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1);
-       }
-       /* circ. pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */
-       if (statetrans == IB_QPST_INIT2RTR &&
-           (ibqp->qp_type == IB_QPT_UC) &&
-           !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) {
-               mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
-       }
-
-       if (attr_mask & IB_QP_PKEY_INDEX) {
-               if (attr->pkey_index >= 16) {
-                       ret = -EINVAL;
-                       ehca_err(ibqp->device, "Invalid pkey_index=%x. "
-                                "ehca_qp=%p qp_num=%x max_pkey_index=f",
-                                attr->pkey_index, my_qp, ibqp->qp_num);
-                       goto modify_qp_exit2;
-               }
-               mqpcb->prim_p_key_idx = attr->pkey_index;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
-       }
-       if (attr_mask & IB_QP_PORT) {
-               struct ehca_sport *sport;
-               struct ehca_qp *aqp1;
-               if (attr->port_num < 1 || attr->port_num > shca->num_ports) {
-                       ret = -EINVAL;
-                       ehca_err(ibqp->device, "Invalid port=%x. "
-                                "ehca_qp=%p qp_num=%x num_ports=%x",
-                                attr->port_num, my_qp, ibqp->qp_num,
-                                shca->num_ports);
-                       goto modify_qp_exit2;
-               }
-               sport = &shca->sport[attr->port_num - 1];
-               if (!sport->ibqp_sqp[IB_QPT_GSI]) {
-                       /* should not occur */
-                       ret = -EFAULT;
-                       ehca_err(ibqp->device, "AQP1 was not created for "
-                                "port=%x", attr->port_num);
-                       goto modify_qp_exit2;
-               }
-               aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI],
-                                   struct ehca_qp, ib_qp);
-               if (ibqp->qp_type != IB_QPT_GSI &&
-                   ibqp->qp_type != IB_QPT_SMI &&
-                   aqp1->mod_qp_parm) {
-                       /*
-                        * firmware will reject this modify_qp() because
-                        * port is not activated/initialized fully
-                        */
-                       ret = -EFAULT;
-                       ehca_warn(ibqp->device, "Couldn't modify qp port=%x: "
-                                 "either port is being activated (try again) "
-                                 "or cabling issue", attr->port_num);
-                       goto modify_qp_exit2;
-               }
-               mqpcb->prim_phys_port = attr->port_num;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1);
-       }
-       if (attr_mask & IB_QP_QKEY) {
-               mqpcb->qkey = attr->qkey;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1);
-       }
-       if (attr_mask & IB_QP_AV) {
-               mqpcb->dlid = attr->ah_attr.dlid;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1);
-               mqpcb->source_path_bits = attr->ah_attr.src_path_bits;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1);
-               mqpcb->service_level = attr->ah_attr.sl;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1);
-
-               if (ehca_calc_ipd(shca, mqpcb->prim_phys_port,
-                                 attr->ah_attr.static_rate,
-                                 &mqpcb->max_static_rate)) {
-                       ret = -EINVAL;
-                       goto modify_qp_exit2;
-               }
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1);
-
-               /*
-                * Always supply the GRH flag, even if it's zero, to give the
-                * hypervisor a clear "yes" or "no" instead of a "perhaps"
-                */
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
-
-               /*
-                * only if GRH is TRUE we might consider SOURCE_GID_IDX
-                * and DEST_GID otherwise phype will return H_ATTR_PARM!!!
-                */
-               if (attr->ah_attr.ah_flags == IB_AH_GRH) {
-                       mqpcb->send_grh_flag = 1;
-
-                       mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index;
-                       update_mask |=
-                               EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1);
-
-                       for (cnt = 0; cnt < 16; cnt++)
-                               mqpcb->dest_gid.byte[cnt] =
-                                       attr->ah_attr.grh.dgid.raw[cnt];
-
-                       update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1);
-                       mqpcb->flow_label = attr->ah_attr.grh.flow_label;
-                       update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1);
-                       mqpcb->hop_limit = attr->ah_attr.grh.hop_limit;
-                       update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1);
-                       mqpcb->traffic_class = attr->ah_attr.grh.traffic_class;
-                       update_mask |=
-                               EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1);
-               }
-       }
-
-       if (attr_mask & IB_QP_PATH_MTU) {
-               /* store ld(MTU) */
-               my_qp->mtu_shift = attr->path_mtu + 7;
-               mqpcb->path_mtu = attr->path_mtu;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1);
-       }
-       if (attr_mask & IB_QP_TIMEOUT) {
-               mqpcb->timeout = attr->timeout;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1);
-       }
-       if (attr_mask & IB_QP_RETRY_CNT) {
-               mqpcb->retry_count = attr->retry_cnt;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1);
-       }
-       if (attr_mask & IB_QP_RNR_RETRY) {
-               mqpcb->rnr_retry_count = attr->rnr_retry;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1);
-       }
-       if (attr_mask & IB_QP_RQ_PSN) {
-               mqpcb->receive_psn = attr->rq_psn;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1);
-       }
-       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
-               mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ?
-                       attr->max_dest_rd_atomic : 2;
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
-       }
-       if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
-               mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ?
-                       attr->max_rd_atomic : 2;
-               update_mask |=
-                       EHCA_BMASK_SET
-                       (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1);
-       }
-       if (attr_mask & IB_QP_ALT_PATH) {
-               if (attr->alt_port_num < 1
-                   || attr->alt_port_num > shca->num_ports) {
-                       ret = -EINVAL;
-                       ehca_err(ibqp->device, "Invalid alt_port=%x. "
-                                "ehca_qp=%p qp_num=%x num_ports=%x",
-                                attr->alt_port_num, my_qp, ibqp->qp_num,
-                                shca->num_ports);
-                       goto modify_qp_exit2;
-               }
-               mqpcb->alt_phys_port = attr->alt_port_num;
-
-               if (attr->alt_pkey_index >= 16) {
-                       ret = -EINVAL;
-                       ehca_err(ibqp->device, "Invalid alt_pkey_index=%x. "
-                                "ehca_qp=%p qp_num=%x max_pkey_index=f",
-                                attr->pkey_index, my_qp, ibqp->qp_num);
-                       goto modify_qp_exit2;
-               }
-               mqpcb->alt_p_key_idx = attr->alt_pkey_index;
-
-               mqpcb->timeout_al = attr->alt_timeout;
-               mqpcb->dlid_al = attr->alt_ah_attr.dlid;
-               mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits;
-               mqpcb->service_level_al = attr->alt_ah_attr.sl;
-
-               if (ehca_calc_ipd(shca, mqpcb->alt_phys_port,
-                                 attr->alt_ah_attr.static_rate,
-                                 &mqpcb->max_static_rate_al)) {
-                       ret = -EINVAL;
-                       goto modify_qp_exit2;
-               }
-
-               /* OpenIB doesn't support alternate retry counts - copy them */
-               mqpcb->retry_count_al = mqpcb->retry_count;
-               mqpcb->rnr_retry_count_al = mqpcb->rnr_retry_count;
-
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_ALT_PHYS_PORT, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_ALT_P_KEY_IDX, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT_AL, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT_AL, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT_AL, 1);
-
-               /*
-                * Always supply the GRH flag, even if it's zero, to give the
-                * hypervisor a clear "yes" or "no" instead of a "perhaps"
-                */
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
-
-               /*
-                * only if GRH is TRUE we might consider SOURCE_GID_IDX
-                * and DEST_GID otherwise phype will return H_ATTR_PARM!!!
-                */
-               if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) {
-                       mqpcb->send_grh_flag_al = 1;
-
-                       for (cnt = 0; cnt < 16; cnt++)
-                               mqpcb->dest_gid_al.byte[cnt] =
-                                       attr->alt_ah_attr.grh.dgid.raw[cnt];
-                       mqpcb->source_gid_idx_al =
-                               attr->alt_ah_attr.grh.sgid_index;
-                       mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label;
-                       mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit;
-                       mqpcb->traffic_class_al =
-                               attr->alt_ah_attr.grh.traffic_class;
-
-                       update_mask |=
-                               EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1)
-                               | EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1)
-                               | EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1)
-                               | EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1) |
-                               EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1);
-               }
-       }
-
-       if (attr_mask & IB_QP_MIN_RNR_TIMER) {
-               mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer;
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1);
-       }
-
-       if (attr_mask & IB_QP_SQ_PSN) {
-               mqpcb->send_psn = attr->sq_psn;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1);
-       }
-
-       if (attr_mask & IB_QP_DEST_QPN) {
-               mqpcb->dest_qp_nr = attr->dest_qp_num;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1);
-       }
-
-       if (attr_mask & IB_QP_PATH_MIG_STATE) {
-               if (attr->path_mig_state != IB_MIG_REARM
-                   && attr->path_mig_state != IB_MIG_MIGRATED) {
-                       ret = -EINVAL;
-                       ehca_err(ibqp->device, "Invalid mig_state=%x",
-                                attr->path_mig_state);
-                       goto modify_qp_exit2;
-               }
-               mqpcb->path_migration_state = attr->path_mig_state + 1;
-               if (attr->path_mig_state == IB_MIG_REARM)
-                       my_qp->mig_armed = 1;
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
-       }
-
-       if (attr_mask & IB_QP_CAP) {
-               mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1;
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1);
-               mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1;
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1);
-               /* no support for max_send/recv_sge yet */
-       }
-
-       if (ehca_debug_level >= 2)
-               ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num);
-
-       h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
-                                my_qp->ipz_qp_handle,
-                                &my_qp->pf,
-                                update_mask,
-                                mqpcb, my_qp->galpas.kernel);
-
-       if (h_ret != H_SUCCESS) {
-               ret = ehca2ib_return_code(h_ret);
-               ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%lli "
-                        "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num);
-               goto modify_qp_exit2;
-       }
-
-       if ((my_qp->qp_type == IB_QPT_UD ||
-            my_qp->qp_type == IB_QPT_GSI ||
-            my_qp->qp_type == IB_QPT_SMI) &&
-           statetrans == IB_QPST_SQE2RTS) {
-               /* doorbell to reprocessing wqes */
-               iosync(); /* serialize GAL register access */
-               hipz_update_sqa(my_qp, bad_wqe_cnt-1);
-               ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt);
-       }
-
-       if (statetrans == IB_QPST_RESET2INIT ||
-           statetrans == IB_QPST_INIT2INIT) {
-               mqpcb->qp_enable = 1;
-               mqpcb->qp_state = EHCA_QPS_INIT;
-               update_mask = 0;
-               update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
-
-               h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
-                                        my_qp->ipz_qp_handle,
-                                        &my_qp->pf,
-                                        update_mask,
-                                        mqpcb,
-                                        my_qp->galpas.kernel);
-
-               if (h_ret != H_SUCCESS) {
-                       ret = ehca2ib_return_code(h_ret);
-                       ehca_err(ibqp->device, "ENABLE in context of "
-                                "RESET_2_INIT failed! Maybe you didn't get "
-                                "a LID h_ret=%lli ehca_qp=%p qp_num=%x",
-                                h_ret, my_qp, ibqp->qp_num);
-                       goto modify_qp_exit2;
-               }
-       }
-       if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)
-           && !is_user) {
-               ret = check_for_left_cqes(my_qp, shca);
-               if (ret)
-                       goto modify_qp_exit2;
-       }
-
-       if (statetrans == IB_QPST_ANY2RESET) {
-               ipz_qeit_reset(&my_qp->ipz_rqueue);
-               ipz_qeit_reset(&my_qp->ipz_squeue);
-
-               if (qp_cur_state == IB_QPS_ERR && !is_user) {
-                       del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
-
-                       if (HAS_RQ(my_qp))
-                               del_from_err_list(my_qp->recv_cq,
-                                                 &my_qp->rq_err_node);
-               }
-               if (!is_user)
-                       reset_queue_map(&my_qp->sq_map);
-
-               if (HAS_RQ(my_qp) && !is_user)
-                       reset_queue_map(&my_qp->rq_map);
-       }
-
-       if (attr_mask & IB_QP_QKEY)
-               my_qp->qkey = attr->qkey;
-
-modify_qp_exit2:
-       if (squeue_locked) { /* this means: sqe -> rts */
-               spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
-               my_qp->sqerr_purgeflag = 1;
-       }
-
-modify_qp_exit1:
-       ehca_free_fw_ctrlblock(mqpcb);
-
-       return ret;
-}
-
-int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
-                  struct ib_udata *udata)
-{
-       int ret = 0;
-
-       struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
-                                             ib_device);
-       struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-
-       /* The if-block below caches qp_attr to be modified for GSI and SMI
-        * qps during the initialization by ib_mad. When the respective port
-        * is activated, ie we got an event PORT_ACTIVE, we'll replay the
-        * cached modify calls sequence, see ehca_recover_sqs() below.
-        * Why that is required:
-        * 1) If one port is connected, older code requires that port one
-        *    to be connected and module option nr_ports=1 to be given by
-        *    user, which is very inconvenient for end user.
-        * 2) Firmware accepts modify_qp() only if respective port has become
-        *    active. Older code had a wait loop of 30sec create_qp()/
-        *    define_aqp1(), which is not appropriate in practice. This
-        *    code now removes that wait loop, see define_aqp1(), and always
-        *    reports all ports to ib_mad resp. users. Only activated ports
-        *    will then usable for the users.
-        */
-       if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
-               int port = my_qp->init_attr.port_num;
-               struct ehca_sport *sport = &shca->sport[port - 1];
-               unsigned long flags;
-               spin_lock_irqsave(&sport->mod_sqp_lock, flags);
-               /* cache qp_attr only during init */
-               if (my_qp->mod_qp_parm) {
-                       struct ehca_mod_qp_parm *p;
-                       if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) {
-                               ehca_err(&shca->ib_device,
-                                        "mod_qp_parm overflow state=%x port=%x"
-                                        " type=%x", attr->qp_state,
-                                        my_qp->init_attr.port_num,
-                                        ibqp->qp_type);
-                               spin_unlock_irqrestore(&sport->mod_sqp_lock,
-                                                      flags);
-                               return -EINVAL;
-                       }
-                       p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx];
-                       p->mask = attr_mask;
-                       p->attr = *attr;
-                       my_qp->mod_qp_parm_idx++;
-                       ehca_dbg(&shca->ib_device,
-                                "Saved qp_attr for state=%x port=%x type=%x",
-                                attr->qp_state, my_qp->init_attr.port_num,
-                                ibqp->qp_type);
-                       spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-                       goto out;
-               }
-               spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-       }
-
-       ret = internal_modify_qp(ibqp, attr, attr_mask, 0);
-
-out:
-       if ((ret == 0) && (attr_mask & IB_QP_STATE))
-               my_qp->state = attr->qp_state;
-
-       return ret;
-}
-
-void ehca_recover_sqp(struct ib_qp *sqp)
-{
-       struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp);
-       int port = my_sqp->init_attr.port_num;
-       struct ib_qp_attr attr;
-       struct ehca_mod_qp_parm *qp_parm;
-       int i, qp_parm_idx, ret;
-       unsigned long flags, wr_cnt;
-
-       if (!my_sqp->mod_qp_parm)
-               return;
-       ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num);
-
-       qp_parm = my_sqp->mod_qp_parm;
-       qp_parm_idx = my_sqp->mod_qp_parm_idx;
-       for (i = 0; i < qp_parm_idx; i++) {
-               attr = qp_parm[i].attr;
-               ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0);
-               if (ret) {
-                       ehca_err(sqp->device, "Could not modify SQP port=%x "
-                                "qp_num=%x ret=%x", port, sqp->qp_num, ret);
-                       goto free_qp_parm;
-               }
-               ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x",
-                        port, sqp->qp_num, attr.qp_state);
-       }
-
-       /* re-trigger posted recv wrs */
-       wr_cnt =  my_sqp->ipz_rqueue.current_q_offset /
-               my_sqp->ipz_rqueue.qe_size;
-       if (wr_cnt) {
-               spin_lock_irqsave(&my_sqp->spinlock_r, flags);
-               hipz_update_rqa(my_sqp, wr_cnt);
-               spin_unlock_irqrestore(&my_sqp->spinlock_r, flags);
-               ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx",
-                        port, sqp->qp_num, wr_cnt);
-       }
-
-free_qp_parm:
-       kfree(qp_parm);
-       /* this prevents subsequent calls to modify_qp() to cache qp_attr */
-       my_sqp->mod_qp_parm = NULL;
-}
-
-int ehca_query_qp(struct ib_qp *qp,
-                 struct ib_qp_attr *qp_attr,
-                 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
-{
-       struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
-       struct ehca_shca *shca = container_of(qp->device, struct ehca_shca,
-                                             ib_device);
-       struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
-       struct hcp_modify_qp_control_block *qpcb;
-       int cnt, ret = 0;
-       u64 h_ret;
-
-       if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) {
-               ehca_err(qp->device, "Invalid attribute mask "
-                        "ehca_qp=%p qp_num=%x qp_attr_mask=%x ",
-                        my_qp, qp->qp_num, qp_attr_mask);
-               return -EINVAL;
-       }
-
-       qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!qpcb) {
-               ehca_err(qp->device, "Out of memory for qpcb "
-                        "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num);
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_qp(adapter_handle,
-                               my_qp->ipz_qp_handle,
-                               &my_qp->pf,
-                               qpcb, my_qp->galpas.kernel);
-
-       if (h_ret != H_SUCCESS) {
-               ret = ehca2ib_return_code(h_ret);
-               ehca_err(qp->device, "hipz_h_query_qp() failed "
-                        "ehca_qp=%p qp_num=%x h_ret=%lli",
-                        my_qp, qp->qp_num, h_ret);
-               goto query_qp_exit1;
-       }
-
-       qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state);
-       qp_attr->qp_state = qp_attr->cur_qp_state;
-
-       if (qp_attr->cur_qp_state == -EINVAL) {
-               ret = -EINVAL;
-               ehca_err(qp->device, "Got invalid ehca_qp_state=%x "
-                        "ehca_qp=%p qp_num=%x",
-                        qpcb->qp_state, my_qp, qp->qp_num);
-               goto query_qp_exit1;
-       }
-
-       if (qp_attr->qp_state == IB_QPS_SQD)
-               qp_attr->sq_draining = 1;
-
-       qp_attr->qkey = qpcb->qkey;
-       qp_attr->path_mtu = qpcb->path_mtu;
-       qp_attr->path_mig_state = qpcb->path_migration_state - 1;
-       qp_attr->rq_psn = qpcb->receive_psn;
-       qp_attr->sq_psn = qpcb->send_psn;
-       qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field;
-       qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1;
-       qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1;
-       /* UD_AV CIRCUMVENTION */
-       if (my_qp->qp_type == IB_QPT_UD) {
-               qp_attr->cap.max_send_sge =
-                       qpcb->actual_nr_sges_in_sq_wqe - 2;
-               qp_attr->cap.max_recv_sge =
-                       qpcb->actual_nr_sges_in_rq_wqe - 2;
-       } else {
-               qp_attr->cap.max_send_sge =
-                       qpcb->actual_nr_sges_in_sq_wqe;
-               qp_attr->cap.max_recv_sge =
-                       qpcb->actual_nr_sges_in_rq_wqe;
-       }
-
-       qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size;
-       qp_attr->dest_qp_num = qpcb->dest_qp_nr;
-
-       qp_attr->pkey_index = qpcb->prim_p_key_idx;
-       qp_attr->port_num = qpcb->prim_phys_port;
-       qp_attr->timeout = qpcb->timeout;
-       qp_attr->retry_cnt = qpcb->retry_count;
-       qp_attr->rnr_retry = qpcb->rnr_retry_count;
-
-       qp_attr->alt_pkey_index = qpcb->alt_p_key_idx;
-       qp_attr->alt_port_num = qpcb->alt_phys_port;
-       qp_attr->alt_timeout = qpcb->timeout_al;
-
-       qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res;
-       qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp;
-
-       /* primary av */
-       qp_attr->ah_attr.sl = qpcb->service_level;
-
-       if (qpcb->send_grh_flag) {
-               qp_attr->ah_attr.ah_flags = IB_AH_GRH;
-       }
-
-       qp_attr->ah_attr.static_rate = qpcb->max_static_rate;
-       qp_attr->ah_attr.dlid = qpcb->dlid;
-       qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits;
-       qp_attr->ah_attr.port_num = qp_attr->port_num;
-
-       /* primary GRH */
-       qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class;
-       qp_attr->ah_attr.grh.hop_limit = qpcb->hop_limit;
-       qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx;
-       qp_attr->ah_attr.grh.flow_label = qpcb->flow_label;
-
-       for (cnt = 0; cnt < 16; cnt++)
-               qp_attr->ah_attr.grh.dgid.raw[cnt] =
-                       qpcb->dest_gid.byte[cnt];
-
-       /* alternate AV */
-       qp_attr->alt_ah_attr.sl = qpcb->service_level_al;
-       if (qpcb->send_grh_flag_al) {
-               qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH;
-       }
-
-       qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al;
-       qp_attr->alt_ah_attr.dlid = qpcb->dlid_al;
-       qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al;
-
-       /* alternate GRH */
-       qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al;
-       qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al;
-       qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al;
-       qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al;
-
-       for (cnt = 0; cnt < 16; cnt++)
-               qp_attr->alt_ah_attr.grh.dgid.raw[cnt] =
-                       qpcb->dest_gid_al.byte[cnt];
-
-       /* return init attributes given in ehca_create_qp */
-       if (qp_init_attr)
-               *qp_init_attr = my_qp->init_attr;
-
-       if (ehca_debug_level >= 2)
-               ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num);
-
-query_qp_exit1:
-       ehca_free_fw_ctrlblock(qpcb);
-
-       return ret;
-}
-
-int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-                   enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
-{
-       struct ehca_qp *my_qp =
-               container_of(ibsrq, struct ehca_qp, ib_srq);
-       struct ehca_shca *shca =
-               container_of(ibsrq->pd->device, struct ehca_shca, ib_device);
-       struct hcp_modify_qp_control_block *mqpcb;
-       u64 update_mask;
-       u64 h_ret;
-       int ret = 0;
-
-       mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!mqpcb) {
-               ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb "
-                        "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
-               return -ENOMEM;
-       }
-
-       update_mask = 0;
-       if (attr_mask & IB_SRQ_LIMIT) {
-               attr_mask &= ~IB_SRQ_LIMIT;
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1);
-               mqpcb->curr_srq_limit = attr->srq_limit;
-               mqpcb->qp_aff_asyn_ev_log_reg =
-                       EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1);
-       }
-
-       /* by now, all bits in attr_mask should have been cleared */
-       if (attr_mask) {
-               ehca_err(ibsrq->device, "invalid attribute mask bits set  "
-                        "attr_mask=%x", attr_mask);
-               ret = -EINVAL;
-               goto modify_srq_exit0;
-       }
-
-       if (ehca_debug_level >= 2)
-               ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
-
-       h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle,
-                                NULL, update_mask, mqpcb,
-                                my_qp->galpas.kernel);
-
-       if (h_ret != H_SUCCESS) {
-               ret = ehca2ib_return_code(h_ret);
-               ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%lli "
-                        "ehca_qp=%p qp_num=%x",
-                        h_ret, my_qp, my_qp->real_qp_num);
-       }
-
-modify_srq_exit0:
-       ehca_free_fw_ctrlblock(mqpcb);
-
-       return ret;
-}
-
-int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr)
-{
-       struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq);
-       struct ehca_shca *shca = container_of(srq->device, struct ehca_shca,
-                                             ib_device);
-       struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
-       struct hcp_modify_qp_control_block *qpcb;
-       int ret = 0;
-       u64 h_ret;
-
-       qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!qpcb) {
-               ehca_err(srq->device, "Out of memory for qpcb "
-                        "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num);
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle,
-                               NULL, qpcb, my_qp->galpas.kernel);
-
-       if (h_ret != H_SUCCESS) {
-               ret = ehca2ib_return_code(h_ret);
-               ehca_err(srq->device, "hipz_h_query_qp() failed "
-                        "ehca_qp=%p qp_num=%x h_ret=%lli",
-                        my_qp, my_qp->real_qp_num, h_ret);
-               goto query_srq_exit1;
-       }
-
-       srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1;
-       srq_attr->max_sge = 3;
-       srq_attr->srq_limit = qpcb->curr_srq_limit;
-
-       if (ehca_debug_level >= 2)
-               ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
-
-query_srq_exit1:
-       ehca_free_fw_ctrlblock(qpcb);
-
-       return ret;
-}
-
-static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
-                              struct ib_uobject *uobject)
-{
-       struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device);
-       struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
-                                            ib_pd);
-       struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1];
-       u32 qp_num = my_qp->real_qp_num;
-       int ret;
-       u64 h_ret;
-       u8 port_num;
-       int is_user = 0;
-       enum ib_qp_type qp_type;
-       unsigned long flags;
-
-       if (uobject) {
-               is_user = 1;
-               if (my_qp->mm_count_galpa ||
-                   my_qp->mm_count_rqueue || my_qp->mm_count_squeue) {
-                       ehca_err(dev, "Resources still referenced in "
-                                "user space qp_num=%x", qp_num);
-                       return -EINVAL;
-               }
-       }
-
-       if (my_qp->send_cq) {
-               ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num);
-               if (ret) {
-                       ehca_err(dev, "Couldn't unassign qp from "
-                                "send_cq ret=%i qp_num=%x cq_num=%x", ret,
-                                qp_num, my_qp->send_cq->cq_number);
-                       return ret;
-               }
-       }
-
-       write_lock_irqsave(&ehca_qp_idr_lock, flags);
-       idr_remove(&ehca_qp_idr, my_qp->token);
-       write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-
-       /*
-        * SRQs will never get into an error list and do not have a recv_cq,
-        * so we need to skip them here.
-        */
-       if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user)
-               del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
-
-       if (HAS_SQ(my_qp) && !is_user)
-               del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
-
-       /* now wait until all pending events have completed */
-       wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
-
-       h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%lli "
-                        "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
-               return ehca2ib_return_code(h_ret);
-       }
-
-       port_num = my_qp->init_attr.port_num;
-       qp_type  = my_qp->init_attr.qp_type;
-
-       if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
-               spin_lock_irqsave(&sport->mod_sqp_lock, flags);
-               kfree(my_qp->mod_qp_parm);
-               my_qp->mod_qp_parm = NULL;
-               shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL;
-               spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-       }
-
-       /* no support for IB_QPT_SMI yet */
-       if (qp_type == IB_QPT_GSI) {
-               struct ib_event event;
-               ehca_info(dev, "device %s: port %x is inactive.",
-                               shca->ib_device.name, port_num);
-               event.device = &shca->ib_device;
-               event.event = IB_EVENT_PORT_ERR;
-               event.element.port_num = port_num;
-               shca->sport[port_num - 1].port_state = IB_PORT_DOWN;
-               ib_dispatch_event(&event);
-       }
-
-       if (HAS_RQ(my_qp)) {
-               ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
-               if (!is_user)
-                       vfree(my_qp->rq_map.map);
-       }
-       if (HAS_SQ(my_qp)) {
-               ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
-               if (!is_user)
-                       vfree(my_qp->sq_map.map);
-       }
-       kmem_cache_free(qp_cache, my_qp);
-       atomic_dec(&shca->num_qps);
-       return 0;
-}
-
-int ehca_destroy_qp(struct ib_qp *qp)
-{
-       return internal_destroy_qp(qp->device,
-                                  container_of(qp, struct ehca_qp, ib_qp),
-                                  qp->uobject);
-}
-
-int ehca_destroy_srq(struct ib_srq *srq)
-{
-       return internal_destroy_qp(srq->device,
-                                  container_of(srq, struct ehca_qp, ib_srq),
-                                  srq->uobject);
-}
-
-int ehca_init_qp_cache(void)
-{
-       qp_cache = kmem_cache_create("ehca_cache_qp",
-                                    sizeof(struct ehca_qp), 0,
-                                    SLAB_HWCACHE_ALIGN,
-                                    NULL);
-       if (!qp_cache)
-               return -ENOMEM;
-       return 0;
-}
-
-void ehca_cleanup_qp_cache(void)
-{
-       kmem_cache_destroy(qp_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_reqs.c b/drivers/staging/rdma/ehca/ehca_reqs.c
deleted file mode 100644 (file)
index 10e2074..0000000
+++ /dev/null
@@ -1,954 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  post_send/recv, poll_cq, req_notify
- *
- *  Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-#include "hipz_fns.h"
-
-/* in RC traffic, insert an empty RDMA READ every this many packets */
-#define ACK_CIRC_THRESHOLD 2000000
-
-/*
- * replace_wr_id - swap the bits of @wr_id covered by QMAP_IDX_MASK for @idx
- *
- * All bits of @wr_id outside QMAP_IDX_MASK are preserved; the masked bits
- * are taken from @idx.
- */
-static u64 replace_wr_id(u64 wr_id, u16 idx)
-{
-       return (wr_id & ~QMAP_IDX_MASK) | (idx & QMAP_IDX_MASK);
-}
-
-/* get_app_wr_id - extract the bits of @wr_id selected by QMAP_IDX_MASK */
-static u16 get_app_wr_id(u64 wr_id)
-{
-       u16 masked = wr_id & QMAP_IDX_MASK;
-
-       return masked;
-}
-
-/*
- * ehca_write_rwqe - fill in one receive WQE from a verbs receive request
- * @ipz_rqueue: receive queue the WQE belongs to; only its act_nr_of_sg limit
- *              is checked here (the pointer itself is logged in debug mode)
- * @wqe_p:      WQE slot to fill
- * @recv_wr:    receive work request supplying wr_id and the SG list
- * @rq_map_idx: queue map index stored into the WQE's work request id
- *
- * Returns 0 on success or -EINVAL if the SG count is negative or larger
- * than the queue's act_nr_of_sg.
- */
-static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
-                                 struct ehca_wqe *wqe_p,
-                                 struct ib_recv_wr *recv_wr,
-                                 u32 rq_map_idx)
-{
-       /* same type as num_sge, so the loop bound comparison cannot wrap */
-       int cnt_ds;
-
-       if (unlikely((recv_wr->num_sge < 0) ||
-                    (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) {
-               ehca_gen_err("Invalid number of WQE SGE. "
-                        "num_sge=%x max_nr_of_sg=%x",
-                        recv_wr->num_sge, ipz_rqueue->act_nr_of_sg);
-               return -EINVAL; /* invalid SG list length */
-       }
-
-       /* clear wqe header until sglist */
-       memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
-
-       /* the WQE carries the queue map index in place of the low wr_id bits */
-       wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx);
-       wqe_p->nr_of_data_seg = recv_wr->num_sge;
-
-       for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
-               wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr =
-                       recv_wr->sg_list[cnt_ds].addr;
-               wqe_p->u.all_rcv.sg_list[cnt_ds].lkey =
-                       recv_wr->sg_list[cnt_ds].lkey;
-               wqe_p->u.all_rcv.sg_list[cnt_ds].length =
-                       recv_wr->sg_list[cnt_ds].length;
-       }
-
-       if (ehca_debug_level >= 3) {
-               ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p",
-                            ipz_rqueue);
-               ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
-       }
-
-       return 0;
-}
-
-#if defined(DEBUG_GSI_SEND_WR)
-
-/* need ib_mad struct */
-#include <rdma/ib_mad.h>
-
-/*
- * trace_ud_wr - debug dump of a chain of UD send work requests
- * @ud_wr: first UD work request; the chain is followed through wr.next
- *
- * For every request this logs the generic WR fields, the MAD header if one
- * is attached, and every SG entry together with a dump of its payload.
- * Only built when DEBUG_GSI_SEND_WR is defined.
- */
-static void trace_ud_wr(const struct ib_ud_wr *ud_wr)
-{
-       int idx = 0;    /* position of the current WR within the chain */
-       int j;
-
-       while (ud_wr) {
-               /*
-                * NOTE(review): assumes struct ib_ud_wr carries a mad_hdr
-                * member - confirm against rdma/ib_verbs.h.
-                */
-               struct ib_mad_hdr *mad_hdr = ud_wr->mad_hdr;
-               struct ib_sge *sge = ud_wr->wr.sg_list;
-               const struct ib_send_wr *next = ud_wr->wr.next;
-
-               ehca_gen_dbg("ud_wr#%x wr_id=%lx num_sge=%x "
-                            "send_flags=%x opcode=%x", idx, ud_wr->wr.wr_id,
-                            ud_wr->wr.num_sge, ud_wr->wr.send_flags,
-                            ud_wr->wr.opcode);
-               if (mad_hdr) {
-                       ehca_gen_dbg("ud_wr#%x mad_hdr base_version=%x "
-                                    "mgmt_class=%x class_version=%x method=%x "
-                                    "status=%x class_specific=%x tid=%lx "
-                                    "attr_id=%x resv=%x attr_mod=%x",
-                                    idx, mad_hdr->base_version,
-                                    mad_hdr->mgmt_class,
-                                    mad_hdr->class_version, mad_hdr->method,
-                                    mad_hdr->status, mad_hdr->class_specific,
-                                    mad_hdr->tid, mad_hdr->attr_id,
-                                    mad_hdr->resv,
-                                    mad_hdr->attr_mod);
-               }
-               for (j = 0; j < ud_wr->wr.num_sge; j++) {
-                       u8 *data = __va(sge->addr);
-                       ehca_gen_dbg("ud_wr#%x sge#%x addr=%p length=%x "
-                                    "lkey=%x",
-                                    idx, j, data, sge->length, sge->lkey);
-                       /* assume length is n*16 */
-                       ehca_dmp(data, sge->length, "ud_wr#%x sge#%x",
-                                idx, j);
-                       sge++;
-               } /* eof for j */
-               idx++;
-               /*
-                * If ud_wr() is a function rather than a macro, the parameter
-                * of the same name hides it here, so the next generic WR is
-                * converted by hand.  The explicit NULL test ends the walk
-                * regardless of where wr sits inside struct ib_ud_wr.
-                */
-               ud_wr = next ? container_of(next, struct ib_ud_wr, wr) : NULL;
-       } /* eof while ud_wr */
-}
-
-#endif /* DEBUG_GSI_SEND_WR */
-
-static inline int ehca_write_swqe(struct ehca_qp *qp,
-                                 struct ehca_wqe *wqe_p,
-                                 struct ib_send_wr *send_wr,
-                                 u32 sq_map_idx,
-                                 int hidden)
-{
-       u32 idx;
-       u64 dma_length;
-       struct ehca_av *my_av;
-       u32 remote_qkey;
-       struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];
-
-       if (unlikely((send_wr->num_sge < 0) ||
-                    (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
-               ehca_gen_err("Invalid number of WQE SGE. "
-                        "num_sqe=%x max_nr_of_sg=%x",
-                        send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg);
-               return -EINVAL; /* invalid SG list length */
-       }
-
-       /* clear wqe header until sglist */
-       memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
-
-       wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx);
-
-       qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
-       qmap_entry->reported = 0;
-       qmap_entry->cqe_req = 0;
-
-       switch (send_wr->opcode) {
-       case IB_WR_SEND:
-       case IB_WR_SEND_WITH_IMM:
-               wqe_p->optype = WQE_OPTYPE_SEND;
-               break;
-       case IB_WR_RDMA_WRITE:
-       case IB_WR_RDMA_WRITE_WITH_IMM:
-               wqe_p->optype = WQE_OPTYPE_RDMAWRITE;
-               break;
-       case IB_WR_RDMA_READ:
-               wqe_p->optype = WQE_OPTYPE_RDMAREAD;
-               break;
-       default:
-               ehca_gen_err("Invalid opcode=%x", send_wr->opcode);
-               return -EINVAL; /* invalid opcode */
-       }
-
-       wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE;
-
-       wqe_p->wr_flag = 0;
-
-       if ((send_wr->send_flags & IB_SEND_SIGNALED ||
-           qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
-           && !hidden) {
-               wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
-               qmap_entry->cqe_req = 1;
-       }
-
-       if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
-           send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
-               /* this might not work as long as HW does not support it */
-               wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data);
-               wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
-       }
-
-       wqe_p->nr_of_data_seg = send_wr->num_sge;
-
-       switch (qp->qp_type) {
-       case IB_QPT_SMI:
-       case IB_QPT_GSI:
-               /* no break is intential here */
-       case IB_QPT_UD:
-               /* IB 1.2 spec C10-15 compliance */
-               remote_qkey = ud_wr(send_wr)->remote_qkey;
-               if (remote_qkey & 0x80000000)
-                       remote_qkey = qp->qkey;
-
-               wqe_p->destination_qp_number = ud_wr(send_wr)->remote_qpn << 8;
-               wqe_p->local_ee_context_qkey = remote_qkey;
-               if (unlikely(!ud_wr(send_wr)->ah)) {
-                       ehca_gen_err("ud_wr(send_wr) is NULL. qp=%p", qp);
-                       return -EINVAL;
-               }
-               if (unlikely(ud_wr(send_wr)->remote_qpn == 0)) {
-                       ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num);
-                       return -EINVAL;
-               }
-               my_av = container_of(ud_wr(send_wr)->ah, struct ehca_av, ib_ah);
-               wqe_p->u.ud_av.ud_av = my_av->av;
-
-               /*
-                * omitted check of IB_SEND_INLINE
-                * since HW does not support it
-                */
-               for (idx = 0; idx < send_wr->num_sge; idx++) {
-                       wqe_p->u.ud_av.sg_list[idx].vaddr =
-                               send_wr->sg_list[idx].addr;
-                       wqe_p->u.ud_av.sg_list[idx].lkey =
-                               send_wr->sg_list[idx].lkey;
-                       wqe_p->u.ud_av.sg_list[idx].length =
-                               send_wr->sg_list[idx].length;
-               } /* eof for idx */
-               if (qp->qp_type == IB_QPT_SMI ||
-                   qp->qp_type == IB_QPT_GSI)
-                       wqe_p->u.ud_av.ud_av.pmtu = 1;
-               if (qp->qp_type == IB_QPT_GSI) {
-                       wqe_p->pkeyi = ud_wr(send_wr)->pkey_index;
-#ifdef DEBUG_GSI_SEND_WR
-                       trace_ud_wr(ud_wr(send_wr));
-#endif /* DEBUG_GSI_SEND_WR */
-               }
-               break;
-
-       case IB_QPT_UC:
-               if (send_wr->send_flags & IB_SEND_FENCE)
-                       wqe_p->wr_flag |= WQE_WRFLAG_FENCE;
-               /* no break is intentional here */
-       case IB_QPT_RC:
-               /* TODO: atomic not implemented */
-               wqe_p->u.nud.remote_virtual_address =
-                       rdma_wr(send_wr)->remote_addr;
-               wqe_p->u.nud.rkey = rdma_wr(send_wr)->rkey;
-
-               /*
-                * omitted checking of IB_SEND_INLINE
-                * since HW does not support it
-                */
-               dma_length = 0;
-               for (idx = 0; idx < send_wr->num_sge; idx++) {
-                       wqe_p->u.nud.sg_list[idx].vaddr =
-                               send_wr->sg_list[idx].addr;
-                       wqe_p->u.nud.sg_list[idx].lkey =
-                               send_wr->sg_list[idx].lkey;
-                       wqe_p->u.nud.sg_list[idx].length =
-                               send_wr->sg_list[idx].length;
-                       dma_length += send_wr->sg_list[idx].length;
-               } /* eof idx */
-               wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;
-
-               /* unsolicited ack circumvention */
-               if (send_wr->opcode == IB_WR_RDMA_READ) {
-                       /* on RDMA read, switch on and reset counters */
-                       qp->message_count = qp->packet_count = 0;
-                       qp->unsol_ack_circ = 1;
-               } else
-                       /* else estimate #packets */
-                       qp->packet_count += (dma_length >> qp->mtu_shift) + 1;
-
-               break;
-
-       default:
-               ehca_gen_err("Invalid qptype=%x", qp->qp_type);
-               return -EINVAL;
-       }
-
-       if (ehca_debug_level >= 3) {
-               ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp);
-               ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe");
-       }
-       return 0;
-}
-
-/*
- * map_ib_wc_status - convert a raw cqe_status to an ib_wc_status
- * @cqe_status: status word taken from the CQE
- * @wc_status:  receives the verbs status; always written
- *
- * Without WC_STATUS_ERROR_BIT the result is IB_WC_SUCCESS.  Otherwise the
- * low six bits select the error; code 0x07 is a remote error whose detail
- * comes from the WC_STATUS_REMOTE_ERROR_FLAGS field.  Unlisted error codes
- * map to IB_WC_FATAL_ERR, unlisted remote error details to
- * IB_WC_GENERAL_ERR.
- */
-static inline void map_ib_wc_status(u32 cqe_status,
-                                   enum ib_wc_status *wc_status)
-{
-       if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) {
-               switch (cqe_status & 0x3F) {
-               case 0x01:
-               case 0x21:
-                       *wc_status = IB_WC_LOC_LEN_ERR;
-                       break;
-               case 0x02:
-               case 0x22:
-                       *wc_status = IB_WC_LOC_QP_OP_ERR;
-                       break;
-               case 0x03:
-               case 0x23:
-                       *wc_status = IB_WC_LOC_EEC_OP_ERR;
-                       break;
-               case 0x04:
-               case 0x24:
-                       *wc_status = IB_WC_LOC_PROT_ERR;
-                       break;
-               case 0x05:
-               case 0x25:
-                       *wc_status = IB_WC_WR_FLUSH_ERR;
-                       break;
-               case 0x06:
-                       *wc_status = IB_WC_MW_BIND_ERR;
-                       break;
-               case 0x07: /* remote error - look into bits 20:24 */
-                       switch ((cqe_status
-                                & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) {
-                       case 0x0:
-                               /*
-                                * PSN Sequence Error!
-                                * couldn't find a matching status!
-                                */
-                               *wc_status = IB_WC_GENERAL_ERR;
-                               break;
-                       case 0x1:
-                               *wc_status = IB_WC_REM_INV_REQ_ERR;
-                               break;
-                       case 0x2:
-                               *wc_status = IB_WC_REM_ACCESS_ERR;
-                               break;
-                       case 0x3:
-                               *wc_status = IB_WC_REM_OP_ERR;
-                               break;
-                       case 0x4:
-                               *wc_status = IB_WC_REM_INV_RD_REQ_ERR;
-                               break;
-                       default:
-                               /*
-                                * unknown remote error detail: never leave
-                                * the caller's status unwritten
-                                */
-                               *wc_status = IB_WC_GENERAL_ERR;
-                               break;
-                       }
-                       break;
-               case 0x08:
-                       *wc_status = IB_WC_RETRY_EXC_ERR;
-                       break;
-               case 0x09:
-                       *wc_status = IB_WC_RNR_RETRY_EXC_ERR;
-                       break;
-               case 0x0A:
-               case 0x2D:
-                       *wc_status = IB_WC_REM_ABORT_ERR;
-                       break;
-               case 0x0B:
-               case 0x2E:
-                       *wc_status = IB_WC_INV_EECN_ERR;
-                       break;
-               case 0x0C:
-               case 0x2F:
-                       *wc_status = IB_WC_INV_EEC_STATE_ERR;
-                       break;
-               case 0x0D:
-                       *wc_status = IB_WC_BAD_RESP_ERR;
-                       break;
-               case 0x10:
-                       /* WQE purged */
-                       *wc_status = IB_WC_WR_FLUSH_ERR;
-                       break;
-               default:
-                       *wc_status = IB_WC_FATAL_ERR;
-                       break;
-               }
-       } else
-               *wc_status = IB_WC_SUCCESS;
-}
-
-/*
- * post_one_send - claim the next send queue slot and write one WQE into it
- * @my_qp:       QP whose send queue is used
- * @cur_send_wr: work request to translate
- * @hidden:      passed through to ehca_write_swqe()
- *
- * Returns 0 on success, -ENOMEM if the send queue has no free slot and
- * -EINVAL if the WQE could not be written; in the latter case the queue
- * offset is rolled back to where it was on entry.
- */
-static inline int post_one_send(struct ehca_qp *my_qp,
-                        struct ib_send_wr *cur_send_wr,
-                        int hidden)
-{
-       /* remembered before the slot is claimed so it can be restored */
-       u64 saved_offset = my_qp->ipz_squeue.current_q_offset;
-       struct ehca_wqe *slot;
-       u32 map_idx;
-
-       slot = ipz_qeit_get_inc(&my_qp->ipz_squeue);
-       if (unlikely(!slot)) {
-               /* too many posted work requests: queue overflow */
-               ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
-                        "qp_num=%x", my_qp->ib_qp.qp_num);
-               return -ENOMEM;
-       }
-
-       /* the slot's position in the queue doubles as the sq_map index */
-       map_idx = saved_offset / my_qp->ipz_squeue.qe_size;
-
-       if (likely(!ehca_write_swqe(my_qp, slot, cur_send_wr, map_idx,
-                                   hidden)))
-               return 0;
-
-       /* writing failed: give the slot back */
-       my_qp->ipz_squeue.current_q_offset = saved_offset;
-       ehca_err(my_qp->ib_qp.device, "Could not write WQE "
-                "qp_num=%x", my_qp->ib_qp.qp_num);
-       return -EINVAL;
-}
-
-/*
- * ehca_post_send - post a chain of send work requests to a QP
- * @qp:          verbs QP to post on
- * @send_wr:     first work request of the chain (linked through ->next)
- * @bad_send_wr: on failure, receives the work request that was not posted
- *
- * Requests are written one by one under the send queue lock; posting stops
- * at the first failure and the hardware is told only about the WQEs that
- * were actually written.  Returns 0 on success, -EINVAL if the QP state is
- * below IB_QPS_RTS, or the error of the failing post_one_send().
- */
-int ehca_post_send(struct ib_qp *qp,
-                  struct ib_send_wr *send_wr,
-                  struct ib_send_wr **bad_send_wr)
-{
-       struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
-       int wqe_cnt = 0;
-       int ret = 0;
-       unsigned long flags;
-
-       /* Reject WR if QP is in RESET, INIT or RTR state */
-       if (unlikely(my_qp->state < IB_QPS_RTS)) {
-               ehca_err(qp->device, "Invalid QP state  qp_state=%d qpn=%x",
-                        my_qp->state, qp->qp_num);
-               ret = -EINVAL;
-               goto out;
-       }
-
-       /* LOCK the QUEUE */
-       spin_lock_irqsave(&my_qp->spinlock_s, flags);
-
-       /* Send an empty extra RDMA read if:
-        *  1) there has been an RDMA read on this connection before
-        *  2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets
-        *  3) we can be sure that any previous extra RDMA read has been
-        *     processed so we don't overflow the SQ
-        */
-       if (unlikely(my_qp->unsol_ack_circ &&
-                    my_qp->packet_count > ACK_CIRC_THRESHOLD &&
-                    my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) {
-               /*
-                * insert an empty RDMA READ to fix up the remote QP state.
-                * ehca_write_swqe() reads remote_addr and rkey through
-                * rdma_wr(), so the request has to be a complete
-                * struct ib_rdma_wr; a bare struct ib_send_wr would be read
-                * past its end.
-                */
-               struct ib_rdma_wr circ_wr;
-
-               memset(&circ_wr, 0, sizeof(circ_wr));
-               circ_wr.wr.opcode = IB_WR_RDMA_READ;
-               /*
-                * a failure here is not reported to the caller, but only a
-                * WQE that was really written may be counted
-                */
-               if (!post_one_send(my_qp, &circ_wr.wr, 1)) {
-                       wqe_cnt++;
-                       ehca_dbg(qp->device, "posted circ wr  qp_num=%x",
-                                qp->qp_num);
-               }
-               my_qp->message_count = my_qp->packet_count = 0;
-       }
-
-       /* loop processes list of send reqs */
-       while (send_wr) {
-               ret = post_one_send(my_qp, send_wr, 0);
-               if (unlikely(ret)) {
-                       goto post_send_exit0;
-               }
-               wqe_cnt++;
-               send_wr = send_wr->next;
-       }
-
-post_send_exit0:
-       iosync(); /* serialize GAL register access */
-       hipz_update_sqa(my_qp, wqe_cnt);
-       if (unlikely(ret || ehca_debug_level >= 2))
-               ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
-                        my_qp, qp->qp_num, wqe_cnt, ret);
-       my_qp->message_count += wqe_cnt;
-       spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
-
-out:
-       /* send_wr still points at the request that could not be posted */
-       if (ret)
-               *bad_send_wr = send_wr;
-       return ret;
-}
-
-/*
- * internal_post_recv - post a chain of receive work requests
- * @my_qp:       QP (or SRQ backing object) whose receive queue is used
- * @dev:         device used for log messages
- * @recv_wr:     first work request of the chain (linked through ->next)
- * @bad_recv_wr: on failure, receives the work request that was not posted
- *
- * Returns 0 on success, -ENODEV if the QP has no receive queue, -ENOMEM if
- * the receive queue is full and -EINVAL if a WQE could not be written.
- * The hardware is informed about every WQE written before the failure.
- */
-static int internal_post_recv(struct ehca_qp *my_qp,
-                             struct ib_device *dev,
-                             struct ib_recv_wr *recv_wr,
-                             struct ib_recv_wr **bad_recv_wr)
-{
-       unsigned long irq_flags;
-       int posted = 0;
-       int rc = 0;
-
-       if (unlikely(!HAS_RQ(my_qp))) {
-               ehca_err(dev, "QP has no RQ  ehca_qp=%p qp_num=%x ext_type=%d",
-                        my_qp, my_qp->real_qp_num, my_qp->ext_type);
-               *bad_recv_wr = recv_wr;
-               return -ENODEV;
-       }
-
-       /* the receive queue is modified under its own lock */
-       spin_lock_irqsave(&my_qp->spinlock_r, irq_flags);
-
-       for (; recv_wr; recv_wr = recv_wr->next) {
-               /* remembered before the slot is claimed so it can be restored */
-               u64 saved_offset = my_qp->ipz_rqueue.current_q_offset;
-               struct ehca_qmap_entry *entry;
-               struct ehca_wqe *slot;
-               u32 map_idx;
-
-               slot = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
-               if (unlikely(!slot)) {
-                       /* too many posted work requests: queue overflow */
-                       rc = -ENOMEM;
-                       ehca_err(dev, "Too many posted WQEs "
-                               "qp_num=%x", my_qp->real_qp_num);
-                       break;
-               }
-
-               /* the slot's position in the queue doubles as rq_map index */
-               map_idx = saved_offset / my_qp->ipz_rqueue.qe_size;
-
-               if (unlikely(ehca_write_rwqe(&my_qp->ipz_rqueue, slot,
-                                            recv_wr, map_idx))) {
-                       /* writing failed: give the slot back */
-                       my_qp->ipz_rqueue.current_q_offset = saved_offset;
-                       rc = -EINVAL;
-                       ehca_err(dev, "Could not write WQE "
-                               "qp_num=%x", my_qp->real_qp_num);
-                       break;
-               }
-
-               entry = &my_qp->rq_map.map[map_idx];
-               entry->app_wr_id = get_app_wr_id(recv_wr->wr_id);
-               entry->reported = 0;
-               entry->cqe_req = 1;
-
-               posted++;
-       }
-
-       iosync(); /* serialize GAL register access */
-       hipz_update_rqa(my_qp, posted);
-       if (unlikely(rc || ehca_debug_level >= 2))
-               ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
-                        my_qp, my_qp->real_qp_num, posted, rc);
-       spin_unlock_irqrestore(&my_qp->spinlock_r, irq_flags);
-
-       /* recv_wr still points at the request that could not be posted */
-       if (rc)
-               *bad_recv_wr = recv_wr;
-
-       return rc;
-}
-
-/*
- * ehca_post_recv - verbs entry point for posting receive requests to a QP
- *
- * Fails with -EINVAL, reporting @recv_wr through @bad_recv_wr, while the QP
- * is in the RESET state; otherwise the chain is handed to
- * internal_post_recv() and its result returned.
- */
-int ehca_post_recv(struct ib_qp *qp,
-                  struct ib_recv_wr *recv_wr,
-                  struct ib_recv_wr **bad_recv_wr)
-{
-       struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
-
-       if (likely(my_qp->state != IB_QPS_RESET))
-               return internal_post_recv(my_qp, qp->device, recv_wr,
-                                         bad_recv_wr);
-
-       /* Reject WR if QP is in RESET state */
-       ehca_err(qp->device, "Invalid QP state  qp_state=%d qpn=%x",
-                my_qp->state, qp->qp_num);
-       *bad_recv_wr = recv_wr;
-       return -EINVAL;
-}
-
-/*
- * ehca_post_srq_recv - verbs entry point for posting receive requests to an
- * SRQ
- *
- * The SRQ is backed by an ehca_qp embedding the ib_srq; the chain is handed
- * to internal_post_recv() and its result returned.
- */
-int ehca_post_srq_recv(struct ib_srq *srq,
-                      struct ib_recv_wr *recv_wr,
-                      struct ib_recv_wr **bad_recv_wr)
-{
-       struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq);
-
-       return internal_post_recv(my_qp, srq->device, recv_wr, bad_recv_wr);
-}
-
-/*
- * ib_wc_opcode table converts ehca wc opcode to ib
- * Zero marks an invalid ehca opcode, so every entry stores the ib opcode
- * plus one and the user of the table has to subtract one again.
- * The table has 256 entries so that any 8-bit index stays in bounds
- * (assumes the CQE optype used as index is 8 bits wide - TODO confirm).
- */
-static const u8 ib_wc_opcode[256] = {
-       [0x01] = IB_WC_RECV+1,
-       [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1,
-       [0x04] = IB_WC_BIND_MW+1,
-       [0x08] = IB_WC_FETCH_ADD+1,
-       [0x10] = IB_WC_COMP_SWAP+1,
-       [0x20] = IB_WC_RDMA_WRITE+1,
-       [0x40] = IB_WC_RDMA_READ+1,
-       [0x80] = IB_WC_SEND+1
-};
-
-/*
- * internal function to poll one entry of cq
- *
- * Fetches the next valid CQE of @cq and translates it into @wc.
- * Returns 0 when @wc has been filled and -EAGAIN when the completion queue
- * holds no valid entry.  CQEs that are purged, whose qp_token does not
- * resolve to a QP, that were already reported, or that carry an unknown
- * optype are discarded and the next entry is polled instead.  Every CQE
- * fetched, discarded or not, is counted and reported to
- * hipz_update_feca() on exit.
- */
-static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
-{
-       int ret = 0, qmap_tail_idx;
-       struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-       struct ehca_cqe *cqe;
-       struct ehca_qp *my_qp;
-       struct ehca_qmap_entry *qmap_entry;
-       struct ehca_queue_map *qmap;
-       int cqe_count = 0, is_error;
-
-repoll:
-       cqe = (struct ehca_cqe *)
-               ipz_qeit_get_inc_valid(&my_cq->ipz_queue);
-       if (!cqe) {
-               ret = -EAGAIN;
-               if (ehca_debug_level >= 3)
-                       ehca_dbg(cq->device, "Completion queue is empty  "
-                                "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number);
-               goto poll_cq_one_exit0;
-       }
-
-       /* prevents loads being reordered across this point */
-       rmb();
-
-       cqe_count++;
-       if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
-               struct ehca_qp *qp;
-               int purgeflag;
-               unsigned long flags;
-
-               qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number);
-               if (!qp) {
-                       ehca_err(cq->device, "cq_num=%x qp_num=%x "
-                                "could not find qp -> ignore cqe",
-                                my_cq->cq_number, cqe->local_qp_number);
-                       ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x",
-                                my_cq->cq_number, cqe->local_qp_number);
-                       /* ignore this purged cqe */
-                       goto repoll;
-               }
-               spin_lock_irqsave(&qp->spinlock_s, flags);
-               purgeflag = qp->sqerr_purgeflag;
-               spin_unlock_irqrestore(&qp->spinlock_s, flags);
-
-               if (purgeflag) {
-                       ehca_dbg(cq->device,
-                                "Got CQE with purged bit qp_num=%x src_qp=%x",
-                                cqe->local_qp_number, cqe->remote_qp_number);
-                       if (ehca_debug_level >= 2)
-                               ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x",
-                                        cqe->local_qp_number,
-                                        cqe->remote_qp_number);
-                       /*
-                        * ignore this to avoid double cqes of bad wqe
-                        * that caused sqe and turn off purge flag
-                        */
-                       qp->sqerr_purgeflag = 0;
-                       goto repoll;
-               }
-       }
-
-       is_error = cqe->status & WC_STATUS_ERROR_BIT;
-
-       /* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */
-       if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) {
-               ehca_dbg(cq->device,
-                        "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----",
-                        is_error ? "ERROR " : "", my_cq, my_cq->cq_number);
-               ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
-                        my_cq, my_cq->cq_number);
-               ehca_dbg(cq->device,
-                        "ehca_cq=%p cq_num=%x -------------------------",
-                        my_cq, my_cq->cq_number);
-       }
-
-       read_lock(&ehca_qp_idr_lock);
-       my_qp = idr_find(&ehca_qp_idr, cqe->qp_token);
-       read_unlock(&ehca_qp_idr_lock);
-       if (!my_qp)
-               goto repoll;
-       wc->qp = &my_qp->ib_qp;
-
-       qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
-       if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
-               /* We got a send completion. */
-               qmap = &my_qp->sq_map;
-       else
-               /* We got a receive completion. */
-               qmap = &my_qp->rq_map;
-
-       /* advance the tail pointer to the queue map index found in the CQE */
-       qmap->tail = qmap_tail_idx;
-
-       if (is_error) {
-               /*
-                * set left_to_poll to 0 because in error state, we will not
-                * get any additional CQEs
-                */
-               my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
-                                                       my_qp->sq_map.entries);
-               my_qp->sq_map.left_to_poll = 0;
-               ehca_add_to_err_list(my_qp, 1);
-
-               my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
-                                                       my_qp->rq_map.entries);
-               my_qp->rq_map.left_to_poll = 0;
-               if (HAS_RQ(my_qp))
-                       ehca_add_to_err_list(my_qp, 0);
-       }
-
-       qmap_entry = &qmap->map[qmap_tail_idx];
-       if (qmap_entry->reported) {
-               ehca_warn(cq->device, "Double cqe on qp_num=%#x",
-                               my_qp->real_qp_num);
-               /* found a double cqe, discard it and read next one */
-               goto repoll;
-       }
-
-       wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
-       qmap_entry->reported = 1;
-
-       /* if left_to_poll is decremented to 0, add the QP to the error list */
-       if (qmap->left_to_poll > 0) {
-               qmap->left_to_poll--;
-               if ((my_qp->sq_map.left_to_poll == 0) &&
-                               (my_qp->rq_map.left_to_poll == 0)) {
-                       ehca_add_to_err_list(my_qp, 1);
-                       if (HAS_RQ(my_qp))
-                               ehca_add_to_err_list(my_qp, 0);
-               }
-       }
-
-       /* eval ib_wc_opcode; table entries are stored incremented by one */
-       wc->opcode = ib_wc_opcode[cqe->optype]-1;
-       if (unlikely(wc->opcode == -1)) {
-               ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x "
-                        "ehca_cq=%p cq_num=%x",
-                        cqe->optype, cqe->status, my_cq, my_cq->cq_number);
-               /* dump cqe for other infos */
-               ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
-                        my_cq, my_cq->cq_number);
-               /*
-                * update also queue adder to throw away this entry!!!
-                * (cqe_count already includes it, see poll_cq_one_exit0)
-                */
-               goto repoll;
-       }
-
-       /* eval ib_wc_status */
-       if (unlikely(is_error)) {
-               /* complete with errors */
-               map_ib_wc_status(cqe->status, &wc->status);
-               wc->vendor_err = wc->status;
-       } else
-               wc->status = IB_WC_SUCCESS;
-
-       wc->byte_len = cqe->nr_bytes_transferred;
-       wc->pkey_index = cqe->pkey_index;
-       wc->slid = cqe->rlid;
-       wc->dlid_path_bits = cqe->dlid;
-       wc->src_qp = cqe->remote_qp_number;
-       /*
-        * HW has "Immed data present" and "GRH present" in bits 6 and 5.
-        * SW defines those in bits 1 and 0, so we can just shift and mask.
-        */
-       wc->wc_flags = (cqe->w_completion_flags >> 5) & 3;
-       wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
-       wc->sl = cqe->service_level;
-
-poll_cq_one_exit0:
-       if (cqe_count > 0)
-               hipz_update_feca(my_cq, cqe_count);
-
-       return ret;
-}
-
-/*
- * generate_flush_cqes - synthesize flush-error completions for a QP
- * @my_qp:       QP whose unreported WQEs are turned into completions
- * @cq:          CQ being polled; used for error logging only
- * @wc:          array receiving the generated completions
- * @num_entries: capacity of @wc
- * @ipz_queue:   send or receive queue holding the WQEs
- * @on_sq:       non-zero for the send queue, zero for the receive queue
- *
- * Walks the queue map from next_wqe_idx and emits one IB_WC_WR_FLUSH_ERR
- * completion per entry until @num_entries is reached or an entry that was
- * already reported is found.  Returns the number of completions written.
- */
-static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,
-                              struct ib_wc *wc, int num_entries,
-                              struct ipz_queue *ipz_queue, int on_sq)
-{
-       int nr = 0;
-       struct ehca_wqe *wqe;
-       u64 offset;
-       struct ehca_queue_map *qmap;
-       struct ehca_qmap_entry *qmap_entry;
-
-       if (on_sq)
-               qmap = &my_qp->sq_map;
-       else
-               qmap = &my_qp->rq_map;
-
-       qmap_entry = &qmap->map[qmap->next_wqe_idx];
-
-       while ((nr < num_entries) && (qmap_entry->reported == 0)) {
-               /* generate flush CQE */
-
-               memset(wc, 0, sizeof(*wc));
-
-               offset = qmap->next_wqe_idx * ipz_queue->qe_size;
-               wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
-               if (!wqe) {
-                       ehca_err(cq->device, "Invalid wqe offset=%#llx on "
-                                "qp_num=%#x", offset, my_qp->real_qp_num);
-                       return nr;
-               }
-
-               /* restore the caller's wr_id bits saved in the queue map */
-               wc->wr_id = replace_wr_id(wqe->work_request_id,
-                                         qmap_entry->app_wr_id);
-
-               if (on_sq) {
-                       switch (wqe->optype) {
-                       case WQE_OPTYPE_SEND:
-                               wc->opcode = IB_WC_SEND;
-                               break;
-                       case WQE_OPTYPE_RDMAWRITE:
-                               wc->opcode = IB_WC_RDMA_WRITE;
-                               break;
-                       case WQE_OPTYPE_RDMAREAD:
-                               wc->opcode = IB_WC_RDMA_READ;
-                               break;
-                       default:
-                               ehca_err(cq->device, "Invalid optype=%x",
-                                               wqe->optype);
-                               return nr;
-                       }
-               } else
-                       wc->opcode = IB_WC_RECV;
-
-               if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) {
-                       /*
-                        * the WQE was filled with be32_to_cpu(), so convert
-                        * back like the regular poll path does
-                        */
-                       wc->ex.imm_data = cpu_to_be32(wqe->immediate_data);
-                       wc->wc_flags |= IB_WC_WITH_IMM;
-               }
-
-               wc->status = IB_WC_WR_FLUSH_ERR;
-
-               wc->qp = &my_qp->ib_qp;
-
-               /* mark as reported and advance next_wqe pointer */
-               qmap_entry->reported = 1;
-               qmap->next_wqe_idx = next_index(qmap->next_wqe_idx,
-                                               qmap->entries);
-               qmap_entry = &qmap->map[qmap->next_wqe_idx];
-
-               wc++; nr++;
-       }
-
-       return nr;
-
-}
-
-int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
-{
-       struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-       int nr;
-       struct ehca_qp *err_qp;
-       struct ib_wc *current_wc = wc;
-       int ret = 0;
-       unsigned long flags;
-       int entries_left = num_entries;
-
-       if (num_entries < 1) {
-               ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
-                        "cq_num=%x", num_entries, my_cq, my_cq->cq_number);
-               ret = -EINVAL;
-               goto poll_cq_exit0;
-       }
-
-       spin_lock_irqsave(&my_cq->spinlock, flags);
-
-       /* generate flush cqes for send queues */
-       list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) {
-               nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
-                               &err_qp->ipz_squeue, 1);
-               entries_left -= nr;
-               current_wc += nr;
-
-               if (entries_left == 0)
-                       break;
-       }
-
-       /* generate flush cqes for receive queues */
-       list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) {
-               nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
-                               &err_qp->ipz_rqueue, 0);
-               entries_left -= nr;
-               current_wc += nr;
-
-               if (entries_left == 0)
-                       break;
-       }
-
-       for (nr = 0; nr < entries_left; nr++) {
-               ret = ehca_poll_cq_one(cq, current_wc);
-               if (ret)
-                       break;
-               current_wc++;
-       } /* eof for nr */
-       entries_left -= nr;
-
-       spin_unlock_irqrestore(&my_cq->spinlock, flags);
-       if (ret == -EAGAIN  || !ret)
-               ret = num_entries - entries_left;
-
-poll_cq_exit0:
-       return ret;
-}
-
-int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)
-{
-       struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-       int ret = 0;
-
-       switch (notify_flags & IB_CQ_SOLICITED_MASK) {
-       case IB_CQ_SOLICITED:
-               hipz_set_cqx_n0(my_cq, 1);
-               break;
-       case IB_CQ_NEXT_COMP:
-               hipz_set_cqx_n1(my_cq, 1);
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
-               unsigned long spl_flags;
-               spin_lock_irqsave(&my_cq->spinlock, spl_flags);
-               ret = ipz_qeit_is_valid(&my_cq->ipz_queue);
-               spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
-       }
-
-       return ret;
-}
diff --git a/drivers/staging/rdma/ehca/ehca_sqp.c b/drivers/staging/rdma/ehca/ehca_sqp.c
deleted file mode 100644 (file)
index 376b031..0000000
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  SQP functions
- *
- *  Authors: Khadija Souissi <souissi@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <rdma/ib_mad.h>
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-#define IB_MAD_STATUS_REDIRECT         cpu_to_be16(0x0002)
-#define IB_MAD_STATUS_UNSUP_VERSION    cpu_to_be16(0x0004)
-#define IB_MAD_STATUS_UNSUP_METHOD     cpu_to_be16(0x0008)
-
-#define IB_PMA_CLASS_PORT_INFO         cpu_to_be16(0x0001)
-
-/**
- * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue
- * pair is created successfully, the corresponding port gets active.
- *
- * Define Special Queue pair 0 (SMI QP) is still not supported.
- *
- * @qp_init_attr: Queue pair init attributes with port and queue pair type
- */
-
-u64 ehca_define_sqp(struct ehca_shca *shca,
-                   struct ehca_qp *ehca_qp,
-                   struct ib_qp_init_attr *qp_init_attr)
-{
-       u32 pma_qp_nr, bma_qp_nr;
-       u64 ret;
-       u8 port = qp_init_attr->port_num;
-       int counter;
-
-       shca->sport[port - 1].port_state = IB_PORT_DOWN;
-
-       switch (qp_init_attr->qp_type) {
-       case IB_QPT_SMI:
-               /* function not supported yet */
-               break;
-       case IB_QPT_GSI:
-               ret = hipz_h_define_aqp1(shca->ipz_hca_handle,
-                                        ehca_qp->ipz_qp_handle,
-                                        ehca_qp->galpas.kernel,
-                                        (u32) qp_init_attr->port_num,
-                                        &pma_qp_nr, &bma_qp_nr);
-
-               if (ret != H_SUCCESS) {
-                       ehca_err(&shca->ib_device,
-                                "Can't define AQP1 for port %x. h_ret=%lli",
-                                port, ret);
-                       return ret;
-               }
-               shca->sport[port - 1].pma_qp_nr = pma_qp_nr;
-               ehca_dbg(&shca->ib_device, "port=%x pma_qp_nr=%x",
-                        port, pma_qp_nr);
-               break;
-       default:
-               ehca_err(&shca->ib_device, "invalid qp_type=%x",
-                        qp_init_attr->qp_type);
-               return H_PARAMETER;
-       }
-
-       if (ehca_nr_ports < 0) /* autodetect mode */
-               return H_SUCCESS;
-
-       for (counter = 0;
-            shca->sport[port - 1].port_state != IB_PORT_ACTIVE &&
-                    counter < ehca_port_act_time;
-            counter++) {
-               ehca_dbg(&shca->ib_device, "... wait until port %x is active",
-                        port);
-               msleep_interruptible(1000);
-       }
-
-       if (counter == ehca_port_act_time) {
-               ehca_err(&shca->ib_device, "Port %x is not active.", port);
-               return H_HARDWARE;
-       }
-
-       return H_SUCCESS;
-}
-
-struct ib_perf {
-       struct ib_mad_hdr mad_hdr;
-       u8 reserved[40];
-       u8 data[192];
-} __attribute__ ((packed));
-
-/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */
-struct tcslfl {
-       u32 tc:8;
-       u32 sl:4;
-       u32 fl:20;
-} __attribute__ ((packed));
-
-/* IP Version/TC/FL packed into 32 bits, as in GRH */
-struct vertcfl {
-       u32 ver:4;
-       u32 tc:8;
-       u32 fl:20;
-} __attribute__ ((packed));
-
-static int ehca_process_perf(struct ib_device *ibdev, u8 port_num,
-                            const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-                            const struct ib_mad *in_mad, struct ib_mad *out_mad)
-{
-       const struct ib_perf *in_perf = (const struct ib_perf *)in_mad;
-       struct ib_perf *out_perf = (struct ib_perf *)out_mad;
-       struct ib_class_port_info *poi =
-               (struct ib_class_port_info *)out_perf->data;
-       struct tcslfl *tcslfl =
-               (struct tcslfl *)&poi->redirect_tcslfl;
-       struct ehca_shca *shca =
-               container_of(ibdev, struct ehca_shca, ib_device);
-       struct ehca_sport *sport = &shca->sport[port_num - 1];
-
-       ehca_dbg(ibdev, "method=%x", in_perf->mad_hdr.method);
-
-       *out_mad = *in_mad;
-
-       if (in_perf->mad_hdr.class_version != 1) {
-               ehca_warn(ibdev, "Unsupported class_version=%x",
-                         in_perf->mad_hdr.class_version);
-               out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_VERSION;
-               goto perf_reply;
-       }
-
-       switch (in_perf->mad_hdr.method) {
-       case IB_MGMT_METHOD_GET:
-       case IB_MGMT_METHOD_SET:
-               /* set class port info for redirection */
-               out_perf->mad_hdr.attr_id = IB_PMA_CLASS_PORT_INFO;
-               out_perf->mad_hdr.status = IB_MAD_STATUS_REDIRECT;
-               memset(poi, 0, sizeof(*poi));
-               poi->base_version = 1;
-               poi->class_version = 1;
-               poi->resp_time_value = 18;
-
-               /* copy local routing information from WC where applicable */
-               tcslfl->sl         = in_wc->sl;
-               poi->redirect_lid  =
-                       sport->saved_attr.lid | in_wc->dlid_path_bits;
-               poi->redirect_qp   = sport->pma_qp_nr;
-               poi->redirect_qkey = IB_QP1_QKEY;
-
-               ehca_query_pkey(ibdev, port_num, in_wc->pkey_index,
-                               &poi->redirect_pkey);
-
-               /* if request was globally routed, copy route info */
-               if (in_grh) {
-                       const struct vertcfl *vertcfl =
-                               (const struct vertcfl *)&in_grh->version_tclass_flow;
-                       memcpy(poi->redirect_gid, in_grh->dgid.raw,
-                              sizeof(poi->redirect_gid));
-                       tcslfl->tc        = vertcfl->tc;
-                       tcslfl->fl        = vertcfl->fl;
-               } else
-                       /* else only fill in default GID */
-                       ehca_query_gid(ibdev, port_num, 0,
-                                      (union ib_gid *)&poi->redirect_gid);
-
-               ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x",
-                        sport->saved_attr.lid, sport->pma_qp_nr);
-               break;
-
-       case IB_MGMT_METHOD_GET_RESP:
-               return IB_MAD_RESULT_FAILURE;
-
-       default:
-               out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_METHOD;
-               break;
-       }
-
-perf_reply:
-       out_perf->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
-
-       return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
-}
-
-int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
-                    const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-                    const struct ib_mad_hdr *in, size_t in_mad_size,
-                    struct ib_mad_hdr *out, size_t *out_mad_size,
-                    u16 *out_mad_pkey_index)
-{
-       int ret;
-       const struct ib_mad *in_mad = (const struct ib_mad *)in;
-       struct ib_mad *out_mad = (struct ib_mad *)out;
-
-       if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
-                        *out_mad_size != sizeof(*out_mad)))
-               return IB_MAD_RESULT_FAILURE;
-
-       if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc)
-               return IB_MAD_RESULT_FAILURE;
-
-       /* accept only pma request */
-       if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
-               return IB_MAD_RESULT_SUCCESS;
-
-       ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp);
-       ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh,
-                               in_mad, out_mad);
-
-       return ret;
-}
diff --git a/drivers/staging/rdma/ehca/ehca_tools.h b/drivers/staging/rdma/ehca/ehca_tools.h
deleted file mode 100644 (file)
index d280b12..0000000
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  auxiliary functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Khadija Souissi <souissik@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#ifndef EHCA_TOOLS_H
-#define EHCA_TOOLS_H
-
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-#include <linux/idr.h>
-#include <linux/kthread.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/vmalloc.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/device.h>
-
-#include <linux/atomic.h>
-#include <asm/ibmebus.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/hvcall.h>
-
-extern int ehca_debug_level;
-
-#define ehca_dbg(ib_dev, format, arg...) \
-       do { \
-               if (unlikely(ehca_debug_level)) \
-                       dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \
-                                  "PU%04x EHCA_DBG:%s " format "\n", \
-                                  raw_smp_processor_id(), __func__, \
-                                  ## arg); \
-       } while (0)
-
-#define ehca_info(ib_dev, format, arg...) \
-       dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \
-                raw_smp_processor_id(), __func__, ## arg)
-
-#define ehca_warn(ib_dev, format, arg...) \
-       dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \
-                raw_smp_processor_id(), __func__, ## arg)
-
-#define ehca_err(ib_dev, format, arg...) \
-       dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \
-               raw_smp_processor_id(), __func__, ## arg)
-
-/* use this one only if no ib_dev available */
-#define ehca_gen_dbg(format, arg...) \
-       do { \
-               if (unlikely(ehca_debug_level)) \
-                       printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \
-                              raw_smp_processor_id(), __func__, ## arg); \
-       } while (0)
-
-#define ehca_gen_warn(format, arg...) \
-       printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \
-              raw_smp_processor_id(), __func__, ## arg)
-
-#define ehca_gen_err(format, arg...) \
-       printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \
-              raw_smp_processor_id(), __func__, ## arg)
-
-/**
- * ehca_dmp - printk a memory block, whose length is n*8 bytes.
- * Each line has the following layout:
- * <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex>
- */
-#define ehca_dmp(adr, len, format, args...) \
-       do { \
-               unsigned int x; \
-               unsigned int l = (unsigned int)(len); \
-               unsigned char *deb = (unsigned char *)(adr); \
-               for (x = 0; x < l; x += 16) { \
-                       printk(KERN_INFO "EHCA_DMP:%s " format \
-                              " adr=%p ofs=%04x %016llx %016llx\n", \
-                              __func__, ##args, deb, x, \
-                              *((u64 *)&deb[0]), *((u64 *)&deb[8])); \
-                       deb += 16; \
-               } \
-       } while (0)
-
-/* define a bitmask, little endian version */
-#define EHCA_BMASK(pos, length) (((pos) << 16) + (length))
-
-/* define a bitmask, the ibm way... */
-#define EHCA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1))
-
-/* internal function, don't use */
-#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff)
-
-/* internal function, don't use */
-#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff))
-
-/**
- * EHCA_BMASK_SET - return value shifted and masked by mask
- * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable
- * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask
- * in variable
- */
-#define EHCA_BMASK_SET(mask, value) \
-       ((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask))
-
-/**
- * EHCA_BMASK_GET - extract a parameter from value by mask
- */
-#define EHCA_BMASK_GET(mask, value) \
-       (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask)))
-
-/* Converts ehca to ib return code */
-int ehca2ib_return_code(u64 ehca_rc);
-
-#endif /* EHCA_TOOLS_H */
diff --git a/drivers/staging/rdma/ehca/ehca_uverbs.c b/drivers/staging/rdma/ehca/ehca_uverbs.c
deleted file mode 100644 (file)
index 1a1d5d9..0000000
+++ /dev/null
@@ -1,309 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  userspace support verbs
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_classes.h"
-#include "ehca_iverbs.h"
-#include "ehca_mrmw.h"
-#include "ehca_tools.h"
-#include "hcp_if.h"
-
-struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
-                                       struct ib_udata *udata)
-{
-       struct ehca_ucontext *my_context;
-
-       my_context = kzalloc(sizeof *my_context, GFP_KERNEL);
-       if (!my_context) {
-               ehca_err(device, "Out of memory device=%p", device);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       return &my_context->ib_ucontext;
-}
-
-int ehca_dealloc_ucontext(struct ib_ucontext *context)
-{
-       kfree(container_of(context, struct ehca_ucontext, ib_ucontext));
-       return 0;
-}
-
-static void ehca_mm_open(struct vm_area_struct *vma)
-{
-       u32 *count = (u32 *)vma->vm_private_data;
-       if (!count) {
-               ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
-                            vma->vm_start, vma->vm_end);
-               return;
-       }
-       (*count)++;
-       if (!(*count))
-               ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx",
-                            vma->vm_start, vma->vm_end);
-       ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
-                    vma->vm_start, vma->vm_end, *count);
-}
-
-static void ehca_mm_close(struct vm_area_struct *vma)
-{
-       u32 *count = (u32 *)vma->vm_private_data;
-       if (!count) {
-               ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
-                            vma->vm_start, vma->vm_end);
-               return;
-       }
-       (*count)--;
-       ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
-                    vma->vm_start, vma->vm_end, *count);
-}
-
-static const struct vm_operations_struct vm_ops = {
-       .open = ehca_mm_open,
-       .close = ehca_mm_close,
-};
-
-static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
-                       u32 *mm_count)
-{
-       int ret;
-       u64 vsize, physical;
-
-       vsize = vma->vm_end - vma->vm_start;
-       if (vsize < EHCA_PAGESIZE) {
-               ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start);
-               return -EINVAL;
-       }
-
-       physical = galpas->user.fw_handle;
-       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-       ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical);
-       /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
-       ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT,
-                          vma->vm_page_prot);
-       if (unlikely(ret)) {
-               ehca_gen_err("remap_pfn_range() failed ret=%i", ret);
-               return -ENOMEM;
-       }
-
-       vma->vm_private_data = mm_count;
-       (*mm_count)++;
-       vma->vm_ops = &vm_ops;
-
-       return 0;
-}
-
-static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
-                          u32 *mm_count)
-{
-       int ret;
-       u64 start, ofs;
-       struct page *page;
-
-       vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
-       start = vma->vm_start;
-       for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) {
-               u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs);
-               page = virt_to_page(virt_addr);
-               ret = vm_insert_page(vma, start, page);
-               if (unlikely(ret)) {
-                       ehca_gen_err("vm_insert_page() failed rc=%i", ret);
-                       return ret;
-               }
-               start += PAGE_SIZE;
-       }
-       vma->vm_private_data = mm_count;
-       (*mm_count)++;
-       vma->vm_ops = &vm_ops;
-
-       return 0;
-}
-
-static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq,
-                       u32 rsrc_type)
-{
-       int ret;
-
-       switch (rsrc_type) {
-       case 0: /* galpa fw handle */
-               ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number);
-               ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa);
-               if (unlikely(ret)) {
-                       ehca_err(cq->ib_cq.device,
-                                "ehca_mmap_fw() failed rc=%i cq_num=%x",
-                                ret, cq->cq_number);
-                       return ret;
-               }
-               break;
-
-       case 1: /* cq queue_addr */
-               ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number);
-               ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue);
-               if (unlikely(ret)) {
-                       ehca_err(cq->ib_cq.device,
-                                "ehca_mmap_queue() failed rc=%i cq_num=%x",
-                                ret, cq->cq_number);
-                       return ret;
-               }
-               break;
-
-       default:
-               ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x",
-                        rsrc_type, cq->cq_number);
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
-                       u32 rsrc_type)
-{
-       int ret;
-
-       switch (rsrc_type) {
-       case 0: /* galpa fw handle */
-               ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num);
-               ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa);
-               if (unlikely(ret)) {
-                       ehca_err(qp->ib_qp.device,
-                                "remap_pfn_range() failed ret=%i qp_num=%x",
-                                ret, qp->ib_qp.qp_num);
-                       return -ENOMEM;
-               }
-               break;
-
-       case 1: /* qp rqueue_addr */
-               ehca_dbg(qp->ib_qp.device, "qp_num=%x rq", qp->ib_qp.qp_num);
-               ret = ehca_mmap_queue(vma, &qp->ipz_rqueue,
-                                     &qp->mm_count_rqueue);
-               if (unlikely(ret)) {
-                       ehca_err(qp->ib_qp.device,
-                                "ehca_mmap_queue(rq) failed rc=%i qp_num=%x",
-                                ret, qp->ib_qp.qp_num);
-                       return ret;
-               }
-               break;
-
-       case 2: /* qp squeue_addr */
-               ehca_dbg(qp->ib_qp.device, "qp_num=%x sq", qp->ib_qp.qp_num);
-               ret = ehca_mmap_queue(vma, &qp->ipz_squeue,
-                                     &qp->mm_count_squeue);
-               if (unlikely(ret)) {
-                       ehca_err(qp->ib_qp.device,
-                                "ehca_mmap_queue(sq) failed rc=%i qp_num=%x",
-                                ret, qp->ib_qp.qp_num);
-                       return ret;
-               }
-               break;
-
-       default:
-               ehca_err(qp->ib_qp.device, "bad resource type=%x qp=num=%x",
-                        rsrc_type, qp->ib_qp.qp_num);
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-       u64 fileoffset = vma->vm_pgoff;
-       u32 idr_handle = fileoffset & 0x1FFFFFF;
-       u32 q_type = (fileoffset >> 27) & 0x1;    /* CQ, QP,...        */
-       u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */
-       u32 ret;
-       struct ehca_cq *cq;
-       struct ehca_qp *qp;
-       struct ib_uobject *uobject;
-
-       switch (q_type) {
-       case  0: /* CQ */
-               read_lock(&ehca_cq_idr_lock);
-               cq = idr_find(&ehca_cq_idr, idr_handle);
-               read_unlock(&ehca_cq_idr_lock);
-
-               /* make sure this mmap really belongs to the authorized user */
-               if (!cq)
-                       return -EINVAL;
-
-               if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context)
-                       return -EINVAL;
-
-               ret = ehca_mmap_cq(vma, cq, rsrc_type);
-               if (unlikely(ret)) {
-                       ehca_err(cq->ib_cq.device,
-                                "ehca_mmap_cq() failed rc=%i cq_num=%x",
-                                ret, cq->cq_number);
-                       return ret;
-               }
-               break;
-
-       case 1: /* QP */
-               read_lock(&ehca_qp_idr_lock);
-               qp = idr_find(&ehca_qp_idr, idr_handle);
-               read_unlock(&ehca_qp_idr_lock);
-
-               /* make sure this mmap really belongs to the authorized user */
-               if (!qp)
-                       return -EINVAL;
-
-               uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject;
-               if (!uobject || uobject->context != context)
-                       return -EINVAL;
-
-               ret = ehca_mmap_qp(vma, qp, rsrc_type);
-               if (unlikely(ret)) {
-                       ehca_err(qp->ib_qp.device,
-                                "ehca_mmap_qp() failed rc=%i qp_num=%x",
-                                ret, qp->ib_qp.qp_num);
-                       return ret;
-               }
-               break;
-
-       default:
-               ehca_gen_err("bad queue type %x", q_type);
-               return -EINVAL;
-       }
-
-       return 0;
-}
diff --git a/drivers/staging/rdma/ehca/hcp_if.c b/drivers/staging/rdma/ehca/hcp_if.c
deleted file mode 100644 (file)
index 89517ff..0000000
+++ /dev/null
@@ -1,949 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Firmware Infiniband Interface code for POWER
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *           Gerd Bayer <gerd.bayer@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <asm/hvcall.h>
-#include "ehca_tools.h"
-#include "hcp_if.h"
-#include "hcp_phyp.h"
-#include "hipz_fns.h"
-#include "ipz_pt_fn.h"
-
-#define H_ALL_RES_QP_ENHANCED_OPS       EHCA_BMASK_IBM(9, 11)
-#define H_ALL_RES_QP_PTE_PIN            EHCA_BMASK_IBM(12, 12)
-#define H_ALL_RES_QP_SERVICE_TYPE       EHCA_BMASK_IBM(13, 15)
-#define H_ALL_RES_QP_STORAGE            EHCA_BMASK_IBM(16, 17)
-#define H_ALL_RES_QP_LL_RQ_CQE_POSTING  EHCA_BMASK_IBM(18, 18)
-#define H_ALL_RES_QP_LL_SQ_CQE_POSTING  EHCA_BMASK_IBM(19, 21)
-#define H_ALL_RES_QP_SIGNALING_TYPE     EHCA_BMASK_IBM(22, 23)
-#define H_ALL_RES_QP_UD_AV_LKEY_CTRL    EHCA_BMASK_IBM(31, 31)
-#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35)
-#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39)
-#define H_ALL_RES_QP_RESOURCE_TYPE      EHCA_BMASK_IBM(56, 63)
-
-#define H_ALL_RES_QP_MAX_OUTST_SEND_WR  EHCA_BMASK_IBM(0, 15)
-#define H_ALL_RES_QP_MAX_OUTST_RECV_WR  EHCA_BMASK_IBM(16, 31)
-#define H_ALL_RES_QP_MAX_SEND_SGE       EHCA_BMASK_IBM(32, 39)
-#define H_ALL_RES_QP_MAX_RECV_SGE       EHCA_BMASK_IBM(40, 47)
-
-#define H_ALL_RES_QP_UD_AV_LKEY         EHCA_BMASK_IBM(32, 63)
-#define H_ALL_RES_QP_SRQ_QP_TOKEN       EHCA_BMASK_IBM(0, 31)
-#define H_ALL_RES_QP_SRQ_QP_HANDLE      EHCA_BMASK_IBM(0, 64)
-#define H_ALL_RES_QP_SRQ_LIMIT          EHCA_BMASK_IBM(48, 63)
-#define H_ALL_RES_QP_SRQ_QPN            EHCA_BMASK_IBM(40, 63)
-
-#define H_ALL_RES_QP_ACT_OUTST_SEND_WR  EHCA_BMASK_IBM(16, 31)
-#define H_ALL_RES_QP_ACT_OUTST_RECV_WR  EHCA_BMASK_IBM(48, 63)
-#define H_ALL_RES_QP_ACT_SEND_SGE       EHCA_BMASK_IBM(8, 15)
-#define H_ALL_RES_QP_ACT_RECV_SGE       EHCA_BMASK_IBM(24, 31)
-
-#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(0, 31)
-#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(32, 63)
-
-#define H_MP_INIT_TYPE                  EHCA_BMASK_IBM(44, 47)
-#define H_MP_SHUTDOWN                   EHCA_BMASK_IBM(48, 48)
-#define H_MP_RESET_QKEY_CTR             EHCA_BMASK_IBM(49, 49)
-
-#define HCALL4_REGS_FORMAT "r4=%lx r5=%lx r6=%lx r7=%lx"
-#define HCALL7_REGS_FORMAT HCALL4_REGS_FORMAT " r8=%lx r9=%lx r10=%lx"
-#define HCALL9_REGS_FORMAT HCALL7_REGS_FORMAT " r11=%lx r12=%lx"
-
-static DEFINE_SPINLOCK(hcall_lock);
-
-static long ehca_plpar_hcall_norets(unsigned long opcode,
-                                   unsigned long arg1,
-                                   unsigned long arg2,
-                                   unsigned long arg3,
-                                   unsigned long arg4,
-                                   unsigned long arg5,
-                                   unsigned long arg6,
-                                   unsigned long arg7)
-{
-       long ret;
-       int i, sleep_msecs;
-       unsigned long flags = 0;
-
-       if (unlikely(ehca_debug_level >= 2))
-               ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT,
-                            opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
-
-       for (i = 0; i < 5; i++) {
-               /* serialize hCalls to work around firmware issue */
-               if (ehca_lock_hcalls)
-                       spin_lock_irqsave(&hcall_lock, flags);
-
-               ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
-                                        arg5, arg6, arg7);
-
-               if (ehca_lock_hcalls)
-                       spin_unlock_irqrestore(&hcall_lock, flags);
-
-               if (H_IS_LONG_BUSY(ret)) {
-                       sleep_msecs = get_longbusy_msecs(ret);
-                       msleep_interruptible(sleep_msecs);
-                       continue;
-               }
-
-               if (ret < H_SUCCESS)
-                       ehca_gen_err("opcode=%lx ret=%li " HCALL7_REGS_FORMAT,
-                                    opcode, ret, arg1, arg2, arg3,
-                                    arg4, arg5, arg6, arg7);
-               else
-                       if (unlikely(ehca_debug_level >= 2))
-                               ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret);
-
-               return ret;
-       }
-
-       return H_BUSY;
-}
-
-static long ehca_plpar_hcall9(unsigned long opcode,
-                             unsigned long *outs, /* array of 9 outputs */
-                             unsigned long arg1,
-                             unsigned long arg2,
-                             unsigned long arg3,
-                             unsigned long arg4,
-                             unsigned long arg5,
-                             unsigned long arg6,
-                             unsigned long arg7,
-                             unsigned long arg8,
-                             unsigned long arg9)
-{
-       long ret;
-       int i, sleep_msecs;
-       unsigned long flags = 0;
-
-       if (unlikely(ehca_debug_level >= 2))
-               ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode,
-                            arg1, arg2, arg3, arg4, arg5,
-                            arg6, arg7, arg8, arg9);
-
-       for (i = 0; i < 5; i++) {
-               /* serialize hCalls to work around firmware issue */
-               if (ehca_lock_hcalls)
-                       spin_lock_irqsave(&hcall_lock, flags);
-
-               ret = plpar_hcall9(opcode, outs,
-                                  arg1, arg2, arg3, arg4, arg5,
-                                  arg6, arg7, arg8, arg9);
-
-               if (ehca_lock_hcalls)
-                       spin_unlock_irqrestore(&hcall_lock, flags);
-
-               if (H_IS_LONG_BUSY(ret)) {
-                       sleep_msecs = get_longbusy_msecs(ret);
-                       msleep_interruptible(sleep_msecs);
-                       continue;
-               }
-
-               if (ret < H_SUCCESS) {
-                       ehca_gen_err("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT,
-                                    opcode, arg1, arg2, arg3, arg4, arg5,
-                                    arg6, arg7, arg8, arg9);
-                       ehca_gen_err("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
-                                    ret, outs[0], outs[1], outs[2], outs[3],
-                                    outs[4], outs[5], outs[6], outs[7],
-                                    outs[8]);
-               } else if (unlikely(ehca_debug_level >= 2))
-                       ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
-                                    ret, outs[0], outs[1], outs[2], outs[3],
-                                    outs[4], outs[5], outs[6], outs[7],
-                                    outs[8]);
-               return ret;
-       }
-
-       return H_BUSY;
-}
-
-u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_pfeq *pfeq,
-                            const u32 neq_control,
-                            const u32 number_of_entries,
-                            struct ipz_eq_handle *eq_handle,
-                            u32 *act_nr_of_entries,
-                            u32 *act_pages,
-                            u32 *eq_ist)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-       u64 allocate_controls;
-
-       /* resource type */
-       allocate_controls = 3ULL;
-
-       /* ISN is associated */
-       if (neq_control != 1)
-               allocate_controls = (1ULL << (63 - 7)) | allocate_controls;
-       else /* notification event queue */
-               allocate_controls = (1ULL << 63) | allocate_controls;
-
-       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-                               adapter_handle.handle,  /* r4 */
-                               allocate_controls,      /* r5 */
-                               number_of_entries,      /* r6 */
-                               0, 0, 0, 0, 0, 0);
-       eq_handle->handle = outs[0];
-       *act_nr_of_entries = (u32)outs[3];
-       *act_pages = (u32)outs[4];
-       *eq_ist = (u32)outs[5];
-
-       if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Not enough resource - ret=%lli ", ret);
-
-       return ret;
-}
-
-u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
-                      struct ipz_eq_handle eq_handle,
-                      const u64 event_mask)
-{
-       return ehca_plpar_hcall_norets(H_RESET_EVENTS,
-                                      adapter_handle.handle, /* r4 */
-                                      eq_handle.handle,      /* r5 */
-                                      event_mask,            /* r6 */
-                                      0, 0, 0, 0);
-}
-
-u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_cq *cq,
-                            struct ehca_alloc_cq_parms *param)
-{
-       int rc;
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-                               adapter_handle.handle,   /* r4  */
-                               2,                       /* r5  */
-                               param->eq_handle.handle, /* r6  */
-                               cq->token,               /* r7  */
-                               param->nr_cqe,           /* r8  */
-                               0, 0, 0, 0);
-       cq->ipz_cq_handle.handle = outs[0];
-       param->act_nr_of_entries = (u32)outs[3];
-       param->act_pages = (u32)outs[4];
-
-       if (ret == H_SUCCESS) {
-               rc = hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]);
-               if (rc) {
-                       ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx",
-                                    rc, outs[5]);
-
-                       ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                               adapter_handle.handle,     /* r4 */
-                                               cq->ipz_cq_handle.handle,  /* r5 */
-                                               0, 0, 0, 0, 0);
-                       ret = H_NO_MEM;
-               }
-       }
-
-       if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Not enough resources. ret=%lli", ret);
-
-       return ret;
-}
-
-u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_alloc_qp_parms *parms, int is_user)
-{
-       int rc;
-       u64 ret;
-       u64 allocate_controls, max_r10_reg, r11, r12;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       allocate_controls =
-               EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE,
-                                parms->squeue.page_size)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE,
-                                parms->rqueue.page_size)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
-                                !!(parms->ll_comp_flags & LLQP_RECV_COMP))
-               | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
-                                !!(parms->ll_comp_flags & LLQP_SEND_COMP))
-               | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL,
-                                parms->ud_av_l_key_ctl)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1);
-
-       max_r10_reg =
-               EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
-                              parms->squeue.max_wr + 1)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
-                                parms->rqueue.max_wr + 1)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
-                                parms->squeue.max_sge)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
-                                parms->rqueue.max_sge);
-
-       r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token);
-
-       if (parms->ext_type == EQPT_SRQ)
-               r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit);
-       else
-               r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn);
-
-       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-                               adapter_handle.handle,             /* r4  */
-                               allocate_controls,                 /* r5  */
-                               parms->send_cq_handle.handle,
-                               parms->recv_cq_handle.handle,
-                               parms->eq_handle.handle,
-                               ((u64)parms->token << 32) | parms->pd.value,
-                               max_r10_reg, r11, r12);
-
-       parms->qp_handle.handle = outs[0];
-       parms->real_qp_num = (u32)outs[1];
-       parms->squeue.act_nr_wqes =
-               (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
-       parms->rqueue.act_nr_wqes =
-               (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
-       parms->squeue.act_nr_sges =
-               (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]);
-       parms->rqueue.act_nr_sges =
-               (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]);
-       parms->squeue.queue_size =
-               (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]);
-       parms->rqueue.queue_size =
-               (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
-
-       if (ret == H_SUCCESS) {
-               rc = hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]);
-               if (rc) {
-                       ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx",
-                                    rc, outs[6]);
-
-                       ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                               adapter_handle.handle,     /* r4 */
-                                               parms->qp_handle.handle,  /* r5 */
-                                               0, 0, 0, 0, 0);
-                       ret = H_NO_MEM;
-               }
-       }
-
-       if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Not enough resources. ret=%lli", ret);
-
-       return ret;
-}
-
-u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
-                     const u8 port_id,
-                     struct hipz_query_port *query_port_response_block)
-{
-       u64 ret;
-       u64 r_cb = __pa(query_port_response_block);
-
-       if (r_cb & (EHCA_PAGESIZE-1)) {
-               ehca_gen_err("response block not page aligned");
-               return H_PARAMETER;
-       }
-
-       ret = ehca_plpar_hcall_norets(H_QUERY_PORT,
-                                     adapter_handle.handle, /* r4 */
-                                     port_id,               /* r5 */
-                                     r_cb,                  /* r6 */
-                                     0, 0, 0, 0);
-
-       if (ehca_debug_level >= 2)
-               ehca_dmp(query_port_response_block, 64, "response_block");
-
-       return ret;
-}
-
-u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
-                      const u8 port_id, const u32 port_cap,
-                      const u8 init_type, const int modify_mask)
-{
-       u64 port_attributes = port_cap;
-
-       if (modify_mask & IB_PORT_SHUTDOWN)
-               port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1);
-       if (modify_mask & IB_PORT_INIT_TYPE)
-               port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type);
-       if (modify_mask & IB_PORT_RESET_QKEY_CNTR)
-               port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1);
-
-       return ehca_plpar_hcall_norets(H_MODIFY_PORT,
-                                      adapter_handle.handle, /* r4 */
-                                      port_id,               /* r5 */
-                                      port_attributes,       /* r6 */
-                                      0, 0, 0, 0);
-}
-
-u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
-                    struct hipz_query_hca *query_hca_rblock)
-{
-       u64 r_cb = __pa(query_hca_rblock);
-
-       if (r_cb & (EHCA_PAGESIZE-1)) {
-               ehca_gen_err("response_block=%p not page aligned",
-                            query_hca_rblock);
-               return H_PARAMETER;
-       }
-
-       return ehca_plpar_hcall_norets(H_QUERY_HCA,
-                                      adapter_handle.handle, /* r4 */
-                                      r_cb,                  /* r5 */
-                                      0, 0, 0, 0, 0);
-}
-
-u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
-                         const u8 pagesize,
-                         const u8 queue_type,
-                         const u64 resource_handle,
-                         const u64 logical_address_of_page,
-                         u64 count)
-{
-       return ehca_plpar_hcall_norets(H_REGISTER_RPAGES,
-                                      adapter_handle.handle,      /* r4  */
-                                      (u64)queue_type | ((u64)pagesize) << 8,
-                                      /* r5  */
-                                      resource_handle,            /* r6  */
-                                      logical_address_of_page,    /* r7  */
-                                      count,                      /* r8  */
-                                      0, 0);
-}
-
-u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
-                            const struct ipz_eq_handle eq_handle,
-                            struct ehca_pfeq *pfeq,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count)
-{
-       if (count != 1) {
-               ehca_gen_err("Ppage counter=%llx", count);
-               return H_PARAMETER;
-       }
-       return hipz_h_register_rpage(adapter_handle,
-                                    pagesize,
-                                    queue_type,
-                                    eq_handle.handle,
-                                    logical_address_of_page, count);
-}
-
-u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle,
-                          u32 ist)
-{
-       u64 ret;
-       ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE,
-                                     adapter_handle.handle, /* r4 */
-                                     ist,                   /* r5 */
-                                     0, 0, 0, 0, 0);
-
-       if (ret != H_SUCCESS && ret != H_BUSY)
-               ehca_gen_err("Could not query interrupt state.");
-
-       return ret;
-}
-
-u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
-                            const struct ipz_cq_handle cq_handle,
-                            struct ehca_pfcq *pfcq,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count,
-                            const struct h_galpa gal)
-{
-       if (count != 1) {
-               ehca_gen_err("Page counter=%llx", count);
-               return H_PARAMETER;
-       }
-
-       return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
-                                    cq_handle.handle, logical_address_of_page,
-                                    count);
-}
-
-u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
-                            const struct ipz_qp_handle qp_handle,
-                            struct ehca_pfqp *pfqp,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count,
-                            const struct h_galpa galpa)
-{
-       if (count > 1) {
-               ehca_gen_err("Page counter=%llx", count);
-               return H_PARAMETER;
-       }
-
-       return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
-                                    qp_handle.handle, logical_address_of_page,
-                                    count);
-}
-
-u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
-                              const struct ipz_qp_handle qp_handle,
-                              struct ehca_pfqp *pfqp,
-                              void **log_addr_next_sq_wqe2processed,
-                              void **log_addr_next_rq_wqe2processed,
-                              int dis_and_get_function_code)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
-                               adapter_handle.handle,     /* r4 */
-                               dis_and_get_function_code, /* r5 */
-                               qp_handle.handle,          /* r6 */
-                               0, 0, 0, 0, 0, 0);
-       if (log_addr_next_sq_wqe2processed)
-               *log_addr_next_sq_wqe2processed = (void *)outs[0];
-       if (log_addr_next_rq_wqe2processed)
-               *log_addr_next_rq_wqe2processed = (void *)outs[1];
-
-       return ret;
-}
-
-u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
-                    const struct ipz_qp_handle qp_handle,
-                    struct ehca_pfqp *pfqp,
-                    const u64 update_mask,
-                    struct hcp_modify_qp_control_block *mqpcb,
-                    struct h_galpa gal)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-       ret = ehca_plpar_hcall9(H_MODIFY_QP, outs,
-                               adapter_handle.handle, /* r4 */
-                               qp_handle.handle,      /* r5 */
-                               update_mask,           /* r6 */
-                               __pa(mqpcb),           /* r7 */
-                               0, 0, 0, 0, 0);
-
-       if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Insufficient resources ret=%lli", ret);
-
-       return ret;
-}
-
-u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
-                   const struct ipz_qp_handle qp_handle,
-                   struct ehca_pfqp *pfqp,
-                   struct hcp_modify_qp_control_block *qqpcb,
-                   struct h_galpa gal)
-{
-       return ehca_plpar_hcall_norets(H_QUERY_QP,
-                                      adapter_handle.handle, /* r4 */
-                                      qp_handle.handle,      /* r5 */
-                                      __pa(qqpcb),           /* r6 */
-                                      0, 0, 0, 0);
-}
-
-u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
-                     struct ehca_qp *qp)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = hcp_galpas_dtor(&qp->galpas);
-       if (ret) {
-               ehca_gen_err("Could not destruct qp->galpas");
-               return H_RESOURCE;
-       }
-       ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
-                               adapter_handle.handle,     /* r4 */
-                               /* function code */
-                               1,                         /* r5 */
-                               qp->ipz_qp_handle.handle,  /* r6 */
-                               0, 0, 0, 0, 0, 0);
-       if (ret == H_HARDWARE)
-               ehca_gen_err("HCA not operational. ret=%lli", ret);
-
-       ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                     adapter_handle.handle,     /* r4 */
-                                     qp->ipz_qp_handle.handle,  /* r5 */
-                                     0, 0, 0, 0, 0);
-
-       if (ret == H_RESOURCE)
-               ehca_gen_err("Resource still in use. ret=%lli", ret);
-
-       return ret;
-}
-
-u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u32 port)
-{
-       return ehca_plpar_hcall_norets(H_DEFINE_AQP0,
-                                      adapter_handle.handle, /* r4 */
-                                      qp_handle.handle,      /* r5 */
-                                      port,                  /* r6 */
-                                      0, 0, 0, 0);
-}
-
-u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u32 port, u32 * pma_qp_nr,
-                      u32 * bma_qp_nr)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs,
-                               adapter_handle.handle, /* r4 */
-                               qp_handle.handle,      /* r5 */
-                               port,                  /* r6 */
-                               0, 0, 0, 0, 0, 0);
-       *pma_qp_nr = (u32)outs[0];
-       *bma_qp_nr = (u32)outs[1];
-
-       if (ret == H_ALIAS_EXIST)
-               ehca_gen_err("AQP1 already exists. ret=%lli", ret);
-
-       return ret;
-}
-
-u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u16 mcg_dlid,
-                      u64 subnet_prefix, u64 interface_id)
-{
-       u64 ret;
-
-       ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP,
-                                     adapter_handle.handle,  /* r4 */
-                                     qp_handle.handle,       /* r5 */
-                                     mcg_dlid,               /* r6 */
-                                     interface_id,           /* r7 */
-                                     subnet_prefix,          /* r8 */
-                                     0, 0);
-
-       if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Not enough resources. ret=%lli", ret);
-
-       return ret;
-}
-
-u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u16 mcg_dlid,
-                      u64 subnet_prefix, u64 interface_id)
-{
-       return ehca_plpar_hcall_norets(H_DETACH_MCQP,
-                                      adapter_handle.handle, /* r4 */
-                                      qp_handle.handle,      /* r5 */
-                                      mcg_dlid,              /* r6 */
-                                      interface_id,          /* r7 */
-                                      subnet_prefix,         /* r8 */
-                                      0, 0);
-}
-
-u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
-                     struct ehca_cq *cq,
-                     u8 force_flag)
-{
-       u64 ret;
-
-       ret = hcp_galpas_dtor(&cq->galpas);
-       if (ret) {
-               ehca_gen_err("Could not destruct cp->galpas");
-               return H_RESOURCE;
-       }
-
-       ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                     adapter_handle.handle,     /* r4 */
-                                     cq->ipz_cq_handle.handle,  /* r5 */
-                                     force_flag != 0 ? 1L : 0L, /* r6 */
-                                     0, 0, 0, 0);
-
-       if (ret == H_RESOURCE)
-               ehca_gen_err("H_FREE_RESOURCE failed ret=%lli ", ret);
-
-       return ret;
-}
-
-u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
-                     struct ehca_eq *eq)
-{
-       u64 ret;
-
-       ret = hcp_galpas_dtor(&eq->galpas);
-       if (ret) {
-               ehca_gen_err("Could not destruct eq->galpas");
-               return H_RESOURCE;
-       }
-
-       ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                     adapter_handle.handle,     /* r4 */
-                                     eq->ipz_eq_handle.handle,  /* r5 */
-                                     0, 0, 0, 0, 0);
-
-       if (ret == H_RESOURCE)
-               ehca_gen_err("Resource in use. ret=%lli ", ret);
-
-       return ret;
-}
-
-u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
-                            const struct ehca_mr *mr,
-                            const u64 vaddr,
-                            const u64 length,
-                            const u32 access_ctrl,
-                            const struct ipz_pd pd,
-                            struct ehca_mr_hipzout_parms *outparms)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-                               adapter_handle.handle,            /* r4 */
-                               5,                                /* r5 */
-                               vaddr,                            /* r6 */
-                               length,                           /* r7 */
-                               (((u64)access_ctrl) << 32ULL),    /* r8 */
-                               pd.value,                         /* r9 */
-                               0, 0, 0);
-       outparms->handle.handle = outs[0];
-       outparms->lkey = (u32)outs[2];
-       outparms->rkey = (u32)outs[3];
-
-       return ret;
-}
-
-u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
-                            const struct ehca_mr *mr,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count)
-{
-       u64 ret;
-
-       if (unlikely(ehca_debug_level >= 3)) {
-               if (count > 1) {
-                       u64 *kpage;
-                       int i;
-                       kpage = __va(logical_address_of_page);
-                       for (i = 0; i < count; i++)
-                               ehca_gen_dbg("kpage[%d]=%p",
-                                            i, (void *)kpage[i]);
-               } else
-                       ehca_gen_dbg("kpage=%p",
-                                    (void *)logical_address_of_page);
-       }
-
-       if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) {
-               ehca_gen_err("logical_address_of_page not on a 4k boundary "
-                            "adapter_handle=%llx mr=%p mr_handle=%llx "
-                            "pagesize=%x queue_type=%x "
-                            "logical_address_of_page=%llx count=%llx",
-                            adapter_handle.handle, mr,
-                            mr->ipz_mr_handle.handle, pagesize, queue_type,
-                            logical_address_of_page, count);
-               ret = H_PARAMETER;
-       } else
-               ret = hipz_h_register_rpage(adapter_handle, pagesize,
-                                           queue_type,
-                                           mr->ipz_mr_handle.handle,
-                                           logical_address_of_page, count);
-       return ret;
-}
-
-u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
-                   const struct ehca_mr *mr,
-                   struct ehca_mr_hipzout_parms *outparms)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_QUERY_MR, outs,
-                               adapter_handle.handle,     /* r4 */
-                               mr->ipz_mr_handle.handle,  /* r5 */
-                               0, 0, 0, 0, 0, 0, 0);
-       outparms->len = outs[0];
-       outparms->vaddr = outs[1];
-       outparms->acl  = outs[4] >> 32;
-       outparms->lkey = (u32)(outs[5] >> 32);
-       outparms->rkey = (u32)(outs[5] & (0xffffffff));
-
-       return ret;
-}
-
-u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
-                           const struct ehca_mr *mr)
-{
-       return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                      adapter_handle.handle,    /* r4 */
-                                      mr->ipz_mr_handle.handle, /* r5 */
-                                      0, 0, 0, 0, 0);
-}
-
-u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
-                         const struct ehca_mr *mr,
-                         const u64 vaddr_in,
-                         const u64 length,
-                         const u32 access_ctrl,
-                         const struct ipz_pd pd,
-                         const u64 mr_addr_cb,
-                         struct ehca_mr_hipzout_parms *outparms)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs,
-                               adapter_handle.handle,    /* r4 */
-                               mr->ipz_mr_handle.handle, /* r5 */
-                               vaddr_in,                 /* r6 */
-                               length,                   /* r7 */
-                               /* r8 */
-                               ((((u64)access_ctrl) << 32ULL) | pd.value),
-                               mr_addr_cb,               /* r9 */
-                               0, 0, 0);
-       outparms->vaddr = outs[1];
-       outparms->lkey = (u32)outs[2];
-       outparms->rkey = (u32)outs[3];
-
-       return ret;
-}
-
-u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
-                       const struct ehca_mr *mr,
-                       const struct ehca_mr *orig_mr,
-                       const u64 vaddr_in,
-                       const u32 access_ctrl,
-                       const struct ipz_pd pd,
-                       struct ehca_mr_hipzout_parms *outparms)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs,
-                               adapter_handle.handle,            /* r4 */
-                               orig_mr->ipz_mr_handle.handle,    /* r5 */
-                               vaddr_in,                         /* r6 */
-                               (((u64)access_ctrl) << 32ULL),    /* r7 */
-                               pd.value,                         /* r8 */
-                               0, 0, 0, 0);
-       outparms->handle.handle = outs[0];
-       outparms->lkey = (u32)outs[2];
-       outparms->rkey = (u32)outs[3];
-
-       return ret;
-}
-
-u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
-                            const struct ehca_mw *mw,
-                            const struct ipz_pd pd,
-                            struct ehca_mw_hipzout_parms *outparms)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-                               adapter_handle.handle,      /* r4 */
-                               6,                          /* r5 */
-                               pd.value,                   /* r6 */
-                               0, 0, 0, 0, 0, 0);
-       outparms->handle.handle = outs[0];
-       outparms->rkey = (u32)outs[3];
-
-       return ret;
-}
-
-u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
-                   const struct ehca_mw *mw,
-                   struct ehca_mw_hipzout_parms *outparms)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_QUERY_MW, outs,
-                               adapter_handle.handle,    /* r4 */
-                               mw->ipz_mw_handle.handle, /* r5 */
-                               0, 0, 0, 0, 0, 0, 0);
-       outparms->rkey = (u32)outs[3];
-
-       return ret;
-}
-
-u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
-                           const struct ehca_mw *mw)
-{
-       return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                      adapter_handle.handle,    /* r4 */
-                                      mw->ipz_mw_handle.handle, /* r5 */
-                                      0, 0, 0, 0, 0);
-}
-
-u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
-                     const u64 ressource_handle,
-                     void *rblock,
-                     unsigned long *byte_count)
-{
-       u64 r_cb = __pa(rblock);
-
-       if (r_cb & (EHCA_PAGESIZE-1)) {
-               ehca_gen_err("rblock not page aligned.");
-               return H_PARAMETER;
-       }
-
-       return ehca_plpar_hcall_norets(H_ERROR_DATA,
-                                      adapter_handle.handle,
-                                      ressource_handle,
-                                      r_cb,
-                                      0, 0, 0, 0);
-}
-
-u64 hipz_h_eoi(int irq)
-{
-       unsigned long xirr;
-
-       iosync();
-       xirr = (0xffULL << 24) | irq;
-
-       return plpar_hcall_norets(H_EOI, xirr);
-}
diff --git a/drivers/staging/rdma/ehca/hcp_if.h b/drivers/staging/rdma/ehca/hcp_if.h
deleted file mode 100644 (file)
index a46e514..0000000
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Firmware Infiniband Interface code for POWER
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Gerd Bayer <gerd.bayer@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HCP_IF_H__
-#define __HCP_IF_H__
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "hipz_hw.h"
-
-/*
- * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initialize
- * resources, create the empty EQPT (ring).
- */
-u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_pfeq *pfeq,
-                            const u32 neq_control,
-                            const u32 number_of_entries,
-                            struct ipz_eq_handle *eq_handle,
-                            u32 * act_nr_of_entries,
-                            u32 * act_pages,
-                            u32 * eq_ist);
-
-u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
-                      struct ipz_eq_handle eq_handle,
-                      const u64 event_mask);
-/*
- * hipz_h_allocate_resource_cq allocates CQ resources in HW and FW, initialize
- * resources, create the empty CQPT (ring).
- */
-u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_cq *cq,
-                            struct ehca_alloc_cq_parms *param);
-
-
-/*
- * hipz_h_alloc_resource_qp allocates QP resources in HW and FW,
- * initialize resources, create empty QPPTs (2 rings).
- */
-u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_alloc_qp_parms *parms, int is_user);
-
-u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
-                     const u8 port_id,
-                     struct hipz_query_port *query_port_response_block);
-
-u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
-                      const u8 port_id, const u32 port_cap,
-                      const u8 init_type, const int modify_mask);
-
-u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
-                    struct hipz_query_hca *query_hca_rblock);
-
-/*
- * hipz_h_register_rpage internal function in hcp_if.h for all
- * hcp_H_REGISTER_RPAGE calls.
- */
-u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
-                         const u8 pagesize,
-                         const u8 queue_type,
-                         const u64 resource_handle,
-                         const u64 logical_address_of_page,
-                         u64 count);
-
-u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
-                            const struct ipz_eq_handle eq_handle,
-                            struct ehca_pfeq *pfeq,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count);
-
-u64 hipz_h_query_int_state(const struct ipz_adapter_handle
-                          hcp_adapter_handle,
-                          u32 ist);
-
-u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
-                            const struct ipz_cq_handle cq_handle,
-                            struct ehca_pfcq *pfcq,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count,
-                            const struct h_galpa gal);
-
-u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
-                            const struct ipz_qp_handle qp_handle,
-                            struct ehca_pfqp *pfqp,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count,
-                            const struct h_galpa galpa);
-
-u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
-                              const struct ipz_qp_handle qp_handle,
-                              struct ehca_pfqp *pfqp,
-                              void **log_addr_next_sq_wqe_tb_processed,
-                              void **log_addr_next_rq_wqe_tb_processed,
-                              int dis_and_get_function_code);
-enum hcall_sigt {
-       HCALL_SIGT_NO_CQE = 0,
-       HCALL_SIGT_BY_WQE = 1,
-       HCALL_SIGT_EVERY = 2
-};
-
-u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
-                    const struct ipz_qp_handle qp_handle,
-                    struct ehca_pfqp *pfqp,
-                    const u64 update_mask,
-                    struct hcp_modify_qp_control_block *mqpcb,
-                    struct h_galpa gal);
-
-u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
-                   const struct ipz_qp_handle qp_handle,
-                   struct ehca_pfqp *pfqp,
-                   struct hcp_modify_qp_control_block *qqpcb,
-                   struct h_galpa gal);
-
-u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
-                     struct ehca_qp *qp);
-
-u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u32 port);
-
-u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u32 port, u32 * pma_qp_nr,
-                      u32 * bma_qp_nr);
-
-u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u16 mcg_dlid,
-                      u64 subnet_prefix, u64 interface_id);
-
-u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u16 mcg_dlid,
-                      u64 subnet_prefix, u64 interface_id);
-
-u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
-                     struct ehca_cq *cq,
-                     u8 force_flag);
-
-u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
-                     struct ehca_eq *eq);
-
-/*
- * hipz_h_alloc_resource_mr allocates MR resources in HW and FW, initialize
- * resources.
- */
-u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
-                            const struct ehca_mr *mr,
-                            const u64 vaddr,
-                            const u64 length,
-                            const u32 access_ctrl,
-                            const struct ipz_pd pd,
-                            struct ehca_mr_hipzout_parms *outparms);
-
-/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */
-u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
-                            const struct ehca_mr *mr,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count);
-
-/* hipz_h_query_mr queries MR in HW and FW */
-u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
-                   const struct ehca_mr *mr,
-                   struct ehca_mr_hipzout_parms *outparms);
-
-/* hipz_h_free_resource_mr frees MR resources in HW and FW */
-u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
-                           const struct ehca_mr *mr);
-
-/* hipz_h_reregister_pmr reregisters MR in HW and FW */
-u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
-                         const struct ehca_mr *mr,
-                         const u64 vaddr_in,
-                         const u64 length,
-                         const u32 access_ctrl,
-                         const struct ipz_pd pd,
-                         const u64 mr_addr_cb,
-                         struct ehca_mr_hipzout_parms *outparms);
-
-/* hipz_h_register_smr register shared MR in HW and FW */
-u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
-                       const struct ehca_mr *mr,
-                       const struct ehca_mr *orig_mr,
-                       const u64 vaddr_in,
-                       const u32 access_ctrl,
-                       const struct ipz_pd pd,
-                       struct ehca_mr_hipzout_parms *outparms);
-
-/*
- * hipz_h_alloc_resource_mw allocates MW resources in HW and FW, initialize
- * resources.
- */
-u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
-                            const struct ehca_mw *mw,
-                            const struct ipz_pd pd,
-                            struct ehca_mw_hipzout_parms *outparms);
-
-/* hipz_h_query_mw queries MW in HW and FW */
-u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
-                   const struct ehca_mw *mw,
-                   struct ehca_mw_hipzout_parms *outparms);
-
-/* hipz_h_free_resource_mw frees MW resources in HW and FW */
-u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
-                           const struct ehca_mw *mw);
-
-u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
-                     const u64 ressource_handle,
-                     void *rblock,
-                     unsigned long *byte_count);
-u64 hipz_h_eoi(int irq);
-
-#endif /* __HCP_IF_H__ */
diff --git a/drivers/staging/rdma/ehca/hcp_phyp.c b/drivers/staging/rdma/ehca/hcp_phyp.c
deleted file mode 100644 (file)
index 077376f..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *   load store abstraction for ehca register access with tracing
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "ehca_classes.h"
-#include "hipz_hw.h"
-
-u64 hcall_map_page(u64 physaddr)
-{
-       return (u64)ioremap(physaddr, EHCA_PAGESIZE);
-}
-
-int hcall_unmap_page(u64 mapaddr)
-{
-       iounmap((volatile void __iomem *) mapaddr);
-       return 0;
-}
-
-int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
-                   u64 paddr_kernel, u64 paddr_user)
-{
-       if (!is_user) {
-               galpas->kernel.fw_handle = hcall_map_page(paddr_kernel);
-               if (!galpas->kernel.fw_handle)
-                       return -ENOMEM;
-       } else
-               galpas->kernel.fw_handle = 0;
-
-       galpas->user.fw_handle = paddr_user;
-
-       return 0;
-}
-
-int hcp_galpas_dtor(struct h_galpas *galpas)
-{
-       if (galpas->kernel.fw_handle) {
-               int ret = hcall_unmap_page(galpas->kernel.fw_handle);
-               if (ret)
-                       return ret;
-       }
-
-       galpas->user.fw_handle = galpas->kernel.fw_handle = 0;
-
-       return 0;
-}
diff --git a/drivers/staging/rdma/ehca/hcp_phyp.h b/drivers/staging/rdma/ehca/hcp_phyp.h
deleted file mode 100644 (file)
index d1b0299..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Firmware calls
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Gerd Bayer <gerd.bayer@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HCP_PHYP_H__
-#define __HCP_PHYP_H__
-
-
-/*
- * eHCA page (mapped into memory)
- * resource to access eHCA register pages in CPU address space
-*/
-struct h_galpa {
-       u64 fw_handle;
-       /* for pSeries this is a 64bit memory address where
-          I/O memory is mapped into CPU address space (kv) */
-};
-
-/*
- * resource to access eHCA address space registers, all types
- */
-struct h_galpas {
-       u32 pid;                /*PID of userspace galpa checking */
-       struct h_galpa user;    /* user space accessible resource,
-                                  set to 0 if unused */
-       struct h_galpa kernel;  /* kernel space accessible resource,
-                                  set to 0 if unused */
-};
-
-static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset)
-{
-       u64 addr = galpa.fw_handle + offset;
-       return *(volatile u64 __force *)addr;
-}
-
-static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value)
-{
-       u64 addr = galpa.fw_handle + offset;
-       *(volatile u64 __force *)addr = value;
-}
-
-int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
-                   u64 paddr_kernel, u64 paddr_user);
-
-int hcp_galpas_dtor(struct h_galpas *galpas);
-
-u64 hcall_map_page(u64 physaddr);
-
-int hcall_unmap_page(u64 mapaddr);
-
-#endif
diff --git a/drivers/staging/rdma/ehca/hipz_fns.h b/drivers/staging/rdma/ehca/hipz_fns.h
deleted file mode 100644 (file)
index 9dac93d..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  HW abstraction register functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HIPZ_FNS_H__
-#define __HIPZ_FNS_H__
-
-#include "ehca_classes.h"
-#include "hipz_hw.h"
-
-#include "hipz_fns_core.h"
-
-#define hipz_galpa_store_eq(gal, offset, value) \
-       hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_eq(gal, offset) \
-       hipz_galpa_load(gal, EQTEMM_OFFSET(offset))
-
-#define hipz_galpa_store_qped(gal, offset, value) \
-       hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_qped(gal, offset) \
-       hipz_galpa_load(gal, QPEDMM_OFFSET(offset))
-
-#define hipz_galpa_store_mrmw(gal, offset, value) \
-       hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_mrmw(gal, offset) \
-       hipz_galpa_load(gal, MRMWMM_OFFSET(offset))
-
-#endif
diff --git a/drivers/staging/rdma/ehca/hipz_fns_core.h b/drivers/staging/rdma/ehca/hipz_fns_core.h
deleted file mode 100644 (file)
index 868735f..0000000
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  HW abstraction register functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HIPZ_FNS_CORE_H__
-#define __HIPZ_FNS_CORE_H__
-
-#include "hcp_phyp.h"
-#include "hipz_hw.h"
-
-#define hipz_galpa_store_cq(gal, offset, value) \
-       hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_cq(gal, offset) \
-       hipz_galpa_load(gal, CQTEMM_OFFSET(offset))
-
-#define hipz_galpa_store_qp(gal, offset, value) \
-       hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value)
-#define hipz_galpa_load_qp(gal, offset) \
-       hipz_galpa_load(gal, QPTEMM_OFFSET(offset))
-
-static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes)
-{
-       /*  ringing doorbell :-) */
-       hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa,
-                           EHCA_BMASK_SET(QPX_SQADDER, nr_wqes));
-}
-
-static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes)
-{
-       /*  ringing doorbell :-) */
-       hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa,
-                           EHCA_BMASK_SET(QPX_RQADDER, nr_wqes));
-}
-
-static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes)
-{
-       hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca,
-                           EHCA_BMASK_SET(CQX_FECADDER, nr_cqes));
-}
-
-static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value)
-{
-       u64 cqx_n0_reg;
-
-       hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0,
-                           EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT,
-                                          value));
-       cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0);
-}
-
-static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value)
-{
-       u64 cqx_n1_reg;
-
-       hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1,
-                           EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value));
-       cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1);
-}
-
-#endif /* __HIPZ_FNC_CORE_H__ */
diff --git a/drivers/staging/rdma/ehca/hipz_hw.h b/drivers/staging/rdma/ehca/hipz_hw.h
deleted file mode 100644 (file)
index bf996c7..0000000
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  eHCA register definitions
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HIPZ_HW_H__
-#define __HIPZ_HW_H__
-
-#include "ehca_tools.h"
-
-#define EHCA_MAX_MTU 4
-
-/* QP Table Entry Memory Map */
-struct hipz_qptemm {
-       u64 qpx_hcr;
-       u64 qpx_c;
-       u64 qpx_herr;
-       u64 qpx_aer;
-/* 0x20*/
-       u64 qpx_sqa;
-       u64 qpx_sqc;
-       u64 qpx_rqa;
-       u64 qpx_rqc;
-/* 0x40*/
-       u64 qpx_st;
-       u64 qpx_pmstate;
-       u64 qpx_pmfa;
-       u64 qpx_pkey;
-/* 0x60*/
-       u64 qpx_pkeya;
-       u64 qpx_pkeyb;
-       u64 qpx_pkeyc;
-       u64 qpx_pkeyd;
-/* 0x80*/
-       u64 qpx_qkey;
-       u64 qpx_dqp;
-       u64 qpx_dlidp;
-       u64 qpx_portp;
-/* 0xa0*/
-       u64 qpx_slidp;
-       u64 qpx_slidpp;
-       u64 qpx_dlida;
-       u64 qpx_porta;
-/* 0xc0*/
-       u64 qpx_slida;
-       u64 qpx_slidpa;
-       u64 qpx_slvl;
-       u64 qpx_ipd;
-/* 0xe0*/
-       u64 qpx_mtu;
-       u64 qpx_lato;
-       u64 qpx_rlimit;
-       u64 qpx_rnrlimit;
-/* 0x100*/
-       u64 qpx_t;
-       u64 qpx_sqhp;
-       u64 qpx_sqptp;
-       u64 qpx_nspsn;
-/* 0x120*/
-       u64 qpx_nspsnhwm;
-       u64 reserved1;
-       u64 qpx_sdsi;
-       u64 qpx_sdsbc;
-/* 0x140*/
-       u64 qpx_sqwsize;
-       u64 qpx_sqwts;
-       u64 qpx_lsn;
-       u64 qpx_nssn;
-/* 0x160 */
-       u64 qpx_mor;
-       u64 qpx_cor;
-       u64 qpx_sqsize;
-       u64 qpx_erc;
-/* 0x180*/
-       u64 qpx_rnrrc;
-       u64 qpx_ernrwt;
-       u64 qpx_rnrresp;
-       u64 qpx_lmsna;
-/* 0x1a0 */
-       u64 qpx_sqhpc;
-       u64 qpx_sqcptp;
-       u64 qpx_sigt;
-       u64 qpx_wqecnt;
-/* 0x1c0*/
-       u64 qpx_rqhp;
-       u64 qpx_rqptp;
-       u64 qpx_rqsize;
-       u64 qpx_nrr;
-/* 0x1e0*/
-       u64 qpx_rdmac;
-       u64 qpx_nrpsn;
-       u64 qpx_lapsn;
-       u64 qpx_lcr;
-/* 0x200*/
-       u64 qpx_rwc;
-       u64 qpx_rwva;
-       u64 qpx_rdsi;
-       u64 qpx_rdsbc;
-/* 0x220*/
-       u64 qpx_rqwsize;
-       u64 qpx_crmsn;
-       u64 qpx_rdd;
-       u64 qpx_larpsn;
-/* 0x240*/
-       u64 qpx_pd;
-       u64 qpx_scqn;
-       u64 qpx_rcqn;
-       u64 qpx_aeqn;
-/* 0x260*/
-       u64 qpx_aaelog;
-       u64 qpx_ram;
-       u64 qpx_rdmaqe0;
-       u64 qpx_rdmaqe1;
-/* 0x280*/
-       u64 qpx_rdmaqe2;
-       u64 qpx_rdmaqe3;
-       u64 qpx_nrpsnhwm;
-/* 0x298*/
-       u64 reserved[(0x400 - 0x298) / 8];
-/* 0x400 extended data */
-       u64 reserved_ext[(0x500 - 0x400) / 8];
-/* 0x500 */
-       u64 reserved2[(0x1000 - 0x500) / 8];
-/* 0x1000      */
-};
-
-#define QPX_SQADDER EHCA_BMASK_IBM(48, 63)
-#define QPX_RQADDER EHCA_BMASK_IBM(48, 63)
-#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3)
-
-#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x)
-
-/* MRMWPT Entry Memory Map */
-struct hipz_mrmwmm {
-       /* 0x00 */
-       u64 mrx_hcr;
-
-       u64 mrx_c;
-       u64 mrx_herr;
-       u64 mrx_aer;
-       /* 0x20 */
-       u64 mrx_pp;
-       u64 reserved1;
-       u64 reserved2;
-       u64 reserved3;
-       /* 0x40 */
-       u64 reserved4[(0x200 - 0x40) / 8];
-       /* 0x200 */
-       u64 mrx_ctl[64];
-
-};
-
-#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x)
-
-struct hipz_qpedmm {
-       /* 0x00 */
-       u64 reserved0[(0x400) / 8];
-       /* 0x400 */
-       u64 qpedx_phh;
-       u64 qpedx_ppsgp;
-       /* 0x410 */
-       u64 qpedx_ppsgu;
-       u64 qpedx_ppdgp;
-       /* 0x420 */
-       u64 qpedx_ppdgu;
-       u64 qpedx_aph;
-       /* 0x430 */
-       u64 qpedx_apsgp;
-       u64 qpedx_apsgu;
-       /* 0x440 */
-       u64 qpedx_apdgp;
-       u64 qpedx_apdgu;
-       /* 0x450 */
-       u64 qpedx_apav;
-       u64 qpedx_apsav;
-       /* 0x460  */
-       u64 qpedx_hcr;
-       u64 reserved1[4];
-       /* 0x488 */
-       u64 qpedx_rrl0;
-       /* 0x490 */
-       u64 qpedx_rrrkey0;
-       u64 qpedx_rrva0;
-       /* 0x4a0 */
-       u64 reserved2;
-       u64 qpedx_rrl1;
-       /* 0x4b0 */
-       u64 qpedx_rrrkey1;
-       u64 qpedx_rrva1;
-       /* 0x4c0 */
-       u64 reserved3;
-       u64 qpedx_rrl2;
-       /* 0x4d0 */
-       u64 qpedx_rrrkey2;
-       u64 qpedx_rrva2;
-       /* 0x4e0 */
-       u64 reserved4;
-       u64 qpedx_rrl3;
-       /* 0x4f0 */
-       u64 qpedx_rrrkey3;
-       u64 qpedx_rrva3;
-};
-
-#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x)
-
-/* CQ Table Entry Memory Map */
-struct hipz_cqtemm {
-       u64 cqx_hcr;
-       u64 cqx_c;
-       u64 cqx_herr;
-       u64 cqx_aer;
-/* 0x20  */
-       u64 cqx_ptp;
-       u64 cqx_tp;
-       u64 cqx_fec;
-       u64 cqx_feca;
-/* 0x40  */
-       u64 cqx_ep;
-       u64 cqx_eq;
-/* 0x50  */
-       u64 reserved1;
-       u64 cqx_n0;
-/* 0x60  */
-       u64 cqx_n1;
-       u64 reserved2[(0x1000 - 0x60) / 8];
-/* 0x1000 */
-};
-
-#define CQX_FEC_CQE_CNT           EHCA_BMASK_IBM(32, 63)
-#define CQX_FECADDER              EHCA_BMASK_IBM(32, 63)
-#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0)
-#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0)
-
-#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x)
-
-/* EQ Table Entry Memory Map */
-struct hipz_eqtemm {
-       u64 eqx_hcr;
-       u64 eqx_c;
-
-       u64 eqx_herr;
-       u64 eqx_aer;
-/* 0x20 */
-       u64 eqx_ptp;
-       u64 eqx_tp;
-       u64 eqx_ssba;
-       u64 eqx_psba;
-
-/* 0x40 */
-       u64 eqx_cec;
-       u64 eqx_meql;
-       u64 eqx_xisbi;
-       u64 eqx_xisc;
-/* 0x60 */
-       u64 eqx_it;
-
-};
-
-#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x)
-
-/* access control defines for MR/MW */
-#define HIPZ_ACCESSCTRL_L_WRITE  0x00800000
-#define HIPZ_ACCESSCTRL_R_WRITE  0x00400000
-#define HIPZ_ACCESSCTRL_R_READ   0x00200000
-#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000
-#define HIPZ_ACCESSCTRL_MW_BIND  0x00080000
-
-/* query hca response block */
-struct hipz_query_hca {
-       u32 cur_reliable_dg;
-       u32 cur_qp;
-       u32 cur_cq;
-       u32 cur_eq;
-       u32 cur_mr;
-       u32 cur_mw;
-       u32 cur_ee_context;
-       u32 cur_mcast_grp;
-       u32 cur_qp_attached_mcast_grp;
-       u32 reserved1;
-       u32 cur_ipv6_qp;
-       u32 cur_eth_qp;
-       u32 cur_hp_mr;
-       u32 reserved2[3];
-       u32 max_rd_domain;
-       u32 max_qp;
-       u32 max_cq;
-       u32 max_eq;
-       u32 max_mr;
-       u32 max_hp_mr;
-       u32 max_mw;
-       u32 max_mrwpte;
-       u32 max_special_mrwpte;
-       u32 max_rd_ee_context;
-       u32 max_mcast_grp;
-       u32 max_total_mcast_qp_attach;
-       u32 max_mcast_qp_attach;
-       u32 max_raw_ipv6_qp;
-       u32 max_raw_ethy_qp;
-       u32 internal_clock_frequency;
-       u32 max_pd;
-       u32 max_ah;
-       u32 max_cqe;
-       u32 max_wqes_wq;
-       u32 max_partitions;
-       u32 max_rr_ee_context;
-       u32 max_rr_qp;
-       u32 max_rr_hca;
-       u32 max_act_wqs_ee_context;
-       u32 max_act_wqs_qp;
-       u32 max_sge;
-       u32 max_sge_rd;
-       u32 memory_page_size_supported;
-       u64 max_mr_size;
-       u32 local_ca_ack_delay;
-       u32 num_ports;
-       u32 vendor_id;
-       u32 vendor_part_id;
-       u32 hw_ver;
-       u64 node_guid;
-       u64 hca_cap_indicators;
-       u32 data_counter_register_size;
-       u32 max_shared_rq;
-       u32 max_isns_eq;
-       u32 max_neq;
-} __attribute__ ((packed));
-
-#define HCA_CAP_AH_PORT_NR_CHECK      EHCA_BMASK_IBM( 0,  0)
-#define HCA_CAP_ATOMIC                EHCA_BMASK_IBM( 1,  1)
-#define HCA_CAP_AUTO_PATH_MIG         EHCA_BMASK_IBM( 2,  2)
-#define HCA_CAP_BAD_P_KEY_CTR         EHCA_BMASK_IBM( 3,  3)
-#define HCA_CAP_SQD_RTS_PORT_CHANGE   EHCA_BMASK_IBM( 4,  4)
-#define HCA_CAP_CUR_QP_STATE_MOD      EHCA_BMASK_IBM( 5,  5)
-#define HCA_CAP_INIT_TYPE             EHCA_BMASK_IBM( 6,  6)
-#define HCA_CAP_PORT_ACTIVE_EVENT     EHCA_BMASK_IBM( 7,  7)
-#define HCA_CAP_Q_KEY_VIOL_CTR        EHCA_BMASK_IBM( 8,  8)
-#define HCA_CAP_WQE_RESIZE            EHCA_BMASK_IBM( 9,  9)
-#define HCA_CAP_RAW_PACKET_MCAST      EHCA_BMASK_IBM(10, 10)
-#define HCA_CAP_SHUTDOWN_PORT         EHCA_BMASK_IBM(11, 11)
-#define HCA_CAP_RC_LL_QP              EHCA_BMASK_IBM(12, 12)
-#define HCA_CAP_SRQ                   EHCA_BMASK_IBM(13, 13)
-#define HCA_CAP_UD_LL_QP              EHCA_BMASK_IBM(16, 16)
-#define HCA_CAP_RESIZE_MR             EHCA_BMASK_IBM(17, 17)
-#define HCA_CAP_MINI_QP               EHCA_BMASK_IBM(18, 18)
-#define HCA_CAP_H_ALLOC_RES_SYNC      EHCA_BMASK_IBM(19, 19)
-
-/* query port response block */
-struct hipz_query_port {
-       u32 state;
-       u32 bad_pkey_cntr;
-       u32 lmc;
-       u32 lid;
-       u32 subnet_timeout;
-       u32 qkey_viol_cntr;
-       u32 sm_sl;
-       u32 sm_lid;
-       u32 capability_mask;
-       u32 init_type_reply;
-       u32 pkey_tbl_len;
-       u32 gid_tbl_len;
-       u64 gid_prefix;
-       u32 port_nr;
-       u16 pkey_entries[16];
-       u8  reserved1[32];
-       u32 trent_size;
-       u32 trbuf_size;
-       u64 max_msg_sz;
-       u32 max_mtu;
-       u32 vl_cap;
-       u32 phys_pstate;
-       u32 phys_state;
-       u32 phys_speed;
-       u32 phys_width;
-       u8  reserved2[1884];
-       u64 guid_entries[255];
-} __attribute__ ((packed));
-
-#endif
diff --git a/drivers/staging/rdma/ehca/ipz_pt_fn.c b/drivers/staging/rdma/ehca/ipz_pt_fn.c
deleted file mode 100644 (file)
index 7ffc748..0000000
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  internal queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_tools.h"
-#include "ipz_pt_fn.h"
-#include "ehca_classes.h"
-
-#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT)
-
-struct kmem_cache *small_qp_cache;
-
-void *ipz_qpageit_get_inc(struct ipz_queue *queue)
-{
-       void *ret = ipz_qeit_get(queue);
-       queue->current_q_offset += queue->pagesize;
-       if (queue->current_q_offset > queue->queue_length) {
-               queue->current_q_offset -= queue->pagesize;
-               ret = NULL;
-       }
-       if (((u64)ret) % queue->pagesize) {
-               ehca_gen_err("ERROR!! not at PAGE-Boundary");
-               return NULL;
-       }
-       return ret;
-}
-
-void *ipz_qeit_eq_get_inc(struct ipz_queue *queue)
-{
-       void *ret = ipz_qeit_get(queue);
-       u64 last_entry_in_q = queue->queue_length - queue->qe_size;
-
-       queue->current_q_offset += queue->qe_size;
-       if (queue->current_q_offset > last_entry_in_q) {
-               queue->current_q_offset = 0;
-               queue->toggle_state = (~queue->toggle_state) & 1;
-       }
-
-       return ret;
-}
-
-int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset)
-{
-       int i;
-       for (i = 0; i < queue->queue_length / queue->pagesize; i++) {
-               u64 page = __pa(queue->queue_pages[i]);
-               if (addr >= page && addr < page + queue->pagesize) {
-                       *q_offset = addr - page + i * queue->pagesize;
-                       return 0;
-               }
-       }
-       return -EINVAL;
-}
-
-#if PAGE_SHIFT < EHCA_PAGESHIFT
-#error Kernel pages must be at least as large than eHCA pages (4K) !
-#endif
-
-/*
- * allocate pages for queue:
- * outer loop allocates whole kernel pages (page aligned) and
- * inner loop divides a kernel page into smaller hca queue pages
- */
-static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages)
-{
-       int k, f = 0;
-       u8 *kpage;
-
-       while (f < nr_of_pages) {
-               kpage = (u8 *)get_zeroed_page(GFP_KERNEL);
-               if (!kpage)
-                       goto out;
-
-               for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) {
-                       queue->queue_pages[f] = (struct ipz_page *)kpage;
-                       kpage += EHCA_PAGESIZE;
-                       f++;
-               }
-       }
-       return 1;
-
-out:
-       for (f = 0; f < nr_of_pages && queue->queue_pages[f];
-            f += PAGES_PER_KPAGE)
-               free_page((unsigned long)(queue->queue_pages)[f]);
-       return 0;
-}
-
-static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
-{
-       int order = ilog2(queue->pagesize) - 9;
-       struct ipz_small_queue_page *page;
-       unsigned long bit;
-
-       mutex_lock(&pd->lock);
-
-       if (!list_empty(&pd->free[order]))
-               page = list_entry(pd->free[order].next,
-                                 struct ipz_small_queue_page, list);
-       else {
-               page = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL);
-               if (!page)
-                       goto out;
-
-               page->page = get_zeroed_page(GFP_KERNEL);
-               if (!page->page) {
-                       kmem_cache_free(small_qp_cache, page);
-                       goto out;
-               }
-
-               list_add(&page->list, &pd->free[order]);
-       }
-
-       bit = find_first_zero_bit(page->bitmap, IPZ_SPAGE_PER_KPAGE >> order);
-       __set_bit(bit, page->bitmap);
-       page->fill++;
-
-       if (page->fill == IPZ_SPAGE_PER_KPAGE >> order)
-               list_move(&page->list, &pd->full[order]);
-
-       mutex_unlock(&pd->lock);
-
-       queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9)));
-       queue->small_page = page;
-       queue->offset = bit << (order + 9);
-       return 1;
-
-out:
-       ehca_err(pd->ib_pd.device, "failed to allocate small queue page");
-       mutex_unlock(&pd->lock);
-       return 0;
-}
-
-static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
-{
-       int order = ilog2(queue->pagesize) - 9;
-       struct ipz_small_queue_page *page = queue->small_page;
-       unsigned long bit;
-       int free_page = 0;
-
-       bit = ((unsigned long)queue->queue_pages[0] & ~PAGE_MASK)
-               >> (order + 9);
-
-       mutex_lock(&pd->lock);
-
-       __clear_bit(bit, page->bitmap);
-       page->fill--;
-
-       if (page->fill == 0) {
-               list_del(&page->list);
-               free_page = 1;
-       }
-
-       if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1)
-               /* the page was full until we freed the chunk */
-               list_move_tail(&page->list, &pd->free[order]);
-
-       mutex_unlock(&pd->lock);
-
-       if (free_page) {
-               free_page(page->page);
-               kmem_cache_free(small_qp_cache, page);
-       }
-}
-
-int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
-                  const u32 nr_of_pages, const u32 pagesize,
-                  const u32 qe_size, const u32 nr_of_sg,
-                  int is_small)
-{
-       if (pagesize > PAGE_SIZE) {
-               ehca_gen_err("FATAL ERROR: pagesize=%x "
-                            "is greater than kernel page size", pagesize);
-               return 0;
-       }
-
-       /* init queue fields */
-       queue->queue_length = nr_of_pages * pagesize;
-       queue->pagesize = pagesize;
-       queue->qe_size = qe_size;
-       queue->act_nr_of_sg = nr_of_sg;
-       queue->current_q_offset = 0;
-       queue->toggle_state = 1;
-       queue->small_page = NULL;
-
-       /* allocate queue page pointers */
-       queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *),
-                                    GFP_KERNEL | __GFP_NOWARN);
-       if (!queue->queue_pages) {
-               queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *));
-               if (!queue->queue_pages) {
-                       ehca_gen_err("Couldn't allocate queue page list");
-                       return 0;
-               }
-       }
-
-       /* allocate actual queue pages */
-       if (is_small) {
-               if (!alloc_small_queue_page(queue, pd))
-                       goto ipz_queue_ctor_exit0;
-       } else
-               if (!alloc_queue_pages(queue, nr_of_pages))
-                       goto ipz_queue_ctor_exit0;
-
-       return 1;
-
-ipz_queue_ctor_exit0:
-       ehca_gen_err("Couldn't alloc pages queue=%p "
-                "nr_of_pages=%x",  queue, nr_of_pages);
-       kvfree(queue->queue_pages);
-
-       return 0;
-}
-
-int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue)
-{
-       int i, nr_pages;
-
-       if (!queue || !queue->queue_pages) {
-               ehca_gen_dbg("queue or queue_pages is NULL");
-               return 0;
-       }
-
-       if (queue->small_page)
-               free_small_queue_page(queue, pd);
-       else {
-               nr_pages = queue->queue_length / queue->pagesize;
-               for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE)
-                       free_page((unsigned long)queue->queue_pages[i]);
-       }
-
-       kvfree(queue->queue_pages);
-
-       return 1;
-}
-
-int ehca_init_small_qp_cache(void)
-{
-       small_qp_cache = kmem_cache_create("ehca_cache_small_qp",
-                                          sizeof(struct ipz_small_queue_page),
-                                          0, SLAB_HWCACHE_ALIGN, NULL);
-       if (!small_qp_cache)
-               return -ENOMEM;
-
-       return 0;
-}
-
-void ehca_cleanup_small_qp_cache(void)
-{
-       kmem_cache_destroy(small_qp_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ipz_pt_fn.h b/drivers/staging/rdma/ehca/ipz_pt_fn.h
deleted file mode 100644 (file)
index a801274..0000000
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  internal queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __IPZ_PT_FN_H__
-#define __IPZ_PT_FN_H__
-
-#define EHCA_PAGESHIFT   12
-#define EHCA_PAGESIZE   4096UL
-#define EHCA_PAGEMASK   (~(EHCA_PAGESIZE-1))
-#define EHCA_PT_ENTRIES 512UL
-
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-
-struct ehca_pd;
-struct ipz_small_queue_page;
-
-extern struct kmem_cache *small_qp_cache;
-
-/* struct generic ehca page */
-struct ipz_page {
-       u8 entries[EHCA_PAGESIZE];
-};
-
-#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512)
-
-struct ipz_small_queue_page {
-       unsigned long page;
-       unsigned long bitmap[IPZ_SPAGE_PER_KPAGE / BITS_PER_LONG];
-       int fill;
-       void *mapped_addr;
-       u32 mmap_count;
-       struct list_head list;
-};
-
-/* struct generic queue in linux kernel virtual memory (kv) */
-struct ipz_queue {
-       u64 current_q_offset;   /* current queue entry */
-
-       struct ipz_page **queue_pages;  /* array of pages belonging to queue */
-       u32 qe_size;            /* queue entry size */
-       u32 act_nr_of_sg;
-       u32 queue_length;       /* queue length allocated in bytes */
-       u32 pagesize;
-       u32 toggle_state;       /* toggle flag - per page */
-       u32 offset; /* save offset within page for small_qp */
-       struct ipz_small_queue_page *small_page;
-};
-
-/*
- * return current Queue Entry for a certain q_offset
- * returns address (kv) of Queue Entry
- */
-static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset)
-{
-       struct ipz_page *current_page;
-       if (q_offset >= queue->queue_length)
-               return NULL;
-       current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT];
-       return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
-}
-
-/*
- * return current Queue Entry
- * returns address (kv) of Queue Entry
- */
-static inline void *ipz_qeit_get(struct ipz_queue *queue)
-{
-       return ipz_qeit_calc(queue, queue->current_q_offset);
-}
-
-/*
- * return current Queue Page , increment Queue Page iterator from
- * page to page in struct ipz_queue, last increment will return 0! and
- * NOT wrap
- * returns address (kv) of Queue Page
- * warning don't use in parallel with ipz_QE_get_inc()
- */
-void *ipz_qpageit_get_inc(struct ipz_queue *queue);
-
-/*
- * return current Queue Entry, increment Queue Entry iterator by one
- * step in struct ipz_queue, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * warning don't use in parallel with ipz_qpageit_get_inc()
- */
-static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
-{
-       void *ret = ipz_qeit_get(queue);
-       queue->current_q_offset += queue->qe_size;
-       if (queue->current_q_offset >= queue->queue_length) {
-               queue->current_q_offset = 0;
-               /* toggle the valid flag */
-               queue->toggle_state = (~queue->toggle_state) & 1;
-       }
-
-       return ret;
-}
-
-/*
- * return a bool indicating whether current Queue Entry is valid
- */
-static inline int ipz_qeit_is_valid(struct ipz_queue *queue)
-{
-       struct ehca_cqe *cqe = ipz_qeit_get(queue);
-       return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1));
-}
-
-/*
- * return current Queue Entry, increment Queue Entry iterator by one
- * step in struct ipz_queue, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * returns 0 and does not increment, if wrong valid state
- * warning don't use in parallel with ipz_qpageit_get_inc()
- */
-static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue)
-{
-       return ipz_qeit_is_valid(queue) ? ipz_qeit_get_inc(queue) : NULL;
-}
-
-/*
- * returns and resets Queue Entry iterator
- * returns address (kv) of first Queue Entry
- */
-static inline void *ipz_qeit_reset(struct ipz_queue *queue)
-{
-       queue->current_q_offset = 0;
-       return ipz_qeit_get(queue);
-}
-
-/*
- * return the q_offset corresponding to an absolute address
- */
-int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset);
-
-/*
- * return the next queue offset. don't modify the queue.
- */
-static inline u64 ipz_queue_advance_offset(struct ipz_queue *queue, u64 offset)
-{
-       offset += queue->qe_size;
-       if (offset >= queue->queue_length) offset = 0;
-       return offset;
-}
-
-/* struct generic page table */
-struct ipz_pt {
-       u64 entries[EHCA_PT_ENTRIES];
-};
-
-/* struct page table for a queue, only to be used in pf */
-struct ipz_qpt {
-       /* queue page tables (kv), use u64 because we know the element length */
-       u64 *qpts;
-       u32 n_qpts;
-       u32 n_ptes;       /*  number of page table entries */
-       u64 *current_pte_addr;
-};
-
-/*
- * constructor for a ipz_queue_t, placement new for ipz_queue_t,
- * new for all dependent datastructors
- * all QP Tables are the same
- * flow:
- *    allocate+pin queue
- * see ipz_qpt_ctor()
- * returns true if ok, false if out of memory
- */
-int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
-                  const u32 nr_of_pages, const u32 pagesize,
-                  const u32 qe_size, const u32 nr_of_sg,
-                  int is_small);
-
-/*
- * destructor for a ipz_queue_t
- *  -# free queue
- *  see ipz_queue_ctor()
- *  returns true if ok, false if queue was NULL-ptr of free failed
- */
-int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue);
-
-/*
- * constructor for a ipz_qpt_t,
- * placement new for struct ipz_queue, new for all dependent datastructors
- * all QP Tables are the same,
- * flow:
- * -# allocate+pin queue
- * -# initialise ptcb
- * -# allocate+pin PTs
- * -# link PTs to a ring, according to HCA Arch, set bit62 id needed
- * -# the ring must have room for exactly nr_of_PTEs
- * see ipz_qpt_ctor()
- */
-void ipz_qpt_ctor(struct ipz_qpt *qpt,
-                 const u32 nr_of_qes,
-                 const u32 pagesize,
-                 const u32 qe_size,
-                 const u8 lowbyte, const u8 toggle,
-                 u32 * act_nr_of_QEs, u32 * act_nr_of_pages);
-
-/*
- * return current Queue Entry, increment Queue Entry iterator by one
- * step in struct ipz_queue, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * warning don't use in parallel with ipz_qpageit_get_inc()
- * warning unpredictable results may occur if steps>act_nr_of_queue_entries
- * fix EQ page problems
- */
-void *ipz_qeit_eq_get_inc(struct ipz_queue *queue);
-
-/*
- * return current Event Queue Entry, increment Queue Entry iterator
- * by one step in struct ipz_queue if valid, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * returns 0 and does not increment, if wrong valid state
- * warning don't use in parallel with ipz_queue_QPageit_get_inc()
- * warning unpredictable results may occur if steps>act_nr_of_queue_entries
- */
-static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
-{
-       void *ret = ipz_qeit_get(queue);
-       u32 qe = *(u8 *)ret;
-       if ((qe >> 7) != (queue->toggle_state & 1))
-               return NULL;
-       ipz_qeit_eq_get_inc(queue); /* this is a good one */
-       return ret;
-}
-
-static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue)
-{
-       void *ret = ipz_qeit_get(queue);
-       u32 qe = *(u8 *)ret;
-       if ((qe >> 7) != (queue->toggle_state & 1))
-               return NULL;
-       return ret;
-}
-
-/* returns address (GX) of first queue entry */
-static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt)
-{
-       return be64_to_cpu(qpt->qpts[0]);
-}
-
-/* returns address (kv) of first page of queue page table */
-static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt)
-{
-       return qpt->qpts;
-}
-
-#endif                         /* __IPZ_PT_FN_H__ */
index 568f185..a3f8b88 100644 (file)
@@ -167,10 +167,7 @@ static struct hfi1_mr *alloc_mr(int count, struct ib_pd *pd)
        rval = init_mregion(&mr->mr, pd, count);
        if (rval)
                goto bail;
-       /*
-        * ib_reg_phys_mr() will initialize mr->ibmr except for
-        * lkey and rkey.
-        */
+
        rval = hfi1_alloc_lkey(&mr->mr, 0);
        if (rval)
                goto bail_mregion;
@@ -187,52 +184,6 @@ bail:
        goto done;
 }
 
-/**
- * hfi1_reg_phys_mr - register a physical memory region
- * @pd: protection domain for this memory region
- * @buffer_list: pointer to the list of physical buffers to register
- * @num_phys_buf: the number of physical buffers to register
- * @iova_start: the starting address passed over IB which maps to this MR
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *hfi1_reg_phys_mr(struct ib_pd *pd,
-                              struct ib_phys_buf *buffer_list,
-                              int num_phys_buf, int acc, u64 *iova_start)
-{
-       struct hfi1_mr *mr;
-       int n, m, i;
-       struct ib_mr *ret;
-
-       mr = alloc_mr(num_phys_buf, pd);
-       if (IS_ERR(mr)) {
-               ret = (struct ib_mr *)mr;
-               goto bail;
-       }
-
-       mr->mr.user_base = *iova_start;
-       mr->mr.iova = *iova_start;
-       mr->mr.access_flags = acc;
-
-       m = 0;
-       n = 0;
-       for (i = 0; i < num_phys_buf; i++) {
-               mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
-               mr->mr.map[m]->segs[n].length = buffer_list[i].size;
-               mr->mr.length += buffer_list[i].size;
-               n++;
-               if (n == HFI1_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-
-       ret = &mr->ibmr;
-
-bail:
-       return ret;
-}
-
 /**
  * hfi1_reg_user_mr - register a userspace memory region
  * @pd: protection domain for this memory region
index ef0feaa..09b8d41 100644 (file)
@@ -2052,7 +2052,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
        ibdev->poll_cq = hfi1_poll_cq;
        ibdev->req_notify_cq = hfi1_req_notify_cq;
        ibdev->get_dma_mr = hfi1_get_dma_mr;
-       ibdev->reg_phys_mr = hfi1_reg_phys_mr;
        ibdev->reg_user_mr = hfi1_reg_user_mr;
        ibdev->dereg_mr = hfi1_dereg_mr;
        ibdev->alloc_mr = hfi1_alloc_mr;
index 72106e5..286e468 100644 (file)
@@ -1024,10 +1024,6 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
 
 struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc);
 
-struct ib_mr *hfi1_reg_phys_mr(struct ib_pd *pd,
-                              struct ib_phys_buf *buffer_list,
-                              int num_phys_buf, int acc, u64 *iova_start);
-
 struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                               u64 virt_addr, int mr_access_flags,
                               struct ib_udata *udata);
diff --git a/drivers/staging/rdma/ipath/Kconfig b/drivers/staging/rdma/ipath/Kconfig
deleted file mode 100644 (file)
index 041ce06..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-config INFINIBAND_IPATH
-       tristate "QLogic HTX HCA support"
-       depends on 64BIT && NET && HT_IRQ
-       ---help---
-       This is a driver for the deprecated QLogic Hyper-Transport
-       IB host channel adapter (model QHT7140),
-       including InfiniBand verbs support.  This driver allows these
-       devices to be used with both kernel upper level protocols such
-       as IP-over-InfiniBand as well as with userspace applications
-       (in conjunction with InfiniBand userspace access).
-       For QLogic PCIe QLE based cards, use the QIB driver instead.
-
-       If you have this hardware you will need to boot with PAT disabled
-       on your x86-64 systems, use the nopat kernel parameter.
-
-       Note that this driver will soon be removed entirely from the kernel.
diff --git a/drivers/staging/rdma/ipath/Makefile b/drivers/staging/rdma/ipath/Makefile
deleted file mode 100644 (file)
index 4496f28..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-ccflags-y := -DIPATH_IDSTR='"QLogic kernel.org driver"' \
-       -DIPATH_KERN_TYPE=0
-
-obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
-
-ib_ipath-y := \
-       ipath_cq.o \
-       ipath_diag.o \
-       ipath_dma.o \
-       ipath_driver.o \
-       ipath_eeprom.o \
-       ipath_file_ops.o \
-       ipath_fs.o \
-       ipath_init_chip.o \
-       ipath_intr.o \
-       ipath_keys.o \
-       ipath_mad.o \
-       ipath_mmap.o \
-       ipath_mr.o \
-       ipath_qp.o \
-       ipath_rc.o \
-       ipath_ruc.o \
-       ipath_sdma.o \
-       ipath_srq.o \
-       ipath_stats.o \
-       ipath_sysfs.o \
-       ipath_uc.o \
-       ipath_ud.o \
-       ipath_user_pages.o \
-       ipath_user_sdma.o \
-       ipath_verbs_mcast.o \
-       ipath_verbs.o
-
-ib_ipath-$(CONFIG_HT_IRQ) += ipath_iba6110.o
-
-ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
-ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
diff --git a/drivers/staging/rdma/ipath/TODO b/drivers/staging/rdma/ipath/TODO
deleted file mode 100644 (file)
index cb00158..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-The ipath driver has been moved to staging in preparation for its removal in a
-few releases. The driver will be deleted during the 4.6 merge window.
-
-Contact Dennis Dalessandro <dennis.dalessandro@intel.com> and
-Cc: linux-rdma@vger.kernel.org
diff --git a/drivers/staging/rdma/ipath/ipath_common.h b/drivers/staging/rdma/ipath/ipath_common.h
deleted file mode 100644 (file)
index 28cfe97..0000000
+++ /dev/null
@@ -1,851 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _IPATH_COMMON_H
-#define _IPATH_COMMON_H
-
-/*
- * This file contains defines, structures, etc. that are used
- * to communicate between kernel and user code.
- */
-
-
-/* This is the IEEE-assigned OUI for QLogic Inc. InfiniPath */
-#define IPATH_SRC_OUI_1 0x00
-#define IPATH_SRC_OUI_2 0x11
-#define IPATH_SRC_OUI_3 0x75
-
-/* version of protocol header (known to chip also). In the long run,
- * we should be able to generate and accept a range of version numbers;
- * for now we only accept one, and it's compiled in.
- */
-#define IPS_PROTO_VERSION 2
-
-/*
- * These are compile time constants that you may want to enable or disable
- * if you are trying to debug problems with code or performance.
- * IPATH_VERBOSE_TRACING define as 1 if you want additional tracing in
- * fastpath code
- * IPATH_TRACE_REGWRITES define as 1 if you want register writes to be
- * traced in faspath code
- * _IPATH_TRACING define as 0 if you want to remove all tracing in a
- * compilation unit
- * _IPATH_DEBUGGING define as 0 if you want to remove debug prints
- */
-
-/*
- * The value in the BTH QP field that InfiniPath uses to differentiate
- * an infinipath protocol IB packet vs standard IB transport
- */
-#define IPATH_KD_QP 0x656b79
-
-/*
- * valid states passed to ipath_set_linkstate() user call
- */
-#define IPATH_IB_LINKDOWN              0
-#define IPATH_IB_LINKARM               1
-#define IPATH_IB_LINKACTIVE            2
-#define IPATH_IB_LINKDOWN_ONLY         3
-#define IPATH_IB_LINKDOWN_SLEEP                4
-#define IPATH_IB_LINKDOWN_DISABLE      5
-#define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */
-#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */
-#define IPATH_IB_LINK_NO_HRTBT 8 /* disable Heartbeat, e.g. for loopback */
-#define IPATH_IB_LINK_HRTBT    9 /* enable heartbeat, normal, non-loopback */
-
-/*
- * These 3 values (SDR and DDR may be ORed for auto-speed
- * negotiation) are used for the 3rd argument to path_f_set_ib_cfg
- * with cmd IPATH_IB_CFG_SPD_ENB, by direct calls or via sysfs.  They
- * are also the the possible values for ipath_link_speed_enabled and active
- * The values were chosen to match values used within the IB spec.
- */
-#define IPATH_IB_SDR 1
-#define IPATH_IB_DDR 2
-
-/*
- * stats maintained by the driver.  For now, at least, this is global
- * to all minor devices.
- */
-struct infinipath_stats {
-       /* number of interrupts taken */
-       __u64 sps_ints;
-       /* number of interrupts for errors */
-       __u64 sps_errints;
-       /* number of errors from chip (not incl. packet errors or CRC) */
-       __u64 sps_errs;
-       /* number of packet errors from chip other than CRC */
-       __u64 sps_pkterrs;
-       /* number of packets with CRC errors (ICRC and VCRC) */
-       __u64 sps_crcerrs;
-       /* number of hardware errors reported (parity, etc.) */
-       __u64 sps_hwerrs;
-       /* number of times IB link changed state unexpectedly */
-       __u64 sps_iblink;
-       __u64 sps_unused; /* was fastrcvint, no longer implemented */
-       /* number of kernel (port0) packets received */
-       __u64 sps_port0pkts;
-       /* number of "ethernet" packets sent by driver */
-       __u64 sps_ether_spkts;
-       /* number of "ethernet" packets received by driver */
-       __u64 sps_ether_rpkts;
-       /* number of SMA packets sent by driver. Obsolete. */
-       __u64 sps_sma_spkts;
-       /* number of SMA packets received by driver. Obsolete. */
-       __u64 sps_sma_rpkts;
-       /* number of times all ports rcvhdrq was full and packet dropped */
-       __u64 sps_hdrqfull;
-       /* number of times all ports egrtid was full and packet dropped */
-       __u64 sps_etidfull;
-       /*
-        * number of times we tried to send from driver, but no pio buffers
-        * avail
-        */
-       __u64 sps_nopiobufs;
-       /* number of ports currently open */
-       __u64 sps_ports;
-       /* list of pkeys (other than default) accepted (0 means not set) */
-       __u16 sps_pkeys[4];
-       __u16 sps_unused16[4]; /* available; maintaining compatible layout */
-       /* number of user ports per chip (not IB ports) */
-       __u32 sps_nports;
-       /* not our interrupt, or already handled */
-       __u32 sps_nullintr;
-       /* max number of packets handled per receive call */
-       __u32 sps_maxpkts_call;
-       /* avg number of packets handled per receive call */
-       __u32 sps_avgpkts_call;
-       /* total number of pages locked */
-       __u64 sps_pagelocks;
-       /* total number of pages unlocked */
-       __u64 sps_pageunlocks;
-       /*
-        * Number of packets dropped in kernel other than errors (ether
-        * packets if ipath not configured, etc.)
-        */
-       __u64 sps_krdrops;
-       __u64 sps_txeparity; /* PIO buffer parity error, recovered */
-       /* pad for future growth */
-       __u64 __sps_pad[45];
-};
-
-/*
- * These are the status bits readable (in ascii form, 64bit value)
- * from the "status" sysfs file.
- */
-#define IPATH_STATUS_INITTED       0x1 /* basic initialization done */
-#define IPATH_STATUS_DISABLED      0x2 /* hardware disabled */
-/* Device has been disabled via admin request */
-#define IPATH_STATUS_ADMIN_DISABLED    0x4
-/* Chip has been found and initted */
-#define IPATH_STATUS_CHIP_PRESENT 0x20
-/* IB link is at ACTIVE, usable for data traffic */
-#define IPATH_STATUS_IB_READY     0x40
-/* link is configured, LID, MTU, etc. have been set */
-#define IPATH_STATUS_IB_CONF      0x80
-/* no link established, probably no cable */
-#define IPATH_STATUS_IB_NOCABLE  0x100
-/* A Fatal hardware error has occurred. */
-#define IPATH_STATUS_HWERROR     0x200
-
-/*
- * The list of usermode accessible registers.  Also see Reg_* later in file.
- */
-typedef enum _ipath_ureg {
-       /* (RO)  DMA RcvHdr to be used next. */
-       ur_rcvhdrtail = 0,
-       /* (RW)  RcvHdr entry to be processed next by host. */
-       ur_rcvhdrhead = 1,
-       /* (RO)  Index of next Eager index to use. */
-       ur_rcvegrindextail = 2,
-       /* (RW)  Eager TID to be processed next */
-       ur_rcvegrindexhead = 3,
-       /* For internal use only; max register number. */
-       _IPATH_UregMax
-} ipath_ureg;
-
-/* bit values for spi_runtime_flags */
-#define IPATH_RUNTIME_HT       0x1
-#define IPATH_RUNTIME_PCIE     0x2
-#define IPATH_RUNTIME_FORCE_WC_ORDER   0x4
-#define IPATH_RUNTIME_RCVHDR_COPY      0x8
-#define IPATH_RUNTIME_MASTER   0x10
-#define IPATH_RUNTIME_NODMA_RTAIL 0x80
-#define IPATH_RUNTIME_SDMA           0x200
-#define IPATH_RUNTIME_FORCE_PIOAVAIL 0x400
-#define IPATH_RUNTIME_PIO_REGSWAPPED 0x800
-
-/*
- * This structure is returned by ipath_userinit() immediately after
- * open to get implementation-specific info, and info specific to this
- * instance.
- *
- * This struct must have explict pad fields where type sizes
- * may result in different alignments between 32 and 64 bit
- * programs, since the 64 bit * bit kernel requires the user code
- * to have matching offsets
- */
-struct ipath_base_info {
-       /* version of hardware, for feature checking. */
-       __u32 spi_hw_version;
-       /* version of software, for feature checking. */
-       __u32 spi_sw_version;
-       /* InfiniPath port assigned, goes into sent packets */
-       __u16 spi_port;
-       __u16 spi_subport;
-       /*
-        * IB MTU, packets IB data must be less than this.
-        * The MTU is in bytes, and will be a multiple of 4 bytes.
-        */
-       __u32 spi_mtu;
-       /*
-        * Size of a PIO buffer.  Any given packet's total size must be less
-        * than this (in words).  Included is the starting control word, so
-        * if 513 is returned, then total pkt size is 512 words or less.
-        */
-       __u32 spi_piosize;
-       /* size of the TID cache in infinipath, in entries */
-       __u32 spi_tidcnt;
-       /* size of the TID Eager list in infinipath, in entries */
-       __u32 spi_tidegrcnt;
-       /* size of a single receive header queue entry in words. */
-       __u32 spi_rcvhdrent_size;
-       /*
-        * Count of receive header queue entries allocated.
-        * This may be less than the spu_rcvhdrcnt passed in!.
-        */
-       __u32 spi_rcvhdr_cnt;
-
-       /* per-chip and other runtime features bitmap (IPATH_RUNTIME_*) */
-       __u32 spi_runtime_flags;
-
-       /* address where receive buffer queue is mapped into */
-       __u64 spi_rcvhdr_base;
-
-       /* user program. */
-
-       /* base address of eager TID receive buffers. */
-       __u64 spi_rcv_egrbufs;
-
-       /* Allocated by initialization code, not by protocol. */
-
-       /*
-        * Size of each TID buffer in host memory, starting at
-        * spi_rcv_egrbufs.  The buffers are virtually contiguous.
-        */
-       __u32 spi_rcv_egrbufsize;
-       /*
-        * The special QP (queue pair) value that identifies an infinipath
-        * protocol packet from standard IB packets.  More, probably much
-        * more, to be added.
-        */
-       __u32 spi_qpair;
-
-       /*
-        * User register base for init code, not to be used directly by
-        * protocol or applications.
-        */
-       __u64 __spi_uregbase;
-       /*
-        * Maximum buffer size in bytes that can be used in a single TID
-        * entry (assuming the buffer is aligned to this boundary).  This is
-        * the minimum of what the hardware and software support Guaranteed
-        * to be a power of 2.
-        */
-       __u32 spi_tid_maxsize;
-       /*
-        * alignment of each pio send buffer (byte count
-        * to add to spi_piobufbase to get to second buffer)
-        */
-       __u32 spi_pioalign;
-       /*
-        * The index of the first pio buffer available to this process;
-        * needed to do lookup in spi_pioavailaddr; not added to
-        * spi_piobufbase.
-        */
-       __u32 spi_pioindex;
-        /* number of buffers mapped for this process */
-       __u32 spi_piocnt;
-
-       /*
-        * Base address of writeonly pio buffers for this process.
-        * Each buffer has spi_piosize words, and is aligned on spi_pioalign
-        * boundaries.  spi_piocnt buffers are mapped from this address
-        */
-       __u64 spi_piobufbase;
-
-       /*
-        * Base address of readonly memory copy of the pioavail registers.
-        * There are 2 bits for each buffer.
-        */
-       __u64 spi_pioavailaddr;
-
-       /*
-        * Address where driver updates a copy of the interface and driver
-        * status (IPATH_STATUS_*) as a 64 bit value.  It's followed by a
-        * string indicating hardware error, if there was one.
-        */
-       __u64 spi_status;
-
-       /* number of chip ports available to user processes */
-       __u32 spi_nports;
-       /* unit number of chip we are using */
-       __u32 spi_unit;
-       /* num bufs in each contiguous set */
-       __u32 spi_rcv_egrperchunk;
-       /* size in bytes of each contiguous set */
-       __u32 spi_rcv_egrchunksize;
-       /* total size of mmap to cover full rcvegrbuffers */
-       __u32 spi_rcv_egrbuftotlen;
-       __u32 spi_filler_for_align;
-       /* address of readonly memory copy of the rcvhdrq tail register. */
-       __u64 spi_rcvhdr_tailaddr;
-
-       /* shared memory pages for subports if port is shared */
-       __u64 spi_subport_uregbase;
-       __u64 spi_subport_rcvegrbuf;
-       __u64 spi_subport_rcvhdr_base;
-
-       /* shared memory page for hardware port if it is shared */
-       __u64 spi_port_uregbase;
-       __u64 spi_port_rcvegrbuf;
-       __u64 spi_port_rcvhdr_base;
-       __u64 spi_port_rcvhdr_tailaddr;
-
-} __attribute__ ((aligned(8)));
-
-
-/*
- * This version number is given to the driver by the user code during
- * initialization in the spu_userversion field of ipath_user_info, so
- * the driver can check for compatibility with user code.
- *
- * The major version changes when data structures
- * change in an incompatible way.  The driver must be the same or higher
- * for initialization to succeed.  In some cases, a higher version
- * driver will not interoperate with older software, and initialization
- * will return an error.
- */
-#define IPATH_USER_SWMAJOR 1
-
-/*
- * Minor version differences are always compatible
- * a within a major version, however if user software is larger
- * than driver software, some new features and/or structure fields
- * may not be implemented; the user code must deal with this if it
- * cares, or it must abort after initialization reports the difference.
- */
-#define IPATH_USER_SWMINOR 6
-
-#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
-
-#define IPATH_KERN_TYPE 0
-
-/*
- * Similarly, this is the kernel version going back to the user.  It's
- * slightly different, in that we want to tell if the driver was built as
- * part of a QLogic release, or from the driver from openfabrics.org,
- * kernel.org, or a standard distribution, for support reasons.
- * The high bit is 0 for non-QLogic and 1 for QLogic-built/supplied.
- *
- * It's returned by the driver to the user code during initialization in the
- * spi_sw_version field of ipath_base_info, so the user code can in turn
- * check for compatibility with the kernel.
-*/
-#define IPATH_KERN_SWVERSION ((IPATH_KERN_TYPE<<31) | IPATH_USER_SWVERSION)
-
-/*
- * This structure is passed to ipath_userinit() to tell the driver where
- * user code buffers are, sizes, etc.   The offsets and sizes of the
- * fields must remain unchanged, for binary compatibility.  It can
- * be extended, if userversion is changed so user code can tell, if needed
- */
-struct ipath_user_info {
-       /*
-        * version of user software, to detect compatibility issues.
-        * Should be set to IPATH_USER_SWVERSION.
-        */
-       __u32 spu_userversion;
-
-       /* desired number of receive header queue entries */
-       __u32 spu_rcvhdrcnt;
-
-       /* size of struct base_info to write to */
-       __u32 spu_base_info_size;
-
-       /*
-        * number of words in KD protocol header
-        * This tells InfiniPath how many words to copy to rcvhdrq.  If 0,
-        * kernel uses a default.  Once set, attempts to set any other value
-        * are an error (EAGAIN) until driver is reloaded.
-        */
-       __u32 spu_rcvhdrsize;
-
-       /*
-        * If two or more processes wish to share a port, each process
-        * must set the spu_subport_cnt and spu_subport_id to the same
-        * values.  The only restriction on the spu_subport_id is that
-        * it be unique for a given node.
-        */
-       __u16 spu_subport_cnt;
-       __u16 spu_subport_id;
-
-       __u32 spu_unused; /* kept for compatible layout */
-
-       /*
-        * address of struct base_info to write to
-        */
-       __u64 spu_base_info;
-
-} __attribute__ ((aligned(8)));
-
-/* User commands. */
-
-#define IPATH_CMD_MIN          16
-
-#define __IPATH_CMD_USER_INIT  16      /* old set up userspace (for old user code) */
-#define IPATH_CMD_PORT_INFO    17      /* find out what resources we got */
-#define IPATH_CMD_RECV_CTRL    18      /* control receipt of packets */
-#define IPATH_CMD_TID_UPDATE   19      /* update expected TID entries */
-#define IPATH_CMD_TID_FREE     20      /* free expected TID entries */
-#define IPATH_CMD_SET_PART_KEY 21      /* add partition key */
-#define __IPATH_CMD_SLAVE_INFO 22      /* return info on slave processes (for old user code) */
-#define IPATH_CMD_ASSIGN_PORT  23      /* allocate HCA and port */
-#define IPATH_CMD_USER_INIT    24      /* set up userspace */
-#define IPATH_CMD_UNUSED_1     25
-#define IPATH_CMD_UNUSED_2     26
-#define IPATH_CMD_PIOAVAILUPD  27      /* force an update of PIOAvail reg */
-#define IPATH_CMD_POLL_TYPE    28      /* set the kind of polling we want */
-#define IPATH_CMD_ARMLAUNCH_CTRL       29 /* armlaunch detection control */
-/* 30 is unused */
-#define IPATH_CMD_SDMA_INFLIGHT 31     /* sdma inflight counter request */
-#define IPATH_CMD_SDMA_COMPLETE 32     /* sdma completion counter request */
-
-/*
- * Poll types
- */
-#define IPATH_POLL_TYPE_URGENT  0x01
-#define IPATH_POLL_TYPE_OVERFLOW 0x02
-
-struct ipath_port_info {
-       __u32 num_active;       /* number of active units */
-       __u32 unit;             /* unit (chip) assigned to caller */
-       __u16 port;             /* port on unit assigned to caller */
-       __u16 subport;          /* subport on unit assigned to caller */
-       __u16 num_ports;        /* number of ports available on unit */
-       __u16 num_subports;     /* number of subports opened on port */
-};
-
-struct ipath_tid_info {
-       __u32 tidcnt;
-       /* make structure same size in 32 and 64 bit */
-       __u32 tid__unused;
-       /* virtual address of first page in transfer */
-       __u64 tidvaddr;
-       /* pointer (same size 32/64 bit) to __u16 tid array */
-       __u64 tidlist;
-
-       /*
-        * pointer (same size 32/64 bit) to bitmap of TIDs used
-        * for this call; checked for being large enough at open
-        */
-       __u64 tidmap;
-};
-
-struct ipath_cmd {
-       __u32 type;                     /* command type */
-       union {
-               struct ipath_tid_info tid_info;
-               struct ipath_user_info user_info;
-
-               /*
-                * address in userspace where we should put the sdma
-                * inflight counter
-                */
-               __u64 sdma_inflight;
-               /*
-                * address in userspace where we should put the sdma
-                * completion counter
-                */
-               __u64 sdma_complete;
-               /* address in userspace of struct ipath_port_info to
-                  write result to */
-               __u64 port_info;
-               /* enable/disable receipt of packets */
-               __u32 recv_ctrl;
-               /* enable/disable armlaunch errors (non-zero to enable) */
-               __u32 armlaunch_ctrl;
-               /* partition key to set */
-               __u16 part_key;
-               /* user address of __u32 bitmask of active slaves */
-               __u64 slave_mask_addr;
-               /* type of polling we want */
-               __u16 poll_type;
-       } cmd;
-};
-
-struct ipath_iovec {
-       /* Pointer to data, but same size 32 and 64 bit */
-       __u64 iov_base;
-
-       /*
-        * Length of data; don't need 64 bits, but want
-        * ipath_sendpkt to remain same size as before 32 bit changes, so...
-        */
-       __u64 iov_len;
-};
-
-/*
- * Describes a single packet for send.  Each packet can have one or more
- * buffers, but the total length (exclusive of IB headers) must be less
- * than the MTU, and if using the PIO method, entire packet length,
- * including IB headers, must be less than the ipath_piosize value (words).
- * Use of this necessitates including sys/uio.h
- */
-struct __ipath_sendpkt {
-       __u32 sps_flags;        /* flags for packet (TBD) */
-       __u32 sps_cnt;          /* number of entries to use in sps_iov */
-       /* array of iov's describing packet. TEMPORARY */
-       struct ipath_iovec sps_iov[4];
-};
-
-/*
- * diagnostics can send a packet by "writing" one of the following
- * two structs to diag data special file
- * The first is the legacy version for backward compatibility
- */
-struct ipath_diag_pkt {
-       __u32 unit;
-       __u64 data;
-       __u32 len;
-};
-
-/* The second diag_pkt struct is the expanded version that allows
- * more control over the packet, specifically, by allowing a custom
- * pbc (+ static rate) qword, so that special modes and deliberate
- * changes to CRCs can be used. The elements were also re-ordered
- * for better alignment and to avoid padding issues.
- */
-struct ipath_diag_xpkt {
-       __u64 data;
-       __u64 pbc_wd;
-       __u32 unit;
-       __u32 len;
-};
-
-/*
- * Data layout in I2C flash (for GUID, etc.)
- * All fields are little-endian binary unless otherwise stated
- */
-#define IPATH_FLASH_VERSION 2
-struct ipath_flash {
-       /* flash layout version (IPATH_FLASH_VERSION) */
-       __u8 if_fversion;
-       /* checksum protecting if_length bytes */
-       __u8 if_csum;
-       /*
-        * valid length (in use, protected by if_csum), including
-        * if_fversion and if_csum themselves)
-        */
-       __u8 if_length;
-       /* the GUID, in network order */
-       __u8 if_guid[8];
-       /* number of GUIDs to use, starting from if_guid */
-       __u8 if_numguid;
-       /* the (last 10 characters of) board serial number, in ASCII */
-       char if_serial[12];
-       /* board mfg date (YYYYMMDD ASCII) */
-       char if_mfgdate[8];
-       /* last board rework/test date (YYYYMMDD ASCII) */
-       char if_testdate[8];
-       /* logging of error counts, TBD */
-       __u8 if_errcntp[4];
-       /* powered on hours, updated at driver unload */
-       __u8 if_powerhour[2];
-       /* ASCII free-form comment field */
-       char if_comment[32];
-       /* Backwards compatible prefix for longer QLogic Serial Numbers */
-       char if_sprefix[4];
-       /* 82 bytes used, min flash size is 128 bytes */
-       __u8 if_future[46];
-};
-
-/*
- * These are the counters implemented in the chip, and are listed in order.
- * The InterCaps naming is taken straight from the chip spec.
- */
-struct infinipath_counters {
-       __u64 LBIntCnt;
-       __u64 LBFlowStallCnt;
-       __u64 TxSDmaDescCnt;    /* was Reserved1 */
-       __u64 TxUnsupVLErrCnt;
-       __u64 TxDataPktCnt;
-       __u64 TxFlowPktCnt;
-       __u64 TxDwordCnt;
-       __u64 TxLenErrCnt;
-       __u64 TxMaxMinLenErrCnt;
-       __u64 TxUnderrunCnt;
-       __u64 TxFlowStallCnt;
-       __u64 TxDroppedPktCnt;
-       __u64 RxDroppedPktCnt;
-       __u64 RxDataPktCnt;
-       __u64 RxFlowPktCnt;
-       __u64 RxDwordCnt;
-       __u64 RxLenErrCnt;
-       __u64 RxMaxMinLenErrCnt;
-       __u64 RxICRCErrCnt;
-       __u64 RxVCRCErrCnt;
-       __u64 RxFlowCtrlErrCnt;
-       __u64 RxBadFormatCnt;
-       __u64 RxLinkProblemCnt;
-       __u64 RxEBPCnt;
-       __u64 RxLPCRCErrCnt;
-       __u64 RxBufOvflCnt;
-       __u64 RxTIDFullErrCnt;
-       __u64 RxTIDValidErrCnt;
-       __u64 RxPKeyMismatchCnt;
-       __u64 RxP0HdrEgrOvflCnt;
-       __u64 RxP1HdrEgrOvflCnt;
-       __u64 RxP2HdrEgrOvflCnt;
-       __u64 RxP3HdrEgrOvflCnt;
-       __u64 RxP4HdrEgrOvflCnt;
-       __u64 RxP5HdrEgrOvflCnt;
-       __u64 RxP6HdrEgrOvflCnt;
-       __u64 RxP7HdrEgrOvflCnt;
-       __u64 RxP8HdrEgrOvflCnt;
-       __u64 RxP9HdrEgrOvflCnt;        /* was Reserved6 */
-       __u64 RxP10HdrEgrOvflCnt;       /* was Reserved7 */
-       __u64 RxP11HdrEgrOvflCnt;       /* new for IBA7220 */
-       __u64 RxP12HdrEgrOvflCnt;       /* new for IBA7220 */
-       __u64 RxP13HdrEgrOvflCnt;       /* new for IBA7220 */
-       __u64 RxP14HdrEgrOvflCnt;       /* new for IBA7220 */
-       __u64 RxP15HdrEgrOvflCnt;       /* new for IBA7220 */
-       __u64 RxP16HdrEgrOvflCnt;       /* new for IBA7220 */
-       __u64 IBStatusChangeCnt;
-       __u64 IBLinkErrRecoveryCnt;
-       __u64 IBLinkDownedCnt;
-       __u64 IBSymbolErrCnt;
-       /* The following are new for IBA7220 */
-       __u64 RxVL15DroppedPktCnt;
-       __u64 RxOtherLocalPhyErrCnt;
-       __u64 PcieRetryBufDiagQwordCnt;
-       __u64 ExcessBufferOvflCnt;
-       __u64 LocalLinkIntegrityErrCnt;
-       __u64 RxVlErrCnt;
-       __u64 RxDlidFltrCnt;
-};
-
-/*
- * The next set of defines are for packet headers, and chip register
- * and memory bits that are visible to and/or used by user-mode software
- * The other bits that are used only by the driver or diags are in
- * ipath_registers.h
- */
-
-/* RcvHdrFlags bits */
-#define INFINIPATH_RHF_LENGTH_MASK 0x7FF
-#define INFINIPATH_RHF_LENGTH_SHIFT 0
-#define INFINIPATH_RHF_RCVTYPE_MASK 0x7
-#define INFINIPATH_RHF_RCVTYPE_SHIFT 11
-#define INFINIPATH_RHF_EGRINDEX_MASK 0xFFF
-#define INFINIPATH_RHF_EGRINDEX_SHIFT 16
-#define INFINIPATH_RHF_SEQ_MASK 0xF
-#define INFINIPATH_RHF_SEQ_SHIFT 0
-#define INFINIPATH_RHF_HDRQ_OFFSET_MASK 0x7FF
-#define INFINIPATH_RHF_HDRQ_OFFSET_SHIFT 4
-#define INFINIPATH_RHF_H_ICRCERR   0x80000000
-#define INFINIPATH_RHF_H_VCRCERR   0x40000000
-#define INFINIPATH_RHF_H_PARITYERR 0x20000000
-#define INFINIPATH_RHF_H_LENERR    0x10000000
-#define INFINIPATH_RHF_H_MTUERR    0x08000000
-#define INFINIPATH_RHF_H_IHDRERR   0x04000000
-#define INFINIPATH_RHF_H_TIDERR    0x02000000
-#define INFINIPATH_RHF_H_MKERR     0x01000000
-#define INFINIPATH_RHF_H_IBERR     0x00800000
-#define INFINIPATH_RHF_H_ERR_MASK  0xFF800000
-#define INFINIPATH_RHF_L_USE_EGR   0x80000000
-#define INFINIPATH_RHF_L_SWA       0x00008000
-#define INFINIPATH_RHF_L_SWB       0x00004000
-
-/* infinipath header fields */
-#define INFINIPATH_I_VERS_MASK 0xF
-#define INFINIPATH_I_VERS_SHIFT 28
-#define INFINIPATH_I_PORT_MASK 0xF
-#define INFINIPATH_I_PORT_SHIFT 24
-#define INFINIPATH_I_TID_MASK 0x7FF
-#define INFINIPATH_I_TID_SHIFT 13
-#define INFINIPATH_I_OFFSET_MASK 0x1FFF
-#define INFINIPATH_I_OFFSET_SHIFT 0
-
-/* K_PktFlags bits */
-#define INFINIPATH_KPF_INTR 0x1
-#define INFINIPATH_KPF_SUBPORT_MASK 0x3
-#define INFINIPATH_KPF_SUBPORT_SHIFT 1
-
-#define INFINIPATH_MAX_SUBPORT 4
-
-/* SendPIO per-buffer control */
-#define INFINIPATH_SP_TEST    0x40
-#define INFINIPATH_SP_TESTEBP 0x20
-#define INFINIPATH_SP_TRIGGER_SHIFT  15
-
-/* SendPIOAvail bits */
-#define INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT 1
-#define INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 0
-
-/* infinipath header format */
-struct ipath_header {
-       /*
-        * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset -
-        * 14 bits before ECO change ~28 Dec 03.  After that, Vers 4,
-        * Port 4, TID 11, offset 13.
-        */
-       __le32 ver_port_tid_offset;
-       __le16 chksum;
-       __le16 pkt_flags;
-};
-
-/* infinipath user message header format.
- * This structure contains the first 4 fields common to all protocols
- * that employ infinipath.
- */
-struct ipath_message_header {
-       __be16 lrh[4];
-       __be32 bth[3];
-       /* fields below this point are in host byte order */
-       struct ipath_header iph;
-       __u8 sub_opcode;
-};
-
-/* infinipath ethernet header format */
-struct ether_header {
-       __be16 lrh[4];
-       __be32 bth[3];
-       struct ipath_header iph;
-       __u8 sub_opcode;
-       __u8 cmd;
-       __be16 lid;
-       __u16 mac[3];
-       __u8 frag_num;
-       __u8 seq_num;
-       __le32 len;
-       /* MUST be of word size due to PIO write requirements */
-       __le32 csum;
-       __le16 csum_offset;
-       __le16 flags;
-       __u16 first_2_bytes;
-       __u8 unused[2];         /* currently unused */
-};
-
-
-/* IB - LRH header consts */
-#define IPATH_LRH_GRH 0x0003   /* 1. word of IB LRH - next header: GRH */
-#define IPATH_LRH_BTH 0x0002   /* 1. word of IB LRH - next header: BTH */
-
-/* misc. */
-#define SIZE_OF_CRC 1
-
-#define IPATH_DEFAULT_P_KEY 0xFFFF
-#define IPATH_PERMISSIVE_LID 0xFFFF
-#define IPATH_AETH_CREDIT_SHIFT 24
-#define IPATH_AETH_CREDIT_MASK 0x1F
-#define IPATH_AETH_CREDIT_INVAL 0x1F
-#define IPATH_PSN_MASK 0xFFFFFF
-#define IPATH_MSN_MASK 0xFFFFFF
-#define IPATH_QPN_MASK 0xFFFFFF
-#define IPATH_MULTICAST_LID_BASE 0xC000
-#define IPATH_EAGER_TID_ID INFINIPATH_I_TID_MASK
-#define IPATH_MULTICAST_QPN 0xFFFFFF
-
-/* Receive Header Queue: receive type (from infinipath) */
-#define RCVHQ_RCV_TYPE_EXPECTED  0
-#define RCVHQ_RCV_TYPE_EAGER     1
-#define RCVHQ_RCV_TYPE_NON_KD    2
-#define RCVHQ_RCV_TYPE_ERROR     3
-
-
-/* sub OpCodes - ith4x  */
-#define IPATH_ITH4X_OPCODE_ENCAP 0x81
-#define IPATH_ITH4X_OPCODE_LID_ARP 0x82
-
-#define IPATH_HEADER_QUEUE_WORDS 9
-
-/* functions for extracting fields from rcvhdrq entries for the driver.
- */
-static inline __u32 ipath_hdrget_err_flags(const __le32 * rbuf)
-{
-       return __le32_to_cpu(rbuf[1]) & INFINIPATH_RHF_H_ERR_MASK;
-}
-
-static inline __u32 ipath_hdrget_rcv_type(const __le32 * rbuf)
-{
-       return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_RCVTYPE_SHIFT)
-           & INFINIPATH_RHF_RCVTYPE_MASK;
-}
-
-static inline __u32 ipath_hdrget_length_in_bytes(const __le32 * rbuf)
-{
-       return ((__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_LENGTH_SHIFT)
-               & INFINIPATH_RHF_LENGTH_MASK) << 2;
-}
-
-static inline __u32 ipath_hdrget_index(const __le32 * rbuf)
-{
-       return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_EGRINDEX_SHIFT)
-           & INFINIPATH_RHF_EGRINDEX_MASK;
-}
-
-static inline __u32 ipath_hdrget_seq(const __le32 *rbuf)
-{
-       return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_SEQ_SHIFT)
-               & INFINIPATH_RHF_SEQ_MASK;
-}
-
-static inline __u32 ipath_hdrget_offset(const __le32 *rbuf)
-{
-       return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_HDRQ_OFFSET_SHIFT)
-               & INFINIPATH_RHF_HDRQ_OFFSET_MASK;
-}
-
-static inline __u32 ipath_hdrget_use_egr_buf(const __le32 *rbuf)
-{
-       return __le32_to_cpu(rbuf[0]) & INFINIPATH_RHF_L_USE_EGR;
-}
-
-static inline __u32 ipath_hdrget_ipath_ver(__le32 hdrword)
-{
-       return (__le32_to_cpu(hdrword) >> INFINIPATH_I_VERS_SHIFT)
-           & INFINIPATH_I_VERS_MASK;
-}
-
-#endif                         /* _IPATH_COMMON_H */
diff --git a/drivers/staging/rdma/ipath/ipath_cq.c b/drivers/staging/rdma/ipath/ipath_cq.c
deleted file mode 100644 (file)
index e9dd911..0000000
+++ /dev/null
@@ -1,483 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_verbs.h"
-
-/**
- * ipath_cq_enter - add a new entry to the completion queue
- * @cq: completion queue
- * @entry: work completion entry to add
- * @sig: true if @entry is a solicitated entry
- *
- * This may be called with qp->s_lock held.
- */
-void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
-{
-       struct ipath_cq_wc *wc;
-       unsigned long flags;
-       u32 head;
-       u32 next;
-
-       spin_lock_irqsave(&cq->lock, flags);
-
-       /*
-        * Note that the head pointer might be writable by user processes.
-        * Take care to verify it is a sane value.
-        */
-       wc = cq->queue;
-       head = wc->head;
-       if (head >= (unsigned) cq->ibcq.cqe) {
-               head = cq->ibcq.cqe;
-               next = 0;
-       } else
-               next = head + 1;
-       if (unlikely(next == wc->tail)) {
-               spin_unlock_irqrestore(&cq->lock, flags);
-               if (cq->ibcq.event_handler) {
-                       struct ib_event ev;
-
-                       ev.device = cq->ibcq.device;
-                       ev.element.cq = &cq->ibcq;
-                       ev.event = IB_EVENT_CQ_ERR;
-                       cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
-               }
-               return;
-       }
-       if (cq->ip) {
-               wc->uqueue[head].wr_id = entry->wr_id;
-               wc->uqueue[head].status = entry->status;
-               wc->uqueue[head].opcode = entry->opcode;
-               wc->uqueue[head].vendor_err = entry->vendor_err;
-               wc->uqueue[head].byte_len = entry->byte_len;
-               wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data;
-               wc->uqueue[head].qp_num = entry->qp->qp_num;
-               wc->uqueue[head].src_qp = entry->src_qp;
-               wc->uqueue[head].wc_flags = entry->wc_flags;
-               wc->uqueue[head].pkey_index = entry->pkey_index;
-               wc->uqueue[head].slid = entry->slid;
-               wc->uqueue[head].sl = entry->sl;
-               wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
-               wc->uqueue[head].port_num = entry->port_num;
-               /* Make sure entry is written before the head index. */
-               smp_wmb();
-       } else
-               wc->kqueue[head] = *entry;
-       wc->head = next;
-
-       if (cq->notify == IB_CQ_NEXT_COMP ||
-           (cq->notify == IB_CQ_SOLICITED && solicited)) {
-               cq->notify = IB_CQ_NONE;
-               cq->triggered++;
-               /*
-                * This will cause send_complete() to be called in
-                * another thread.
-                */
-               tasklet_hi_schedule(&cq->comptask);
-       }
-
-       spin_unlock_irqrestore(&cq->lock, flags);
-
-       if (entry->status != IB_WC_SUCCESS)
-               to_idev(cq->ibcq.device)->n_wqe_errs++;
-}
-
-/**
- * ipath_poll_cq - poll for work completion entries
- * @ibcq: the completion queue to poll
- * @num_entries: the maximum number of entries to return
- * @entry: pointer to array where work completions are placed
- *
- * Returns the number of completion entries polled.
- *
- * This may be called from interrupt context.  Also called by ib_poll_cq()
- * in the generic verbs code.
- */
-int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
-       struct ipath_cq *cq = to_icq(ibcq);
-       struct ipath_cq_wc *wc;
-       unsigned long flags;
-       int npolled;
-       u32 tail;
-
-       /* The kernel can only poll a kernel completion queue */
-       if (cq->ip) {
-               npolled = -EINVAL;
-               goto bail;
-       }
-
-       spin_lock_irqsave(&cq->lock, flags);
-
-       wc = cq->queue;
-       tail = wc->tail;
-       if (tail > (u32) cq->ibcq.cqe)
-               tail = (u32) cq->ibcq.cqe;
-       for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-               if (tail == wc->head)
-                       break;
-               /* The kernel doesn't need a RMB since it has the lock. */
-               *entry = wc->kqueue[tail];
-               if (tail >= cq->ibcq.cqe)
-                       tail = 0;
-               else
-                       tail++;
-       }
-       wc->tail = tail;
-
-       spin_unlock_irqrestore(&cq->lock, flags);
-
-bail:
-       return npolled;
-}
-
-static void send_complete(unsigned long data)
-{
-       struct ipath_cq *cq = (struct ipath_cq *)data;
-
-       /*
-        * The completion handler will most likely rearm the notification
-        * and poll for all pending entries.  If a new completion entry
-        * is added while we are in this routine, tasklet_hi_schedule()
-        * won't call us again until we return so we check triggered to
-        * see if we need to call the handler again.
-        */
-       for (;;) {
-               u8 triggered = cq->triggered;
-
-               cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
-
-               if (cq->triggered == triggered)
-                       return;
-       }
-}
-
-/**
- * ipath_create_cq - create a completion queue
- * @ibdev: the device this completion queue is attached to
- * @attr: creation attributes
- * @context: unused by the InfiniPath driver
- * @udata: unused by the InfiniPath driver
- *
- * Returns a pointer to the completion queue or negative errno values
- * for failure.
- *
- * Called by ib_create_cq() in the generic verbs code.
- */
-struct ib_cq *ipath_create_cq(struct ib_device *ibdev,
-                             const struct ib_cq_init_attr *attr,
-                             struct ib_ucontext *context,
-                             struct ib_udata *udata)
-{
-       int entries = attr->cqe;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_cq *cq;
-       struct ipath_cq_wc *wc;
-       struct ib_cq *ret;
-       u32 sz;
-
-       if (attr->flags)
-               return ERR_PTR(-EINVAL);
-
-       if (entries < 1 || entries > ib_ipath_max_cqes) {
-               ret = ERR_PTR(-EINVAL);
-               goto done;
-       }
-
-       /* Allocate the completion queue structure. */
-       cq = kmalloc(sizeof(*cq), GFP_KERNEL);
-       if (!cq) {
-               ret = ERR_PTR(-ENOMEM);
-               goto done;
-       }
-
-       /*
-        * Allocate the completion queue entries and head/tail pointers.
-        * This is allocated separately so that it can be resized and
-        * also mapped into user space.
-        * We need to use vmalloc() in order to support mmap and large
-        * numbers of entries.
-        */
-       sz = sizeof(*wc);
-       if (udata && udata->outlen >= sizeof(__u64))
-               sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
-       else
-               sz += sizeof(struct ib_wc) * (entries + 1);
-       wc = vmalloc_user(sz);
-       if (!wc) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail_cq;
-       }
-
-       /*
-        * Return the address of the WC as the offset to mmap.
-        * See ipath_mmap() for details.
-        */
-       if (udata && udata->outlen >= sizeof(__u64)) {
-               int err;
-
-               cq->ip = ipath_create_mmap_info(dev, sz, context, wc);
-               if (!cq->ip) {
-                       ret = ERR_PTR(-ENOMEM);
-                       goto bail_wc;
-               }
-
-               err = ib_copy_to_udata(udata, &cq->ip->offset,
-                                      sizeof(cq->ip->offset));
-               if (err) {
-                       ret = ERR_PTR(err);
-                       goto bail_ip;
-               }
-       } else
-               cq->ip = NULL;
-
-       spin_lock(&dev->n_cqs_lock);
-       if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
-               spin_unlock(&dev->n_cqs_lock);
-               ret = ERR_PTR(-ENOMEM);
-               goto bail_ip;
-       }
-
-       dev->n_cqs_allocated++;
-       spin_unlock(&dev->n_cqs_lock);
-
-       if (cq->ip) {
-               spin_lock_irq(&dev->pending_lock);
-               list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
-               spin_unlock_irq(&dev->pending_lock);
-       }
-
-       /*
-        * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
-        * The number of entries should be >= the number requested or return
-        * an error.
-        */
-       cq->ibcq.cqe = entries;
-       cq->notify = IB_CQ_NONE;
-       cq->triggered = 0;
-       spin_lock_init(&cq->lock);
-       tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
-       wc->head = 0;
-       wc->tail = 0;
-       cq->queue = wc;
-
-       ret = &cq->ibcq;
-
-       goto done;
-
-bail_ip:
-       kfree(cq->ip);
-bail_wc:
-       vfree(wc);
-bail_cq:
-       kfree(cq);
-done:
-       return ret;
-}
-
-/**
- * ipath_destroy_cq - destroy a completion queue
- * @ibcq: the completion queue to destroy.
- *
- * Returns 0 for success.
- *
- * Called by ib_destroy_cq() in the generic verbs code.
- */
-int ipath_destroy_cq(struct ib_cq *ibcq)
-{
-       struct ipath_ibdev *dev = to_idev(ibcq->device);
-       struct ipath_cq *cq = to_icq(ibcq);
-
-       tasklet_kill(&cq->comptask);
-       spin_lock(&dev->n_cqs_lock);
-       dev->n_cqs_allocated--;
-       spin_unlock(&dev->n_cqs_lock);
-       if (cq->ip)
-               kref_put(&cq->ip->ref, ipath_release_mmap_info);
-       else
-               vfree(cq->queue);
-       kfree(cq);
-
-       return 0;
-}
-
-/**
- * ipath_req_notify_cq - change the notification type for a completion queue
- * @ibcq: the completion queue
- * @notify_flags: the type of notification to request
- *
- * Returns 0 for success.
- *
- * This may be called from interrupt context.  Also called by
- * ib_req_notify_cq() in the generic verbs code.
- */
-int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
-{
-       struct ipath_cq *cq = to_icq(ibcq);
-       unsigned long flags;
-       int ret = 0;
-
-       spin_lock_irqsave(&cq->lock, flags);
-       /*
-        * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
-        * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
-        */
-       if (cq->notify != IB_CQ_NEXT_COMP)
-               cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
-
-       if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
-           cq->queue->head != cq->queue->tail)
-               ret = 1;
-
-       spin_unlock_irqrestore(&cq->lock, flags);
-
-       return ret;
-}
-
-/**
- * ipath_resize_cq - change the size of the CQ
- * @ibcq: the completion queue
- *
- * Returns 0 for success.
- */
-int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
-{
-       struct ipath_cq *cq = to_icq(ibcq);
-       struct ipath_cq_wc *old_wc;
-       struct ipath_cq_wc *wc;
-       u32 head, tail, n;
-       int ret;
-       u32 sz;
-
-       if (cqe < 1 || cqe > ib_ipath_max_cqes) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       /*
-        * Need to use vmalloc() if we want to support large #s of entries.
-        */
-       sz = sizeof(*wc);
-       if (udata && udata->outlen >= sizeof(__u64))
-               sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
-       else
-               sz += sizeof(struct ib_wc) * (cqe + 1);
-       wc = vmalloc_user(sz);
-       if (!wc) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       /* Check that we can write the offset to mmap. */
-       if (udata && udata->outlen >= sizeof(__u64)) {
-               __u64 offset = 0;
-
-               ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
-               if (ret)
-                       goto bail_free;
-       }
-
-       spin_lock_irq(&cq->lock);
-       /*
-        * Make sure head and tail are sane since they
-        * might be user writable.
-        */
-       old_wc = cq->queue;
-       head = old_wc->head;
-       if (head > (u32) cq->ibcq.cqe)
-               head = (u32) cq->ibcq.cqe;
-       tail = old_wc->tail;
-       if (tail > (u32) cq->ibcq.cqe)
-               tail = (u32) cq->ibcq.cqe;
-       if (head < tail)
-               n = cq->ibcq.cqe + 1 + head - tail;
-       else
-               n = head - tail;
-       if (unlikely((u32)cqe < n)) {
-               ret = -EINVAL;
-               goto bail_unlock;
-       }
-       for (n = 0; tail != head; n++) {
-               if (cq->ip)
-                       wc->uqueue[n] = old_wc->uqueue[tail];
-               else
-                       wc->kqueue[n] = old_wc->kqueue[tail];
-               if (tail == (u32) cq->ibcq.cqe)
-                       tail = 0;
-               else
-                       tail++;
-       }
-       cq->ibcq.cqe = cqe;
-       wc->head = n;
-       wc->tail = 0;
-       cq->queue = wc;
-       spin_unlock_irq(&cq->lock);
-
-       vfree(old_wc);
-
-       if (cq->ip) {
-               struct ipath_ibdev *dev = to_idev(ibcq->device);
-               struct ipath_mmap_info *ip = cq->ip;
-
-               ipath_update_mmap_info(dev, ip, sz, wc);
-
-               /*
-                * Return the offset to mmap.
-                * See ipath_mmap() for details.
-                */
-               if (udata && udata->outlen >= sizeof(__u64)) {
-                       ret = ib_copy_to_udata(udata, &ip->offset,
-                                              sizeof(ip->offset));
-                       if (ret)
-                               goto bail;
-               }
-
-               spin_lock_irq(&dev->pending_lock);
-               if (list_empty(&ip->pending_mmaps))
-                       list_add(&ip->pending_mmaps, &dev->pending_mmaps);
-               spin_unlock_irq(&dev->pending_lock);
-       }
-
-       ret = 0;
-       goto bail;
-
-bail_unlock:
-       spin_unlock_irq(&cq->lock);
-bail_free:
-       vfree(wc);
-bail:
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_debug.h b/drivers/staging/rdma/ipath/ipath_debug.h
deleted file mode 100644 (file)
index 65926cd..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _IPATH_DEBUG_H
-#define _IPATH_DEBUG_H
-
-#ifndef _IPATH_DEBUGGING       /* debugging enabled or not */
-#define _IPATH_DEBUGGING 1
-#endif
-
-#if _IPATH_DEBUGGING
-
-/*
- * Mask values for debugging.  The scheme allows us to compile out any
- * of the debug tracing stuff, and if compiled in, to enable or disable
- * dynamically.  This can be set at modprobe time also:
- *      modprobe infinipath.ko infinipath_debug=7
- */
-
-#define __IPATH_INFO        0x1        /* generic low verbosity stuff */
-#define __IPATH_DBG         0x2        /* generic debug */
-#define __IPATH_TRSAMPLE    0x8        /* generate trace buffer sample entries */
-/* leave some low verbosity spots open */
-#define __IPATH_VERBDBG     0x40       /* very verbose debug */
-#define __IPATH_PKTDBG      0x80       /* print packet data */
-/* print process startup (init)/exit messages */
-#define __IPATH_PROCDBG     0x100
-/* print mmap/fault stuff, not using VDBG any more */
-#define __IPATH_MMDBG       0x200
-#define __IPATH_ERRPKTDBG   0x400
-#define __IPATH_USER_SEND   0x1000     /* use user mode send */
-#define __IPATH_KERNEL_SEND 0x2000     /* use kernel mode send */
-#define __IPATH_EPKTDBG     0x4000     /* print ethernet packet data */
-#define __IPATH_IPATHDBG    0x10000    /* Ethernet (IPATH) gen debug */
-#define __IPATH_IPATHWARN   0x20000    /* Ethernet (IPATH) warnings */
-#define __IPATH_IPATHERR    0x40000    /* Ethernet (IPATH) errors */
-#define __IPATH_IPATHPD     0x80000    /* Ethernet (IPATH) packet dump */
-#define __IPATH_IPATHTABLE  0x100000   /* Ethernet (IPATH) table dump */
-#define __IPATH_LINKVERBDBG 0x200000   /* very verbose linkchange debug */
-
-#else                          /* _IPATH_DEBUGGING */
-
-/*
- * define all of these even with debugging off, for the few places that do
- * if(infinipath_debug & _IPATH_xyzzy), but in a way that will make the
- * compiler eliminate the code
- */
-
-#define __IPATH_INFO      0x0  /* generic low verbosity stuff */
-#define __IPATH_DBG       0x0  /* generic debug */
-#define __IPATH_TRSAMPLE  0x0  /* generate trace buffer sample entries */
-#define __IPATH_VERBDBG   0x0  /* very verbose debug */
-#define __IPATH_PKTDBG    0x0  /* print packet data */
-#define __IPATH_PROCDBG   0x0  /* process startup (init)/exit messages */
-/* print mmap/fault stuff, not using VDBG any more */
-#define __IPATH_MMDBG     0x0
-#define __IPATH_EPKTDBG   0x0  /* print ethernet packet data */
-#define __IPATH_IPATHDBG  0x0  /* Ethernet (IPATH) table dump on */
-#define __IPATH_IPATHWARN 0x0  /* Ethernet (IPATH) warnings on   */
-#define __IPATH_IPATHERR  0x0  /* Ethernet (IPATH) errors on   */
-#define __IPATH_IPATHPD   0x0  /* Ethernet (IPATH) packet dump on   */
-#define __IPATH_IPATHTABLE 0x0 /* Ethernet (IPATH) packet dump on   */
-#define __IPATH_LINKVERBDBG 0x0        /* very verbose linkchange debug */
-
-#endif                         /* _IPATH_DEBUGGING */
-
-#define __IPATH_VERBOSEDBG __IPATH_VERBDBG
-
-#endif                         /* _IPATH_DEBUG_H */
diff --git a/drivers/staging/rdma/ipath/ipath_diag.c b/drivers/staging/rdma/ipath/ipath_diag.c
deleted file mode 100644 (file)
index 45802e9..0000000
+++ /dev/null
@@ -1,551 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file contains support for diagnostic functions.  It is accessed by
- * opening the ipath_diag device, normally minor number 129.  Diagnostic use
- * of the InfiniPath chip may render the chip or board unusable until the
- * driver is unloaded, or in some cases, until the system is rebooted.
- *
- * Accesses to the chip through this interface are not similar to going
- * through the /sys/bus/pci resource mmap interface.
- */
-
-#include <linux/io.h>
-#include <linux/pci.h>
-#include <linux/vmalloc.h>
-#include <linux/fs.h>
-#include <linux/export.h>
-#include <asm/uaccess.h>
-
-#include "ipath_kernel.h"
-#include "ipath_common.h"
-
-int ipath_diag_inuse;
-static int diag_set_link;
-
-static int ipath_diag_open(struct inode *in, struct file *fp);
-static int ipath_diag_release(struct inode *in, struct file *fp);
-static ssize_t ipath_diag_read(struct file *fp, char __user *data,
-                              size_t count, loff_t *off);
-static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
-                               size_t count, loff_t *off);
-
-static const struct file_operations diag_file_ops = {
-       .owner = THIS_MODULE,
-       .write = ipath_diag_write,
-       .read = ipath_diag_read,
-       .open = ipath_diag_open,
-       .release = ipath_diag_release,
-       .llseek = default_llseek,
-};
-
-static ssize_t ipath_diagpkt_write(struct file *fp,
-                                  const char __user *data,
-                                  size_t count, loff_t *off);
-
-static const struct file_operations diagpkt_file_ops = {
-       .owner = THIS_MODULE,
-       .write = ipath_diagpkt_write,
-       .llseek = noop_llseek,
-};
-
-static atomic_t diagpkt_count = ATOMIC_INIT(0);
-static struct cdev *diagpkt_cdev;
-static struct device *diagpkt_dev;
-
-int ipath_diag_add(struct ipath_devdata *dd)
-{
-       char name[16];
-       int ret = 0;
-
-       if (atomic_inc_return(&diagpkt_count) == 1) {
-               ret = ipath_cdev_init(IPATH_DIAGPKT_MINOR,
-                                     "ipath_diagpkt", &diagpkt_file_ops,
-                                     &diagpkt_cdev, &diagpkt_dev);
-
-               if (ret) {
-                       ipath_dev_err(dd, "Couldn't create ipath_diagpkt "
-                                     "device: %d", ret);
-                       goto done;
-               }
-       }
-
-       snprintf(name, sizeof(name), "ipath_diag%d", dd->ipath_unit);
-
-       ret = ipath_cdev_init(IPATH_DIAG_MINOR_BASE + dd->ipath_unit, name,
-                             &diag_file_ops, &dd->diag_cdev,
-                             &dd->diag_dev);
-       if (ret)
-               ipath_dev_err(dd, "Couldn't create %s device: %d",
-                             name, ret);
-
-done:
-       return ret;
-}
-
-void ipath_diag_remove(struct ipath_devdata *dd)
-{
-       if (atomic_dec_and_test(&diagpkt_count))
-               ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_dev);
-
-       ipath_cdev_cleanup(&dd->diag_cdev, &dd->diag_dev);
-}
-
-/**
- * ipath_read_umem64 - read a 64-bit quantity from the chip into user space
- * @dd: the infinipath device
- * @uaddr: the location to store the data in user memory
- * @caddr: the source chip address (full pointer, not offset)
- * @count: number of bytes to copy (multiple of 32 bits)
- *
- * This function also localizes all chip memory accesses.
- * The copy should be written such that we read full cacheline packets
- * from the chip.  This is usually used for a single qword
- *
- * NOTE:  This assumes the chip address is 64-bit aligned.
- */
-static int ipath_read_umem64(struct ipath_devdata *dd, void __user *uaddr,
-                            const void __iomem *caddr, size_t count)
-{
-       const u64 __iomem *reg_addr = caddr;
-       const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
-       int ret;
-
-       /* not very efficient, but it works for now */
-       if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       while (reg_addr < reg_end) {
-               u64 data = readq(reg_addr);
-               if (copy_to_user(uaddr, &data, sizeof(u64))) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-               reg_addr++;
-               uaddr += sizeof(u64);
-       }
-       ret = 0;
-bail:
-       return ret;
-}
-
-/**
- * ipath_write_umem64 - write a 64-bit quantity to the chip from user space
- * @dd: the infinipath device
- * @caddr: the destination chip address (full pointer, not offset)
- * @uaddr: the source of the data in user memory
- * @count: the number of bytes to copy (multiple of 32 bits)
- *
- * This is usually used for a single qword
- * NOTE:  This assumes the chip address is 64-bit aligned.
- */
-
-static int ipath_write_umem64(struct ipath_devdata *dd, void __iomem *caddr,
-                             const void __user *uaddr, size_t count)
-{
-       u64 __iomem *reg_addr = caddr;
-       const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
-       int ret;
-
-       /* not very efficient, but it works for now */
-       if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       while (reg_addr < reg_end) {
-               u64 data;
-               if (copy_from_user(&data, uaddr, sizeof(data))) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-               writeq(data, reg_addr);
-
-               reg_addr++;
-               uaddr += sizeof(u64);
-       }
-       ret = 0;
-bail:
-       return ret;
-}
-
-/**
- * ipath_read_umem32 - read a 32-bit quantity from the chip into user space
- * @dd: the infinipath device
- * @uaddr: the location to store the data in user memory
- * @caddr: the source chip address (full pointer, not offset)
- * @count: number of bytes to copy
- *
- * read 32 bit values, not 64 bit; for memories that only
- * support 32 bit reads; usually a single dword.
- */
-static int ipath_read_umem32(struct ipath_devdata *dd, void __user *uaddr,
-                            const void __iomem *caddr, size_t count)
-{
-       const u32 __iomem *reg_addr = caddr;
-       const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
-       int ret;
-
-       if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
-           reg_end > (u32 __iomem *) dd->ipath_kregend) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       /* not very efficient, but it works for now */
-       while (reg_addr < reg_end) {
-               u32 data = readl(reg_addr);
-               if (copy_to_user(uaddr, &data, sizeof(data))) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-
-               reg_addr++;
-               uaddr += sizeof(u32);
-
-       }
-       ret = 0;
-bail:
-       return ret;
-}
-
-/**
- * ipath_write_umem32 - write a 32-bit quantity to the chip from user space
- * @dd: the infinipath device
- * @caddr: the destination chip address (full pointer, not offset)
- * @uaddr: the source of the data in user memory
- * @count: number of bytes to copy
- *
- * write 32 bit values, not 64 bit; for memories that only
- * support 32 bit write; usually a single dword.
- */
-
-static int ipath_write_umem32(struct ipath_devdata *dd, void __iomem *caddr,
-                             const void __user *uaddr, size_t count)
-{
-       u32 __iomem *reg_addr = caddr;
-       const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
-       int ret;
-
-       if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
-           reg_end > (u32 __iomem *) dd->ipath_kregend) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       while (reg_addr < reg_end) {
-               u32 data;
-               if (copy_from_user(&data, uaddr, sizeof(data))) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-               writel(data, reg_addr);
-
-               reg_addr++;
-               uaddr += sizeof(u32);
-       }
-       ret = 0;
-bail:
-       return ret;
-}
-
-static int ipath_diag_open(struct inode *in, struct file *fp)
-{
-       int unit = iminor(in) - IPATH_DIAG_MINOR_BASE;
-       struct ipath_devdata *dd;
-       int ret;
-
-       mutex_lock(&ipath_mutex);
-
-       if (ipath_diag_inuse) {
-               ret = -EBUSY;
-               goto bail;
-       }
-
-       dd = ipath_lookup(unit);
-
-       if (dd == NULL || !(dd->ipath_flags & IPATH_PRESENT) ||
-           !dd->ipath_kregbase) {
-               ret = -ENODEV;
-               goto bail;
-       }
-
-       fp->private_data = dd;
-       ipath_diag_inuse = -2;
-       diag_set_link = 0;
-       ret = 0;
-
-       /* Only expose a way to reset the device if we
-          make it into diag mode. */
-       ipath_expose_reset(&dd->pcidev->dev);
-
-bail:
-       mutex_unlock(&ipath_mutex);
-
-       return ret;
-}
-
-/**
- * ipath_diagpkt_write - write an IB packet
- * @fp: the diag data device file pointer
- * @data: ipath_diag_pkt structure saying where to get the packet
- * @count: size of data to write
- * @off: unused by this code
- */
-static ssize_t ipath_diagpkt_write(struct file *fp,
-                                  const char __user *data,
-                                  size_t count, loff_t *off)
-{
-       u32 __iomem *piobuf;
-       u32 plen, pbufn, maxlen_reserve;
-       struct ipath_diag_pkt odp;
-       struct ipath_diag_xpkt dp;
-       u32 *tmpbuf = NULL;
-       struct ipath_devdata *dd;
-       ssize_t ret = 0;
-       u64 val;
-       u32 l_state, lt_state; /* LinkState, LinkTrainingState */
-
-
-       if (count == sizeof(dp)) {
-               if (copy_from_user(&dp, data, sizeof(dp))) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-       } else if (count == sizeof(odp)) {
-               if (copy_from_user(&odp, data, sizeof(odp))) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-               dp.len = odp.len;
-               dp.unit = odp.unit;
-               dp.data = odp.data;
-               dp.pbc_wd = 0;
-       } else {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       /* send count must be an exact number of dwords */
-       if (dp.len & 3) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       plen = dp.len >> 2;
-
-       dd = ipath_lookup(dp.unit);
-       if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
-           !dd->ipath_kregbase) {
-               ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n",
-                          dp.unit);
-               ret = -ENODEV;
-               goto bail;
-       }
-
-       if (ipath_diag_inuse && !diag_set_link &&
-           !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-               diag_set_link = 1;
-               ipath_cdbg(VERBOSE, "Trying to set to set link active for "
-                          "diag pkt\n");
-               ipath_set_linkstate(dd, IPATH_IB_LINKARM);
-               ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
-       }
-
-       if (!(dd->ipath_flags & IPATH_INITTED)) {
-               /* no hardware, freeze, etc. */
-               ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit);
-               ret = -ENODEV;
-               goto bail;
-       }
-       /*
-        * Want to skip check for l_state if using custom PBC,
-        * because we might be trying to force an SM packet out.
-        * first-cut, skip _all_ state checking in that case.
-        */
-       val = ipath_ib_state(dd, dd->ipath_lastibcstat);
-       lt_state = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
-       l_state = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
-       if (!dp.pbc_wd && (lt_state != INFINIPATH_IBCS_LT_STATE_LINKUP ||
-           (val != dd->ib_init && val != dd->ib_arm &&
-           val != dd->ib_active))) {
-               ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
-                          dd->ipath_unit, (unsigned long long) val);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       /*
-        * need total length before first word written, plus 2 Dwords. One Dword
-        * is for padding so we get the full user data when not aligned on
-        * a word boundary. The other Dword is to make sure we have room for the
-        * ICRC which gets tacked on later.
-        */
-       maxlen_reserve = 2 * sizeof(u32);
-       if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) {
-               ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
-                         dp.len, dd->ipath_ibmaxlen);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       plen = sizeof(u32) + dp.len;
-
-       tmpbuf = vmalloc(plen);
-       if (!tmpbuf) {
-               dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
-                        "failing\n");
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       if (copy_from_user(tmpbuf,
-                          (const void __user *) (unsigned long) dp.data,
-                          dp.len)) {
-               ret = -EFAULT;
-               goto bail;
-       }
-
-       plen >>= 2;             /* in dwords */
-
-       piobuf = ipath_getpiobuf(dd, plen, &pbufn);
-       if (!piobuf) {
-               ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n",
-                          dd->ipath_unit);
-               ret = -EBUSY;
-               goto bail;
-       }
-       /* disarm it just to be extra sure */
-       ipath_disarm_piobufs(dd, pbufn, 1);
-
-       if (ipath_debug & __IPATH_PKTDBG)
-               ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
-                          dd->ipath_unit, plen - 1, pbufn);
-
-       if (dp.pbc_wd == 0)
-               dp.pbc_wd = plen;
-       writeq(dp.pbc_wd, piobuf);
-       /*
-        * Copy all by the trigger word, then flush, so it's written
-        * to chip before trigger word, then write trigger word, then
-        * flush again, so packet is sent.
-        */
-       if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
-               ipath_flush_wc();
-               __iowrite32_copy(piobuf + 2, tmpbuf, plen - 1);
-               ipath_flush_wc();
-               __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
-       } else
-               __iowrite32_copy(piobuf + 2, tmpbuf, plen);
-
-       ipath_flush_wc();
-
-       ret = sizeof(dp);
-
-bail:
-       vfree(tmpbuf);
-       return ret;
-}
-
-static int ipath_diag_release(struct inode *in, struct file *fp)
-{
-       mutex_lock(&ipath_mutex);
-       ipath_diag_inuse = 0;
-       fp->private_data = NULL;
-       mutex_unlock(&ipath_mutex);
-       return 0;
-}
-
-static ssize_t ipath_diag_read(struct file *fp, char __user *data,
-                              size_t count, loff_t *off)
-{
-       struct ipath_devdata *dd = fp->private_data;
-       void __iomem *kreg_base;
-       ssize_t ret;
-
-       kreg_base = dd->ipath_kregbase;
-
-       if (count == 0)
-               ret = 0;
-       else if ((count % 4) || (*off % 4))
-               /* address or length is not 32-bit aligned, hence invalid */
-               ret = -EINVAL;
-       else if (ipath_diag_inuse < 1 && (*off || count != 8))
-               ret = -EINVAL;  /* prevent cat /dev/ipath_diag* */
-       else if ((count % 8) || (*off % 8))
-               /* address or length not 64-bit aligned; do 32-bit reads */
-               ret = ipath_read_umem32(dd, data, kreg_base + *off, count);
-       else
-               ret = ipath_read_umem64(dd, data, kreg_base + *off, count);
-
-       if (ret >= 0) {
-               *off += count;
-               ret = count;
-               if (ipath_diag_inuse == -2)
-                       ipath_diag_inuse++;
-       }
-
-       return ret;
-}
-
-static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
-                               size_t count, loff_t *off)
-{
-       struct ipath_devdata *dd = fp->private_data;
-       void __iomem *kreg_base;
-       ssize_t ret;
-
-       kreg_base = dd->ipath_kregbase;
-
-       if (count == 0)
-               ret = 0;
-       else if ((count % 4) || (*off % 4))
-               /* address or length is not 32-bit aligned, hence invalid */
-               ret = -EINVAL;
-       else if ((ipath_diag_inuse == -1 && (*off || count != 8)) ||
-                ipath_diag_inuse == -2)  /* read qw off 0, write qw off 0 */
-               ret = -EINVAL;  /* before any other write allowed */
-       else if ((count % 8) || (*off % 8))
-               /* address or length not 64-bit aligned; do 32-bit writes */
-               ret = ipath_write_umem32(dd, kreg_base + *off, data, count);
-       else
-               ret = ipath_write_umem64(dd, kreg_base + *off, data, count);
-
-       if (ret >= 0) {
-               *off += count;
-               ret = count;
-               if (ipath_diag_inuse == -1)
-                       ipath_diag_inuse = 1; /* all read/write OK now */
-       }
-
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_dma.c b/drivers/staging/rdma/ipath/ipath_dma.c
deleted file mode 100644 (file)
index 123a8c0..0000000
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Copyright (c) 2006 QLogic, Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/scatterlist.h>
-#include <linux/gfp.h>
-#include <rdma/ib_verbs.h>
-
-#include "ipath_verbs.h"
-
-#define BAD_DMA_ADDRESS ((u64) 0)
-
-/*
- * The following functions implement driver specific replacements
- * for the ib_dma_*() functions.
- *
- * These functions return kernel virtual addresses instead of
- * device bus addresses since the driver uses the CPU to copy
- * data instead of using hardware DMA.
- */
-
-static int ipath_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
-       return dma_addr == BAD_DMA_ADDRESS;
-}
-
-static u64 ipath_dma_map_single(struct ib_device *dev,
-                               void *cpu_addr, size_t size,
-                               enum dma_data_direction direction)
-{
-       BUG_ON(!valid_dma_direction(direction));
-       return (u64) cpu_addr;
-}
-
-static void ipath_dma_unmap_single(struct ib_device *dev,
-                                  u64 addr, size_t size,
-                                  enum dma_data_direction direction)
-{
-       BUG_ON(!valid_dma_direction(direction));
-}
-
-static u64 ipath_dma_map_page(struct ib_device *dev,
-                             struct page *page,
-                             unsigned long offset,
-                             size_t size,
-                             enum dma_data_direction direction)
-{
-       u64 addr;
-
-       BUG_ON(!valid_dma_direction(direction));
-
-       if (offset + size > PAGE_SIZE) {
-               addr = BAD_DMA_ADDRESS;
-               goto done;
-       }
-
-       addr = (u64) page_address(page);
-       if (addr)
-               addr += offset;
-       /* TODO: handle highmem pages */
-
-done:
-       return addr;
-}
-
-static void ipath_dma_unmap_page(struct ib_device *dev,
-                                u64 addr, size_t size,
-                                enum dma_data_direction direction)
-{
-       BUG_ON(!valid_dma_direction(direction));
-}
-
-static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl,
-                       int nents, enum dma_data_direction direction)
-{
-       struct scatterlist *sg;
-       u64 addr;
-       int i;
-       int ret = nents;
-
-       BUG_ON(!valid_dma_direction(direction));
-
-       for_each_sg(sgl, sg, nents, i) {
-               addr = (u64) page_address(sg_page(sg));
-               /* TODO: handle highmem pages */
-               if (!addr) {
-                       ret = 0;
-                       break;
-               }
-               sg->dma_address = addr + sg->offset;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
-               sg->dma_length = sg->length;
-#endif
-       }
-       return ret;
-}
-
-static void ipath_unmap_sg(struct ib_device *dev,
-                          struct scatterlist *sg, int nents,
-                          enum dma_data_direction direction)
-{
-       BUG_ON(!valid_dma_direction(direction));
-}
-
-static void ipath_sync_single_for_cpu(struct ib_device *dev,
-                                     u64 addr,
-                                     size_t size,
-                                     enum dma_data_direction dir)
-{
-}
-
-static void ipath_sync_single_for_device(struct ib_device *dev,
-                                        u64 addr,
-                                        size_t size,
-                                        enum dma_data_direction dir)
-{
-}
-
-static void *ipath_dma_alloc_coherent(struct ib_device *dev, size_t size,
-                                     u64 *dma_handle, gfp_t flag)
-{
-       struct page *p;
-       void *addr = NULL;
-
-       p = alloc_pages(flag, get_order(size));
-       if (p)
-               addr = page_address(p);
-       if (dma_handle)
-               *dma_handle = (u64) addr;
-       return addr;
-}
-
-static void ipath_dma_free_coherent(struct ib_device *dev, size_t size,
-                                   void *cpu_addr, u64 dma_handle)
-{
-       free_pages((unsigned long) cpu_addr, get_order(size));
-}
-
-struct ib_dma_mapping_ops ipath_dma_mapping_ops = {
-       .mapping_error = ipath_mapping_error,
-       .map_single = ipath_dma_map_single,
-       .unmap_single = ipath_dma_unmap_single,
-       .map_page = ipath_dma_map_page,
-       .unmap_page = ipath_dma_unmap_page,
-       .map_sg = ipath_map_sg,
-       .unmap_sg = ipath_unmap_sg,
-       .sync_single_for_cpu = ipath_sync_single_for_cpu,
-       .sync_single_for_device = ipath_sync_single_for_device,
-       .alloc_coherent = ipath_dma_alloc_coherent,
-       .free_coherent = ipath_dma_free_coherent
-};
diff --git a/drivers/staging/rdma/ipath/ipath_driver.c b/drivers/staging/rdma/ipath/ipath_driver.c
deleted file mode 100644 (file)
index 2ab22f9..0000000
+++ /dev/null
@@ -1,2784 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/spinlock.h>
-#include <linux/idr.h>
-#include <linux/pci.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-#include <linux/netdevice.h>
-#include <linux/vmalloc.h>
-#include <linux/bitmap.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#ifdef CONFIG_X86_64
-#include <asm/pat.h>
-#endif
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-
-static void ipath_update_pio_bufs(struct ipath_devdata *);
-
-const char *ipath_get_unit_name(int unit)
-{
-       static char iname[16];
-       snprintf(iname, sizeof iname, "infinipath%u", unit);
-       return iname;
-}
-
-#define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
-#define PFX IPATH_DRV_NAME ": "
-
-/*
- * The size has to be longer than this string, so we can append
- * board/chip information to it in the init code.
- */
-const char ib_ipath_version[] = IPATH_IDSTR "\n";
-
-static struct idr unit_table;
-DEFINE_SPINLOCK(ipath_devs_lock);
-LIST_HEAD(ipath_dev_list);
-
-wait_queue_head_t ipath_state_wait;
-
-unsigned ipath_debug = __IPATH_INFO;
-
-module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(debug, "mask for debug prints");
-EXPORT_SYMBOL_GPL(ipath_debug);
-
-unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */
-module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);
-MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");
-
-static unsigned ipath_hol_timeout_ms = 13000;
-module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO);
-MODULE_PARM_DESC(hol_timeout_ms,
-       "duration of user app suspension after link failure");
-
-unsigned ipath_linkrecovery = 1;
-module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue");
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("QLogic <support@qlogic.com>");
-MODULE_DESCRIPTION("QLogic InfiniPath driver");
-
-/*
- * Table to translate the LINKTRAININGSTATE portion of
- * IBCStatus to a human-readable form.
- */
-const char *ipath_ibcstatus_str[] = {
-       "Disabled",
-       "LinkUp",
-       "PollActive",
-       "PollQuiet",
-       "SleepDelay",
-       "SleepQuiet",
-       "LState6",              /* unused */
-       "LState7",              /* unused */
-       "CfgDebounce",
-       "CfgRcvfCfg",
-       "CfgWaitRmt",
-       "CfgIdle",
-       "RecovRetrain",
-       "CfgTxRevLane",         /* unused before IBA7220 */
-       "RecovWaitRmt",
-       "RecovIdle",
-       /* below were added for IBA7220 */
-       "CfgEnhanced",
-       "CfgTest",
-       "CfgWaitRmtTest",
-       "CfgWaitCfgEnhanced",
-       "SendTS_T",
-       "SendTstIdles",
-       "RcvTS_T",
-       "SendTst_TS1s",
-       "LTState18", "LTState19", "LTState1A", "LTState1B",
-       "LTState1C", "LTState1D", "LTState1E", "LTState1F"
-};
-
-static void ipath_remove_one(struct pci_dev *);
-static int ipath_init_one(struct pci_dev *, const struct pci_device_id *);
-
-/* Only needed for registration, nothing else needs this info */
-#define PCI_VENDOR_ID_PATHSCALE 0x1fc1
-#define PCI_DEVICE_ID_INFINIPATH_HT 0xd
-
-/* Number of seconds before our card status check...  */
-#define STATUS_TIMEOUT 60
-
-static const struct pci_device_id ipath_pci_tbl[] = {
-       { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
-       { 0, }
-};
-
-MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);
-
-static struct pci_driver ipath_driver = {
-       .name = IPATH_DRV_NAME,
-       .probe = ipath_init_one,
-       .remove = ipath_remove_one,
-       .id_table = ipath_pci_tbl,
-       .driver = {
-               .groups = ipath_driver_attr_groups,
-       },
-};
-
-static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
-                            u32 *bar0, u32 *bar1)
-{
-       int ret;
-
-       ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0);
-       if (ret)
-               ipath_dev_err(dd, "failed to read bar0 before enable: "
-                             "error %d\n", -ret);
-
-       ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1);
-       if (ret)
-               ipath_dev_err(dd, "failed to read bar1 before enable: "
-                             "error %d\n", -ret);
-
-       ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1);
-}
-
-static void ipath_free_devdata(struct pci_dev *pdev,
-                              struct ipath_devdata *dd)
-{
-       unsigned long flags;
-
-       pci_set_drvdata(pdev, NULL);
-
-       if (dd->ipath_unit != -1) {
-               spin_lock_irqsave(&ipath_devs_lock, flags);
-               idr_remove(&unit_table, dd->ipath_unit);
-               list_del(&dd->ipath_list);
-               spin_unlock_irqrestore(&ipath_devs_lock, flags);
-       }
-       vfree(dd);
-}
-
-static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
-{
-       unsigned long flags;
-       struct ipath_devdata *dd;
-       int ret;
-
-       dd = vzalloc(sizeof(*dd));
-       if (!dd) {
-               dd = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-       dd->ipath_unit = -1;
-
-       idr_preload(GFP_KERNEL);
-       spin_lock_irqsave(&ipath_devs_lock, flags);
-
-       ret = idr_alloc(&unit_table, dd, 0, 0, GFP_NOWAIT);
-       if (ret < 0) {
-               printk(KERN_ERR IPATH_DRV_NAME
-                      ": Could not allocate unit ID: error %d\n", -ret);
-               ipath_free_devdata(pdev, dd);
-               dd = ERR_PTR(ret);
-               goto bail_unlock;
-       }
-       dd->ipath_unit = ret;
-
-       dd->pcidev = pdev;
-       pci_set_drvdata(pdev, dd);
-
-       list_add(&dd->ipath_list, &ipath_dev_list);
-
-bail_unlock:
-       spin_unlock_irqrestore(&ipath_devs_lock, flags);
-       idr_preload_end();
-bail:
-       return dd;
-}
-
-static inline struct ipath_devdata *__ipath_lookup(int unit)
-{
-       return idr_find(&unit_table, unit);
-}
-
-struct ipath_devdata *ipath_lookup(int unit)
-{
-       struct ipath_devdata *dd;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ipath_devs_lock, flags);
-       dd = __ipath_lookup(unit);
-       spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-       return dd;
-}
-
-int ipath_count_units(int *npresentp, int *nupp, int *maxportsp)
-{
-       int nunits, npresent, nup;
-       struct ipath_devdata *dd;
-       unsigned long flags;
-       int maxports;
-
-       nunits = npresent = nup = maxports = 0;
-
-       spin_lock_irqsave(&ipath_devs_lock, flags);
-
-       list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
-               nunits++;
-               if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase)
-                       npresent++;
-               if (dd->ipath_lid &&
-                   !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN
-                                        | IPATH_LINKUNK)))
-                       nup++;
-               if (dd->ipath_cfgports > maxports)
-                       maxports = dd->ipath_cfgports;
-       }
-
-       spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-       if (npresentp)
-               *npresentp = npresent;
-       if (nupp)
-               *nupp = nup;
-       if (maxportsp)
-               *maxportsp = maxports;
-
-       return nunits;
-}
-
-/*
- * These next two routines are placeholders in case we don't have per-arch
- * code for controlling write combining.  If explicit control of write
- * combining is not available, performance will probably be awful.
- */
-
-int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd)
-{
-       return -EOPNOTSUPP;
-}
-
-void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)
-{
-}
-
-/*
- * Perform a PIO buffer bandwidth write test, to verify proper system
- * configuration.  Even when all the setup calls work, occasionally
- * BIOS or other issues can prevent write combining from working, or
- * can cause other bandwidth problems to the chip.
- *
- * This test simply writes the same buffer over and over again, and
- * measures close to the peak bandwidth to the chip (not testing
- * data bandwidth to the wire).   On chips that use an address-based
- * trigger to send packets to the wire, this is easy.  On chips that
- * use a count to trigger, we want to make sure that the packet doesn't
- * go out on the wire, or trigger flow control checks.
- */
-static void ipath_verify_pioperf(struct ipath_devdata *dd)
-{
-       u32 pbnum, cnt, lcnt;
-       u32 __iomem *piobuf;
-       u32 *addr;
-       u64 msecs, emsecs;
-
-       piobuf = ipath_getpiobuf(dd, 0, &pbnum);
-       if (!piobuf) {
-               dev_info(&dd->pcidev->dev,
-                       "No PIObufs for checking perf, skipping\n");
-               return;
-       }
-
-       /*
-        * Enough to give us a reasonable test, less than piobuf size, and
-        * likely multiple of store buffer length.
-        */
-       cnt = 1024;
-
-       addr = vmalloc(cnt);
-       if (!addr) {
-               dev_info(&dd->pcidev->dev,
-                       "Couldn't get memory for checking PIO perf,"
-                       " skipping\n");
-               goto done;
-       }
-
-       preempt_disable();  /* we want reasonably accurate elapsed time */
-       msecs = 1 + jiffies_to_msecs(jiffies);
-       for (lcnt = 0; lcnt < 10000U; lcnt++) {
-               /* wait until we cross msec boundary */
-               if (jiffies_to_msecs(jiffies) >= msecs)
-                       break;
-               udelay(1);
-       }
-
-       ipath_disable_armlaunch(dd);
-
-       /*
-        * length 0, no dwords actually sent, and mark as VL15
-        * on chips where that may matter (due to IB flowcontrol)
-        */
-       if ((dd->ipath_flags & IPATH_HAS_PBC_CNT))
-               writeq(1UL << 63, piobuf);
-       else
-               writeq(0, piobuf);
-       ipath_flush_wc();
-
-       /*
-        * this is only roughly accurate, since even with preempt we
-        * still take interrupts that could take a while.   Running for
-        * >= 5 msec seems to get us "close enough" to accurate values
-        */
-       msecs = jiffies_to_msecs(jiffies);
-       for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
-               __iowrite32_copy(piobuf + 64, addr, cnt >> 2);
-               emsecs = jiffies_to_msecs(jiffies) - msecs;
-       }
-
-       /* 1 GiB/sec, slightly over IB SDR line rate */
-       if (lcnt < (emsecs * 1024U))
-               ipath_dev_err(dd,
-                       "Performance problem: bandwidth to PIO buffers is "
-                       "only %u MiB/sec\n",
-                       lcnt / (u32) emsecs);
-       else
-               ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n",
-                       lcnt / (u32) emsecs);
-
-       preempt_enable();
-
-       vfree(addr);
-
-done:
-       /* disarm piobuf, so it's available again */
-       ipath_disarm_piobufs(dd, pbnum, 1);
-       ipath_enable_armlaunch(dd);
-}
-
-static void cleanup_device(struct ipath_devdata *dd);
-
-static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
-       int ret, len, j;
-       struct ipath_devdata *dd;
-       unsigned long long addr;
-       u32 bar0 = 0, bar1 = 0;
-
-#ifdef CONFIG_X86_64
-       if (pat_enabled()) {
-               pr_warn("ipath needs PAT disabled, boot with nopat kernel parameter\n");
-               ret = -ENODEV;
-               goto bail;
-       }
-#endif
-
-       dd = ipath_alloc_devdata(pdev);
-       if (IS_ERR(dd)) {
-               ret = PTR_ERR(dd);
-               printk(KERN_ERR IPATH_DRV_NAME
-                      ": Could not allocate devdata: error %d\n", -ret);
-               goto bail;
-       }
-
-       ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
-
-       ret = pci_enable_device(pdev);
-       if (ret) {
-               /* This can happen iff:
-                *
-                * We did a chip reset, and then failed to reprogram the
-                * BAR, or the chip reset due to an internal error.  We then
-                * unloaded the driver and reloaded it.
-                *
-                * Both reset cases set the BAR back to initial state.  For
-                * the latter case, the AER sticky error bit at offset 0x718
-                * should be set, but the Linux kernel doesn't yet know
-                * about that, it appears.  If the original BAR was retained
-                * in the kernel data structures, this may be OK.
-                */
-               ipath_dev_err(dd, "enable unit %d failed: error %d\n",
-                             dd->ipath_unit, -ret);
-               goto bail_devdata;
-       }
-       addr = pci_resource_start(pdev, 0);
-       len = pci_resource_len(pdev, 0);
-       ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %d, vend %x/%x "
-                  "driver_data %lx\n", addr, len, pdev->irq, ent->vendor,
-                  ent->device, ent->driver_data);
-
-       read_bars(dd, pdev, &bar0, &bar1);
-
-       if (!bar1 && !(bar0 & ~0xf)) {
-               if (addr) {
-                       dev_info(&pdev->dev, "BAR is 0 (probable RESET), "
-                                "rewriting as %llx\n", addr);
-                       ret = pci_write_config_dword(
-                               pdev, PCI_BASE_ADDRESS_0, addr);
-                       if (ret) {
-                               ipath_dev_err(dd, "rewrite of BAR0 "
-                                             "failed: err %d\n", -ret);
-                               goto bail_disable;
-                       }
-                       ret = pci_write_config_dword(
-                               pdev, PCI_BASE_ADDRESS_1, addr >> 32);
-                       if (ret) {
-                               ipath_dev_err(dd, "rewrite of BAR1 "
-                                             "failed: err %d\n", -ret);
-                               goto bail_disable;
-                       }
-               } else {
-                       ipath_dev_err(dd, "BAR is 0 (probable RESET), "
-                                     "not usable until reboot\n");
-                       ret = -ENODEV;
-                       goto bail_disable;
-               }
-       }
-
-       ret = pci_request_regions(pdev, IPATH_DRV_NAME);
-       if (ret) {
-               dev_info(&pdev->dev, "pci_request_regions unit %u fails: "
-                        "err %d\n", dd->ipath_unit, -ret);
-               goto bail_disable;
-       }
-
-       ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
-       if (ret) {
-               /*
-                * if the 64 bit setup fails, try 32 bit.  Some systems
-                * do not setup 64 bit maps on systems with 2GB or less
-                * memory installed.
-                */
-               ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-               if (ret) {
-                       dev_info(&pdev->dev,
-                               "Unable to set DMA mask for unit %u: %d\n",
-                               dd->ipath_unit, ret);
-                       goto bail_regions;
-               } else {
-                       ipath_dbg("No 64bit DMA mask, used 32 bit mask\n");
-                       ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-                       if (ret)
-                               dev_info(&pdev->dev,
-                                       "Unable to set DMA consistent mask "
-                                       "for unit %u: %d\n",
-                                       dd->ipath_unit, ret);
-
-               }
-       } else {
-               ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-               if (ret)
-                       dev_info(&pdev->dev,
-                               "Unable to set DMA consistent mask "
-                               "for unit %u: %d\n",
-                               dd->ipath_unit, ret);
-       }
-
-       pci_set_master(pdev);
-
-       /*
-        * Save BARs to rewrite after device reset.  Save all 64 bits of
-        * BAR, just in case.
-        */
-       dd->ipath_pcibar0 = addr;
-       dd->ipath_pcibar1 = addr >> 32;
-       dd->ipath_deviceid = ent->device;       /* save for later use */
-       dd->ipath_vendorid = ent->vendor;
-
-       /* setup the chip-specific functions, as early as possible. */
-       switch (ent->device) {
-       case PCI_DEVICE_ID_INFINIPATH_HT:
-               ipath_init_iba6110_funcs(dd);
-               break;
-
-       default:
-               ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
-                             "failing\n", ent->device);
-               return -ENODEV;
-       }
-
-       for (j = 0; j < 6; j++) {
-               if (!pdev->resource[j].start)
-                       continue;
-               ipath_cdbg(VERBOSE, "BAR %d %pR, len %llx\n",
-                          j, &pdev->resource[j],
-                          (unsigned long long)pci_resource_len(pdev, j));
-       }
-
-       if (!addr) {
-               ipath_dev_err(dd, "No valid address in BAR 0!\n");
-               ret = -ENODEV;
-               goto bail_regions;
-       }
-
-       dd->ipath_pcirev = pdev->revision;
-
-#if defined(__powerpc__)
-       /* There isn't a generic way to specify writethrough mappings */
-       dd->ipath_kregbase = __ioremap(addr, len,
-               (_PAGE_NO_CACHE|_PAGE_WRITETHRU));
-#else
-       /* XXX: split this properly to enable on PAT */
-       dd->ipath_kregbase = ioremap_nocache(addr, len);
-#endif
-
-       if (!dd->ipath_kregbase) {
-               ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
-                         addr);
-               ret = -ENOMEM;
-               goto bail_iounmap;
-       }
-       dd->ipath_kregend = (u64 __iomem *)
-               ((void __iomem *)dd->ipath_kregbase + len);
-       dd->ipath_physaddr = addr;      /* used for io_remap, etc. */
-       /* for user mmap */
-       ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n",
-                  addr, dd->ipath_kregbase);
-
-       if (dd->ipath_f_bus(dd, pdev))
-               ipath_dev_err(dd, "Failed to setup config space; "
-                             "continuing anyway\n");
-
-       /*
-        * set up our interrupt handler; IRQF_SHARED probably not needed,
-        * since MSI interrupts shouldn't be shared but won't  hurt for now.
-        * check 0 irq after we return from chip-specific bus setup, since
-        * that can affect this due to setup
-        */
-       if (!dd->ipath_irq)
-               ipath_dev_err(dd, "irq is 0, BIOS error?  Interrupts won't "
-                             "work\n");
-       else {
-               ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED,
-                                 IPATH_DRV_NAME, dd);
-               if (ret) {
-                       ipath_dev_err(dd, "Couldn't setup irq handler, "
-                                     "irq=%d: %d\n", dd->ipath_irq, ret);
-                       goto bail_iounmap;
-               }
-       }
-
-       ret = ipath_init_chip(dd, 0);   /* do the chip-specific init */
-       if (ret)
-               goto bail_irqsetup;
-
-       ret = ipath_enable_wc(dd);
-
-       if (ret)
-               ret = 0;
-
-       ipath_verify_pioperf(dd);
-
-       ipath_device_create_group(&pdev->dev, dd);
-       ipathfs_add_device(dd);
-       ipath_user_add(dd);
-       ipath_diag_add(dd);
-       ipath_register_ib_device(dd);
-
-       goto bail;
-
-bail_irqsetup:
-       cleanup_device(dd);
-
-       if (dd->ipath_irq)
-               dd->ipath_f_free_irq(dd);
-
-       if (dd->ipath_f_cleanup)
-               dd->ipath_f_cleanup(dd);
-
-bail_iounmap:
-       iounmap((volatile void __iomem *) dd->ipath_kregbase);
-
-bail_regions:
-       pci_release_regions(pdev);
-
-bail_disable:
-       pci_disable_device(pdev);
-
-bail_devdata:
-       ipath_free_devdata(pdev, dd);
-
-bail:
-       return ret;
-}
-
-static void cleanup_device(struct ipath_devdata *dd)
-{
-       int port;
-       struct ipath_portdata **tmp;
-       unsigned long flags;
-
-       if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
-               /* can't do anything more with chip; needs re-init */
-               *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
-               if (dd->ipath_kregbase) {
-                       /*
-                        * if we haven't already cleaned up before these are
-                        * to ensure any register reads/writes "fail" until
-                        * re-init
-                        */
-                       dd->ipath_kregbase = NULL;
-                       dd->ipath_uregbase = 0;
-                       dd->ipath_sregbase = 0;
-                       dd->ipath_cregbase = 0;
-                       dd->ipath_kregsize = 0;
-               }
-               ipath_disable_wc(dd);
-       }
-
-       if (dd->ipath_spectriggerhit)
-               dev_info(&dd->pcidev->dev, "%lu special trigger hits\n",
-                        dd->ipath_spectriggerhit);
-
-       if (dd->ipath_pioavailregs_dma) {
-               dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-                                 (void *) dd->ipath_pioavailregs_dma,
-                                 dd->ipath_pioavailregs_phys);
-               dd->ipath_pioavailregs_dma = NULL;
-       }
-       if (dd->ipath_dummy_hdrq) {
-               dma_free_coherent(&dd->pcidev->dev,
-                       dd->ipath_pd[0]->port_rcvhdrq_size,
-                       dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
-               dd->ipath_dummy_hdrq = NULL;
-       }
-
-       if (dd->ipath_pageshadow) {
-               struct page **tmpp = dd->ipath_pageshadow;
-               dma_addr_t *tmpd = dd->ipath_physshadow;
-               int i, cnt = 0;
-
-               ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
-                          "locked\n");
-               for (port = 0; port < dd->ipath_cfgports; port++) {
-                       int port_tidbase = port * dd->ipath_rcvtidcnt;
-                       int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
-                       for (i = port_tidbase; i < maxtid; i++) {
-                               if (!tmpp[i])
-                                       continue;
-                               pci_unmap_page(dd->pcidev, tmpd[i],
-                                       PAGE_SIZE, PCI_DMA_FROMDEVICE);
-                               ipath_release_user_pages(&tmpp[i], 1);
-                               tmpp[i] = NULL;
-                               cnt++;
-                       }
-               }
-               if (cnt) {
-                       ipath_stats.sps_pageunlocks += cnt;
-                       ipath_cdbg(VERBOSE, "There were still %u expTID "
-                                  "entries locked\n", cnt);
-               }
-               if (ipath_stats.sps_pagelocks ||
-                   ipath_stats.sps_pageunlocks)
-                       ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
-                                  "unlocked via ipath_m{un}lock\n",
-                                  (unsigned long long)
-                                  ipath_stats.sps_pagelocks,
-                                  (unsigned long long)
-                                  ipath_stats.sps_pageunlocks);
-
-               ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
-                          dd->ipath_pageshadow);
-               tmpp = dd->ipath_pageshadow;
-               dd->ipath_pageshadow = NULL;
-               vfree(tmpp);
-
-               dd->ipath_egrtidbase = NULL;
-       }
-
-       /*
-        * free any resources still in use (usually just kernel ports)
-        * at unload; we do for portcnt, because that's what we allocate.
-        * We acquire lock to be really paranoid that ipath_pd isn't being
-        * accessed from some interrupt-related code (that should not happen,
-        * but best to be sure).
-        */
-       spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-       tmp = dd->ipath_pd;
-       dd->ipath_pd = NULL;
-       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-       for (port = 0; port < dd->ipath_portcnt; port++) {
-               struct ipath_portdata *pd = tmp[port];
-               tmp[port] = NULL; /* debugging paranoia */
-               ipath_free_pddata(dd, pd);
-       }
-       kfree(tmp);
-}
-
-static void ipath_remove_one(struct pci_dev *pdev)
-{
-       struct ipath_devdata *dd = pci_get_drvdata(pdev);
-
-       ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
-
-       /*
-        * disable the IB link early, to be sure no new packets arrive, which
-        * complicates the shutdown process
-        */
-       ipath_shutdown_device(dd);
-
-       flush_workqueue(ib_wq);
-
-       if (dd->verbs_dev)
-               ipath_unregister_ib_device(dd->verbs_dev);
-
-       ipath_diag_remove(dd);
-       ipath_user_remove(dd);
-       ipathfs_remove_device(dd);
-       ipath_device_remove_group(&pdev->dev, dd);
-
-       ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
-                  "unit %u\n", dd, (u32) dd->ipath_unit);
-
-       cleanup_device(dd);
-
-       /*
-        * turn off rcv, send, and interrupts for all ports, all drivers
-        * should also hard reset the chip here?
-        * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
-        * for all versions of the driver, if they were allocated
-        */
-       if (dd->ipath_irq) {
-               ipath_cdbg(VERBOSE, "unit %u free irq %d\n",
-                          dd->ipath_unit, dd->ipath_irq);
-               dd->ipath_f_free_irq(dd);
-       } else
-               ipath_dbg("irq is 0, not doing free_irq "
-                         "for unit %u\n", dd->ipath_unit);
-       /*
-        * we check for NULL here, because it's outside
-        * the kregbase check, and we need to call it
-        * after the free_irq.  Thus it's possible that
-        * the function pointers were never initialized.
-        */
-       if (dd->ipath_f_cleanup)
-               /* clean up chip-specific stuff */
-               dd->ipath_f_cleanup(dd);
-
-       ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
-       iounmap((volatile void __iomem *) dd->ipath_kregbase);
-       pci_release_regions(pdev);
-       ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
-       pci_disable_device(pdev);
-
-       ipath_free_devdata(pdev, dd);
-}
-
-/* general driver use */
-DEFINE_MUTEX(ipath_mutex);
-
-static DEFINE_SPINLOCK(ipath_pioavail_lock);
-
-/**
- * ipath_disarm_piobufs - cancel a range of PIO buffers
- * @dd: the infinipath device
- * @first: the first PIO buffer to cancel
- * @cnt: the number of PIO buffers to cancel
- *
- * cancel a range of PIO buffers, used when they might be armed, but
- * not triggered.  Used at init to ensure buffer state, and also user
- * process close, in case it died while writing to a PIO buffer
- * Also after errors.
- */
-void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
-                         unsigned cnt)
-{
-       unsigned i, last = first + cnt;
-       unsigned long flags;
-
-       ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
-       for (i = first; i < last; i++) {
-               spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-               /*
-                * The disarm-related bits are write-only, so it
-                * is ok to OR them in with our copy of sendctrl
-                * while we hold the lock.
-                */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                       dd->ipath_sendctrl | INFINIPATH_S_DISARM |
-                       (i << INFINIPATH_S_DISARMPIOBUF_SHIFT));
-               /* can't disarm bufs back-to-back per iba7220 spec */
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-       }
-       /* on some older chips, update may not happen after cancel */
-       ipath_force_pio_avail_update(dd);
-}
-
-/**
- * ipath_wait_linkstate - wait for an IB link state change to occur
- * @dd: the infinipath device
- * @state: the state to wait for
- * @msecs: the number of milliseconds to wait
- *
- * wait up to msecs milliseconds for IB link state change to occur for
- * now, take the easy polling route.  Currently used only by
- * ipath_set_linkstate.  Returns 0 if state reached, otherwise
- * -ETIMEDOUT state can have multiple states set, for any of several
- * transitions.
- */
-int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
-{
-       dd->ipath_state_wanted = state;
-       wait_event_interruptible_timeout(ipath_state_wait,
-                                        (dd->ipath_flags & state),
-                                        msecs_to_jiffies(msecs));
-       dd->ipath_state_wanted = 0;
-
-       if (!(dd->ipath_flags & state)) {
-               u64 val;
-               ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
-                          " ms\n",
-                          /* test INIT ahead of DOWN, both can be set */
-                          (state & IPATH_LINKINIT) ? "INIT" :
-                          ((state & IPATH_LINKDOWN) ? "DOWN" :
-                           ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")),
-                          msecs);
-               val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-               ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n",
-                          (unsigned long long) ipath_read_kreg64(
-                                  dd, dd->ipath_kregs->kr_ibcctrl),
-                          (unsigned long long) val,
-                          ipath_ibcstatus_str[val & dd->ibcs_lts_mask]);
-       }
-       return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
-}
-
-static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err,
-       char *buf, size_t blen)
-{
-       static const struct {
-               ipath_err_t err;
-               const char *msg;
-       } errs[] = {
-               { INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" },
-               { INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" },
-               { INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" },
-               { INFINIPATH_E_SDMABASE, "SDmaBase" },
-               { INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" },
-               { INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" },
-               { INFINIPATH_E_SDMADWEN, "SDmaDwEn" },
-               { INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" },
-               { INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" },
-               { INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" },
-               { INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" },
-               { INFINIPATH_E_SDMADISABLED, "SDmaDisabled" },
-       };
-       int i;
-       int expected;
-       size_t bidx = 0;
-
-       for (i = 0; i < ARRAY_SIZE(errs); i++) {
-               expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 :
-                       test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-               if ((err & errs[i].err) && !expected)
-                       bidx += snprintf(buf + bidx, blen - bidx,
-                                        "%s ", errs[i].msg);
-       }
-}
-
-/*
- * Decode the error status into strings, deciding whether to always
- * print * it or not depending on "normal packet errors" vs everything
- * else.   Return 1 if "real" errors, otherwise 0 if only packet
- * errors, so caller can decide what to print with the string.
- */
-int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
-       ipath_err_t err)
-{
-       int iserr = 1;
-       *buf = '\0';
-       if (err & INFINIPATH_E_PKTERRS) {
-               if (!(err & ~INFINIPATH_E_PKTERRS))
-                       iserr = 0; // if only packet errors.
-               if (ipath_debug & __IPATH_ERRPKTDBG) {
-                       if (err & INFINIPATH_E_REBP)
-                               strlcat(buf, "EBP ", blen);
-                       if (err & INFINIPATH_E_RVCRC)
-                               strlcat(buf, "VCRC ", blen);
-                       if (err & INFINIPATH_E_RICRC) {
-                               strlcat(buf, "CRC ", blen);
-                               // clear for check below, so only once
-                               err &= INFINIPATH_E_RICRC;
-                       }
-                       if (err & INFINIPATH_E_RSHORTPKTLEN)
-                               strlcat(buf, "rshortpktlen ", blen);
-                       if (err & INFINIPATH_E_SDROPPEDDATAPKT)
-                               strlcat(buf, "sdroppeddatapkt ", blen);
-                       if (err & INFINIPATH_E_SPKTLEN)
-                               strlcat(buf, "spktlen ", blen);
-               }
-               if ((err & INFINIPATH_E_RICRC) &&
-                       !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
-                       strlcat(buf, "CRC ", blen);
-               if (!iserr)
-                       goto done;
-       }
-       if (err & INFINIPATH_E_RHDRLEN)
-               strlcat(buf, "rhdrlen ", blen);
-       if (err & INFINIPATH_E_RBADTID)
-               strlcat(buf, "rbadtid ", blen);
-       if (err & INFINIPATH_E_RBADVERSION)
-               strlcat(buf, "rbadversion ", blen);
-       if (err & INFINIPATH_E_RHDR)
-               strlcat(buf, "rhdr ", blen);
-       if (err & INFINIPATH_E_SENDSPECIALTRIGGER)
-               strlcat(buf, "sendspecialtrigger ", blen);
-       if (err & INFINIPATH_E_RLONGPKTLEN)
-               strlcat(buf, "rlongpktlen ", blen);
-       if (err & INFINIPATH_E_RMAXPKTLEN)
-               strlcat(buf, "rmaxpktlen ", blen);
-       if (err & INFINIPATH_E_RMINPKTLEN)
-               strlcat(buf, "rminpktlen ", blen);
-       if (err & INFINIPATH_E_SMINPKTLEN)
-               strlcat(buf, "sminpktlen ", blen);
-       if (err & INFINIPATH_E_RFORMATERR)
-               strlcat(buf, "rformaterr ", blen);
-       if (err & INFINIPATH_E_RUNSUPVL)
-               strlcat(buf, "runsupvl ", blen);
-       if (err & INFINIPATH_E_RUNEXPCHAR)
-               strlcat(buf, "runexpchar ", blen);
-       if (err & INFINIPATH_E_RIBFLOW)
-               strlcat(buf, "ribflow ", blen);
-       if (err & INFINIPATH_E_SUNDERRUN)
-               strlcat(buf, "sunderrun ", blen);
-       if (err & INFINIPATH_E_SPIOARMLAUNCH)
-               strlcat(buf, "spioarmlaunch ", blen);
-       if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
-               strlcat(buf, "sunexperrpktnum ", blen);
-       if (err & INFINIPATH_E_SDROPPEDSMPPKT)
-               strlcat(buf, "sdroppedsmppkt ", blen);
-       if (err & INFINIPATH_E_SMAXPKTLEN)
-               strlcat(buf, "smaxpktlen ", blen);
-       if (err & INFINIPATH_E_SUNSUPVL)
-               strlcat(buf, "sunsupVL ", blen);
-       if (err & INFINIPATH_E_INVALIDADDR)
-               strlcat(buf, "invalidaddr ", blen);
-       if (err & INFINIPATH_E_RRCVEGRFULL)
-               strlcat(buf, "rcvegrfull ", blen);
-       if (err & INFINIPATH_E_RRCVHDRFULL)
-               strlcat(buf, "rcvhdrfull ", blen);
-       if (err & INFINIPATH_E_IBSTATUSCHANGED)
-               strlcat(buf, "ibcstatuschg ", blen);
-       if (err & INFINIPATH_E_RIBLOSTLINK)
-               strlcat(buf, "riblostlink ", blen);
-       if (err & INFINIPATH_E_HARDWARE)
-               strlcat(buf, "hardware ", blen);
-       if (err & INFINIPATH_E_RESET)
-               strlcat(buf, "reset ", blen);
-       if (err & INFINIPATH_E_SDMAERRS)
-               decode_sdma_errs(dd, err, buf, blen);
-       if (err & INFINIPATH_E_INVALIDEEPCMD)
-               strlcat(buf, "invalideepromcmd ", blen);
-done:
-       return iserr;
-}
-
-/**
- * get_rhf_errstring - decode RHF errors
- * @err: the err number
- * @msg: the output buffer
- * @len: the length of the output buffer
- *
- * only used one place now, may want more later
- */
-static void get_rhf_errstring(u32 err, char *msg, size_t len)
-{
-       /* if no errors, and so don't need to check what's first */
-       *msg = '\0';
-
-       if (err & INFINIPATH_RHF_H_ICRCERR)
-               strlcat(msg, "icrcerr ", len);
-       if (err & INFINIPATH_RHF_H_VCRCERR)
-               strlcat(msg, "vcrcerr ", len);
-       if (err & INFINIPATH_RHF_H_PARITYERR)
-               strlcat(msg, "parityerr ", len);
-       if (err & INFINIPATH_RHF_H_LENERR)
-               strlcat(msg, "lenerr ", len);
-       if (err & INFINIPATH_RHF_H_MTUERR)
-               strlcat(msg, "mtuerr ", len);
-       if (err & INFINIPATH_RHF_H_IHDRERR)
-               /* infinipath hdr checksum error */
-               strlcat(msg, "ipathhdrerr ", len);
-       if (err & INFINIPATH_RHF_H_TIDERR)
-               strlcat(msg, "tiderr ", len);
-       if (err & INFINIPATH_RHF_H_MKERR)
-               /* bad port, offset, etc. */
-               strlcat(msg, "invalid ipathhdr ", len);
-       if (err & INFINIPATH_RHF_H_IBERR)
-               strlcat(msg, "iberr ", len);
-       if (err & INFINIPATH_RHF_L_SWA)
-               strlcat(msg, "swA ", len);
-       if (err & INFINIPATH_RHF_L_SWB)
-               strlcat(msg, "swB ", len);
-}
-
-/**
- * ipath_get_egrbuf - get an eager buffer
- * @dd: the infinipath device
- * @bufnum: the eager buffer to get
- *
- * must only be called if ipath_pd[port] is known to be allocated
- */
-static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum)
-{
-       return dd->ipath_port0_skbinfo ?
-               (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
-}
-
-/**
- * ipath_alloc_skb - allocate an skb and buffer with possible constraints
- * @dd: the infinipath device
- * @gfp_mask: the sk_buff SFP mask
- */
-struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
-                               gfp_t gfp_mask)
-{
-       struct sk_buff *skb;
-       u32 len;
-
-       /*
-        * Only fully supported way to handle this is to allocate lots
-        * extra, align as needed, and then do skb_reserve().  That wastes
-        * a lot of memory...  I'll have to hack this into infinipath_copy
-        * also.
-        */
-
-       /*
-        * We need 2 extra bytes for ipath_ether data sent in the
-        * key header.  In order to keep everything dword aligned,
-        * we'll reserve 4 bytes.
-        */
-       len = dd->ipath_ibmaxlen + 4;
-
-       if (dd->ipath_flags & IPATH_4BYTE_TID) {
-               /* We need a 2KB multiple alignment, and there is no way
-                * to do it except to allocate extra and then skb_reserve
-                * enough to bring it up to the right alignment.
-                */
-               len += 2047;
-       }
-
-       skb = __dev_alloc_skb(len, gfp_mask);
-       if (!skb) {
-               ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
-                             len);
-               goto bail;
-       }
-
-       skb_reserve(skb, 4);
-
-       if (dd->ipath_flags & IPATH_4BYTE_TID) {
-               u32 una = (unsigned long)skb->data & 2047;
-               if (una)
-                       skb_reserve(skb, 2048 - una);
-       }
-
-bail:
-       return skb;
-}
-
-static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
-                            u32 eflags,
-                            u32 l,
-                            u32 etail,
-                            __le32 *rhf_addr,
-                            struct ipath_message_header *hdr)
-{
-       char emsg[128];
-
-       get_rhf_errstring(eflags, emsg, sizeof emsg);
-       ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
-                  "tlen=%x opcode=%x egridx=%x: %s\n",
-                  eflags, l,
-                  ipath_hdrget_rcv_type(rhf_addr),
-                  ipath_hdrget_length_in_bytes(rhf_addr),
-                  be32_to_cpu(hdr->bth[0]) >> 24,
-                  etail, emsg);
-
-       /* Count local link integrity errors. */
-       if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) {
-               u8 n = (dd->ipath_ibcctrl >>
-                       INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-                       INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-
-               if (++dd->ipath_lli_counter > n) {
-                       dd->ipath_lli_counter = 0;
-                       dd->ipath_lli_errors++;
-               }
-       }
-}
-
-/*
- * ipath_kreceive - receive a packet
- * @pd: the infinipath port
- *
- * called from interrupt handler for errors or receive interrupt
- */
-void ipath_kreceive(struct ipath_portdata *pd)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       __le32 *rhf_addr;
-       void *ebuf;
-       const u32 rsize = dd->ipath_rcvhdrentsize;      /* words */
-       const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */
-       u32 etail = -1, l, hdrqtail;
-       struct ipath_message_header *hdr;
-       u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0;
-       static u64 totcalls;    /* stats, may eventually remove */
-       int last;
-
-       l = pd->port_head;
-       rhf_addr = (__le32 *) pd->port_rcvhdrq + l + dd->ipath_rhf_offset;
-       if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
-               u32 seq = ipath_hdrget_seq(rhf_addr);
-
-               if (seq != pd->port_seq_cnt)
-                       goto bail;
-               hdrqtail = 0;
-       } else {
-               hdrqtail = ipath_get_rcvhdrtail(pd);
-               if (l == hdrqtail)
-                       goto bail;
-               smp_rmb();
-       }
-
-reloop:
-       for (last = 0, i = 1; !last; i += !last) {
-               hdr = dd->ipath_f_get_msgheader(dd, rhf_addr);
-               eflags = ipath_hdrget_err_flags(rhf_addr);
-               etype = ipath_hdrget_rcv_type(rhf_addr);
-               /* total length */
-               tlen = ipath_hdrget_length_in_bytes(rhf_addr);
-               ebuf = NULL;
-               if ((dd->ipath_flags & IPATH_NODMA_RTAIL) ?
-                   ipath_hdrget_use_egr_buf(rhf_addr) :
-                   (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
-                       /*
-                        * It turns out that the chip uses an eager buffer
-                        * for all non-expected packets, whether it "needs"
-                        * one or not.  So always get the index, but don't
-                        * set ebuf (so we try to copy data) unless the
-                        * length requires it.
-                        */
-                       etail = ipath_hdrget_index(rhf_addr);
-                       updegr = 1;
-                       if (tlen > sizeof(*hdr) ||
-                           etype == RCVHQ_RCV_TYPE_NON_KD)
-                               ebuf = ipath_get_egrbuf(dd, etail);
-               }
-
-               /*
-                * both tiderr and ipathhdrerr are set for all plain IB
-                * packets; only ipathhdrerr should be set.
-                */
-
-               if (etype != RCVHQ_RCV_TYPE_NON_KD &&
-                   etype != RCVHQ_RCV_TYPE_ERROR &&
-                   ipath_hdrget_ipath_ver(hdr->iph.ver_port_tid_offset) !=
-                   IPS_PROTO_VERSION)
-                       ipath_cdbg(PKT, "Bad InfiniPath protocol version "
-                                  "%x\n", etype);
-
-               if (unlikely(eflags))
-                       ipath_rcv_hdrerr(dd, eflags, l, etail, rhf_addr, hdr);
-               else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
-                       ipath_ib_rcv(dd->verbs_dev, (u32 *)hdr, ebuf, tlen);
-                       if (dd->ipath_lli_counter)
-                               dd->ipath_lli_counter--;
-               } else if (etype == RCVHQ_RCV_TYPE_EAGER) {
-                       u8 opcode = be32_to_cpu(hdr->bth[0]) >> 24;
-                       u32 qp = be32_to_cpu(hdr->bth[1]) & 0xffffff;
-                       ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
-                                  "qp=%x), len %x; ignored\n",
-                                  etype, opcode, qp, tlen);
-               } else if (etype == RCVHQ_RCV_TYPE_EXPECTED) {
-                       ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
-                                 be32_to_cpu(hdr->bth[0]) >> 24);
-               } else {
-                       /*
-                        * error packet, type of error unknown.
-                        * Probably type 3, but we don't know, so don't
-                        * even try to print the opcode, etc.
-                        * Usually caused by a "bad packet", that has no
-                        * BTH, when the LRH says it should.
-                        */
-                       ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf"
-                                 " %x, len %x hdrq+%x rhf: %Lx\n",
-                                 etail, tlen, l, (unsigned long long)
-                                 le64_to_cpu(*(__le64 *) rhf_addr));
-                       if (ipath_debug & __IPATH_ERRPKTDBG) {
-                               u32 j, *d, dw = rsize-2;
-                               if (rsize > (tlen>>2))
-                                       dw = tlen>>2;
-                               d = (u32 *)hdr;
-                               printk(KERN_DEBUG "EPkt rcvhdr(%x dw):\n",
-                                       dw);
-                               for (j = 0; j < dw; j++)
-                                       printk(KERN_DEBUG "%8x%s", d[j],
-                                               (j%8) == 7 ? "\n" : " ");
-                               printk(KERN_DEBUG ".\n");
-                       }
-               }
-               l += rsize;
-               if (l >= maxcnt)
-                       l = 0;
-               rhf_addr = (__le32 *) pd->port_rcvhdrq +
-                       l + dd->ipath_rhf_offset;
-               if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
-                       u32 seq = ipath_hdrget_seq(rhf_addr);
-
-                       if (++pd->port_seq_cnt > 13)
-                               pd->port_seq_cnt = 1;
-                       if (seq != pd->port_seq_cnt)
-                               last = 1;
-               } else if (l == hdrqtail) {
-                       last = 1;
-               }
-               /*
-                * update head regs on last packet, and every 16 packets.
-                * Reduce bus traffic, while still trying to prevent
-                * rcvhdrq overflows, for when the queue is nearly full
-                */
-               if (last || !(i & 0xf)) {
-                       u64 lval = l;
-
-                       /* request IBA6120 and 7220 interrupt only on last */
-                       if (last)
-                               lval |= dd->ipath_rhdrhead_intr_off;
-                       ipath_write_ureg(dd, ur_rcvhdrhead, lval,
-                               pd->port_port);
-                       if (updegr) {
-                               ipath_write_ureg(dd, ur_rcvegrindexhead,
-                                                etail, pd->port_port);
-                               updegr = 0;
-                       }
-               }
-       }
-
-       if (!dd->ipath_rhdrhead_intr_off && !reloop &&
-           !(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
-               /* IBA6110 workaround; we can have a race clearing chip
-                * interrupt with another interrupt about to be delivered,
-                * and can clear it before it is delivered on the GPIO
-                * workaround.  By doing the extra check here for the
-                * in-memory tail register updating while we were doing
-                * earlier packets, we "almost" guarantee we have covered
-                * that case.
-                */
-               u32 hqtail = ipath_get_rcvhdrtail(pd);
-               if (hqtail != hdrqtail) {
-                       hdrqtail = hqtail;
-                       reloop = 1; /* loop 1 extra time at most */
-                       goto reloop;
-               }
-       }
-
-       pkttot += i;
-
-       pd->port_head = l;
-
-       if (pkttot > ipath_stats.sps_maxpkts_call)
-               ipath_stats.sps_maxpkts_call = pkttot;
-       ipath_stats.sps_port0pkts += pkttot;
-       ipath_stats.sps_avgpkts_call =
-               ipath_stats.sps_port0pkts / ++totcalls;
-
-bail:;
-}
-
-/**
- * ipath_update_pio_bufs - update shadow copy of the PIO availability map
- * @dd: the infinipath device
- *
- * called whenever our local copy indicates we have run out of send buffers
- * NOTE: This can be called from interrupt context by some code
- * and from non-interrupt context by ipath_getpiobuf().
- */
-
-static void ipath_update_pio_bufs(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-       int i;
-       const unsigned piobregs = (unsigned)dd->ipath_pioavregs;
-
-       /* If the generation (check) bits have changed, then we update the
-        * busy bit for the corresponding PIO buffer.  This algorithm will
-        * modify positions to the value they already have in some cases
-        * (i.e., no change), but it's faster than changing only the bits
-        * that have changed.
-        *
-        * We would like to do this atomicly, to avoid spinlocks in the
-        * critical send path, but that's not really possible, given the
-        * type of changes, and that this routine could be called on
-        * multiple cpu's simultaneously, so we lock in this routine only,
-        * to avoid conflicting updates; all we change is the shadow, and
-        * it's a single 64 bit memory location, so by definition the update
-        * is atomic in terms of what other cpu's can see in testing the
-        * bits.  The spin_lock overhead isn't too bad, since it only
-        * happens when all buffers are in use, so only cpu overhead, not
-        * latency or bandwidth is affected.
-        */
-       if (!dd->ipath_pioavailregs_dma) {
-               ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");
-               return;
-       }
-       if (ipath_debug & __IPATH_VERBDBG) {
-               /* only if packet debug and verbose */
-               volatile __le64 *dma = dd->ipath_pioavailregs_dma;
-               unsigned long *shadow = dd->ipath_pioavailshadow;
-
-               ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, "
-                          "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx "
-                          "s3=%lx\n",
-                          (unsigned long long) le64_to_cpu(dma[0]),
-                          shadow[0],
-                          (unsigned long long) le64_to_cpu(dma[1]),
-                          shadow[1],
-                          (unsigned long long) le64_to_cpu(dma[2]),
-                          shadow[2],
-                          (unsigned long long) le64_to_cpu(dma[3]),
-                          shadow[3]);
-               if (piobregs > 4)
-                       ipath_cdbg(
-                               PKT, "2nd group, dma4=%llx shad4=%lx, "
-                               "d5=%llx s5=%lx, d6=%llx s6=%lx, "
-                               "d7=%llx s7=%lx\n",
-                               (unsigned long long) le64_to_cpu(dma[4]),
-                               shadow[4],
-                               (unsigned long long) le64_to_cpu(dma[5]),
-                               shadow[5],
-                               (unsigned long long) le64_to_cpu(dma[6]),
-                               shadow[6],
-                               (unsigned long long) le64_to_cpu(dma[7]),
-                               shadow[7]);
-       }
-       spin_lock_irqsave(&ipath_pioavail_lock, flags);
-       for (i = 0; i < piobregs; i++) {
-               u64 pchbusy, pchg, piov, pnew;
-               /*
-                * Chip Errata: bug 6641; even and odd qwords>3 are swapped
-                */
-               if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
-                       piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]);
-               else
-                       piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
-               pchg = dd->ipath_pioavailkernel[i] &
-                       ~(dd->ipath_pioavailshadow[i] ^ piov);
-               pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;
-               if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {
-                       pnew = dd->ipath_pioavailshadow[i] & ~pchbusy;
-                       pnew |= piov & pchbusy;
-                       dd->ipath_pioavailshadow[i] = pnew;
-               }
-       }
-       spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-}
-
-/*
- * used to force update of pioavailshadow if we can't get a pio buffer.
- * Needed primarily due to exitting freeze mode after recovering
- * from errors.  Done lazily, because it's safer (known to not
- * be writing pio buffers).
- */
-static void ipath_reset_availshadow(struct ipath_devdata *dd)
-{
-       int i, im;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ipath_pioavail_lock, flags);
-       for (i = 0; i < dd->ipath_pioavregs; i++) {
-               u64 val, oldval;
-               /* deal with 6110 chip bug on high register #s */
-               im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
-                       i ^ 1 : i;
-               val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);
-               /*
-                * busy out the buffers not in the kernel avail list,
-                * without changing the generation bits.
-                */
-               oldval = dd->ipath_pioavailshadow[i];
-               dd->ipath_pioavailshadow[i] = val |
-                       ((~dd->ipath_pioavailkernel[i] <<
-                       INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &
-                       0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
-               if (oldval != dd->ipath_pioavailshadow[i])
-                       ipath_dbg("shadow[%d] was %Lx, now %lx\n",
-                               i, (unsigned long long) oldval,
-                               dd->ipath_pioavailshadow[i]);
-       }
-       spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-}
-
-/**
- * ipath_setrcvhdrsize - set the receive header size
- * @dd: the infinipath device
- * @rhdrsize: the receive header size
- *
- * called from user init code, and also layered driver init
- */
-int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
-{
-       int ret = 0;
-
-       if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) {
-               if (dd->ipath_rcvhdrsize != rhdrsize) {
-                       dev_info(&dd->pcidev->dev,
-                                "Error: can't set protocol header "
-                                "size %u, already %u\n",
-                                rhdrsize, dd->ipath_rcvhdrsize);
-                       ret = -EAGAIN;
-               } else
-                       ipath_cdbg(VERBOSE, "Reuse same protocol header "
-                                  "size %u\n", dd->ipath_rcvhdrsize);
-       } else if (rhdrsize > (dd->ipath_rcvhdrentsize -
-                              (sizeof(u64) / sizeof(u32)))) {
-               ipath_dbg("Error: can't set protocol header size %u "
-                         "(> max %u)\n", rhdrsize,
-                         dd->ipath_rcvhdrentsize -
-                         (u32) (sizeof(u64) / sizeof(u32)));
-               ret = -EOVERFLOW;
-       } else {
-               dd->ipath_flags |= IPATH_RCVHDRSZ_SET;
-               dd->ipath_rcvhdrsize = rhdrsize;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
-                                dd->ipath_rcvhdrsize);
-               ipath_cdbg(VERBOSE, "Set protocol header size to %u\n",
-                          dd->ipath_rcvhdrsize);
-       }
-       return ret;
-}
-
-/*
- * debugging code and stats updates if no pio buffers available.
- */
-static noinline void no_pio_bufs(struct ipath_devdata *dd)
-{
-       unsigned long *shadow = dd->ipath_pioavailshadow;
-       __le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma;
-
-       dd->ipath_upd_pio_shadow = 1;
-
-       /*
-        * not atomic, but if we lose a stat count in a while, that's OK
-        */
-       ipath_stats.sps_nopiobufs++;
-       if (!(++dd->ipath_consec_nopiobuf % 100000)) {
-               ipath_force_pio_avail_update(dd); /* at start */
-               ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "
-                       "%llx %llx %llx %llx\n"
-                       "ipath  shadow:  %lx %lx %lx %lx\n",
-                       dd->ipath_consec_nopiobuf,
-                       (unsigned long)get_cycles(),
-                       (unsigned long long) le64_to_cpu(dma[0]),
-                       (unsigned long long) le64_to_cpu(dma[1]),
-                       (unsigned long long) le64_to_cpu(dma[2]),
-                       (unsigned long long) le64_to_cpu(dma[3]),
-                       shadow[0], shadow[1], shadow[2], shadow[3]);
-               /*
-                * 4 buffers per byte, 4 registers above, cover rest
-                * below
-                */
-               if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
-                   (sizeof(shadow[0]) * 4 * 4))
-                       ipath_dbg("2nd group: dmacopy: "
-                                 "%llx %llx %llx %llx\n"
-                                 "ipath  shadow:  %lx %lx %lx %lx\n",
-                                 (unsigned long long)le64_to_cpu(dma[4]),
-                                 (unsigned long long)le64_to_cpu(dma[5]),
-                                 (unsigned long long)le64_to_cpu(dma[6]),
-                                 (unsigned long long)le64_to_cpu(dma[7]),
-                                 shadow[4], shadow[5], shadow[6], shadow[7]);
-
-               /* at end, so update likely happened */
-               ipath_reset_availshadow(dd);
-       }
-}
-
-/*
- * common code for normal driver pio buffer allocation, and reserved
- * allocation.
- *
- * do appropriate marking as busy, etc.
- * returns buffer number if one found (>=0), negative number is error.
- */
-static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd,
-       u32 *pbufnum, u32 first, u32 last, u32 firsti)
-{
-       int i, j, updated = 0;
-       unsigned piobcnt;
-       unsigned long flags;
-       unsigned long *shadow = dd->ipath_pioavailshadow;
-       u32 __iomem *buf;
-
-       piobcnt = last - first;
-       if (dd->ipath_upd_pio_shadow) {
-               /*
-                * Minor optimization.  If we had no buffers on last call,
-                * start out by doing the update; continue and do scan even
-                * if no buffers were updated, to be paranoid
-                */
-               ipath_update_pio_bufs(dd);
-               updated++;
-               i = first;
-       } else
-               i = firsti;
-rescan:
-       /*
-        * while test_and_set_bit() is atomic, we do that and then the
-        * change_bit(), and the pair is not.  See if this is the cause
-        * of the remaining armlaunch errors.
-        */
-       spin_lock_irqsave(&ipath_pioavail_lock, flags);
-       for (j = 0; j < piobcnt; j++, i++) {
-               if (i >= last)
-                       i = first;
-               if (__test_and_set_bit((2 * i) + 1, shadow))
-                       continue;
-               /* flip generation bit */
-               __change_bit(2 * i, shadow);
-               break;
-       }
-       spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-
-       if (j == piobcnt) {
-               if (!updated) {
-                       /*
-                        * first time through; shadow exhausted, but may be
-                        * buffers available, try an update and then rescan.
-                        */
-                       ipath_update_pio_bufs(dd);
-                       updated++;
-                       i = first;
-                       goto rescan;
-               } else if (updated == 1 && piobcnt <=
-                       ((dd->ipath_sendctrl
-                       >> INFINIPATH_S_UPDTHRESH_SHIFT) &
-                       INFINIPATH_S_UPDTHRESH_MASK)) {
-                       /*
-                        * for chips supporting and using the update
-                        * threshold we need to force an update of the
-                        * in-memory copy if the count is less than the
-                        * thershold, then check one more time.
-                        */
-                       ipath_force_pio_avail_update(dd);
-                       ipath_update_pio_bufs(dd);
-                       updated++;
-                       i = first;
-                       goto rescan;
-               }
-
-               no_pio_bufs(dd);
-               buf = NULL;
-       } else {
-               if (i < dd->ipath_piobcnt2k)
-                       buf = (u32 __iomem *) (dd->ipath_pio2kbase +
-                                              i * dd->ipath_palign);
-               else
-                       buf = (u32 __iomem *)
-                               (dd->ipath_pio4kbase +
-                                (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
-               if (pbufnum)
-                       *pbufnum = i;
-       }
-
-       return buf;
-}
-
-/**
- * ipath_getpiobuf - find an available pio buffer
- * @dd: the infinipath device
- * @plen: the size of the PIO buffer needed in 32-bit words
- * @pbufnum: the buffer number is placed here
- */
-u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum)
-{
-       u32 __iomem *buf;
-       u32 pnum, nbufs;
-       u32 first, lasti;
-
-       if (plen + 1 >= IPATH_SMALLBUF_DWORDS) {
-               first = dd->ipath_piobcnt2k;
-               lasti = dd->ipath_lastpioindexl;
-       } else {
-               first = 0;
-               lasti = dd->ipath_lastpioindex;
-       }
-       nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-       buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti);
-
-       if (buf) {
-               /*
-                * Set next starting place.  It's just an optimization,
-                * it doesn't matter who wins on this, so no locking
-                */
-               if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
-                       dd->ipath_lastpioindexl = pnum + 1;
-               else
-                       dd->ipath_lastpioindex = pnum + 1;
-               if (dd->ipath_upd_pio_shadow)
-                       dd->ipath_upd_pio_shadow = 0;
-               if (dd->ipath_consec_nopiobuf)
-                       dd->ipath_consec_nopiobuf = 0;
-               ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
-                          pnum, (pnum < dd->ipath_piobcnt2k) ? 2 : 4, buf);
-               if (pbufnum)
-                       *pbufnum = pnum;
-
-       }
-       return buf;
-}
-
-/**
- * ipath_chg_pioavailkernel - change which send buffers are available for kernel
- * @dd: the infinipath device
- * @start: the starting send buffer number
- * @len: the number of send buffers
- * @avail: true if the buffers are available for kernel use, false otherwise
- */
-void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
-                             unsigned len, int avail)
-{
-       unsigned long flags;
-       unsigned end, cnt = 0;
-
-       /* There are two bits per send buffer (busy and generation) */
-       start *= 2;
-       end = start + len * 2;
-
-       spin_lock_irqsave(&ipath_pioavail_lock, flags);
-       /* Set or clear the busy bit in the shadow. */
-       while (start < end) {
-               if (avail) {
-                       unsigned long dma;
-                       int i, im;
-                       /*
-                        * the BUSY bit will never be set, because we disarm
-                        * the user buffers before we hand them back to the
-                        * kernel.  We do have to make sure the generation
-                        * bit is set correctly in shadow, since it could
-                        * have changed many times while allocated to user.
-                        * We can't use the bitmap functions on the full
-                        * dma array because it is always little-endian, so
-                        * we have to flip to host-order first.
-                        * BITS_PER_LONG is slightly wrong, since it's
-                        * always 64 bits per register in chip...
-                        * We only work on 64 bit kernels, so that's OK.
-                        */
-                       /* deal with 6110 chip bug on high register #s */
-                       i = start / BITS_PER_LONG;
-                       im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
-                               i ^ 1 : i;
-                       __clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT
-                               + start, dd->ipath_pioavailshadow);
-                       dma = (unsigned long) le64_to_cpu(
-                               dd->ipath_pioavailregs_dma[im]);
-                       if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
-                               + start) % BITS_PER_LONG, &dma))
-                               __set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
-                                       + start, dd->ipath_pioavailshadow);
-                       else
-                               __clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
-                                       + start, dd->ipath_pioavailshadow);
-                       __set_bit(start, dd->ipath_pioavailkernel);
-               } else {
-                       __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
-                               dd->ipath_pioavailshadow);
-                       __clear_bit(start, dd->ipath_pioavailkernel);
-               }
-               start += 2;
-       }
-
-       if (dd->ipath_pioupd_thresh) {
-               end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
-               cnt = bitmap_weight(dd->ipath_pioavailkernel, end);
-       }
-       spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-
-       /*
-        * When moving buffers from kernel to user, if number assigned to
-        * the user is less than the pio update threshold, and threshold
-        * is supported (cnt was computed > 0), drop the update threshold
-        * so we update at least once per allocated number of buffers.
-        * In any case, if the kernel buffers are less than the threshold,
-        * drop the threshold.  We don't bother increasing it, having once
-        * decreased it, since it would typically just cycle back and forth.
-        * If we don't decrease below buffers in use, we can wait a long
-        * time for an update, until some other context uses PIO buffers.
-        */
-       if (!avail && len < cnt)
-               cnt = len;
-       if (cnt < dd->ipath_pioupd_thresh) {
-               dd->ipath_pioupd_thresh = cnt;
-               ipath_dbg("Decreased pio update threshold to %u\n",
-                       dd->ipath_pioupd_thresh);
-               spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-               dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
-                       << INFINIPATH_S_UPDTHRESH_SHIFT);
-               dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
-                       << INFINIPATH_S_UPDTHRESH_SHIFT;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                       dd->ipath_sendctrl);
-               spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-       }
-}
-
-/**
- * ipath_create_rcvhdrq - create a receive header queue
- * @dd: the infinipath device
- * @pd: the port data
- *
- * this must be contiguous memory (from an i/o perspective), and must be
- * DMA'able (which means for some systems, it will go through an IOMMU,
- * or be forced into a low address range).
- */
-int ipath_create_rcvhdrq(struct ipath_devdata *dd,
-                        struct ipath_portdata *pd)
-{
-       int ret = 0;
-
-       if (!pd->port_rcvhdrq) {
-               dma_addr_t phys_hdrqtail;
-               gfp_t gfp_flags = GFP_USER | __GFP_COMP;
-               int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
-                               sizeof(u32), PAGE_SIZE);
-
-               pd->port_rcvhdrq = dma_alloc_coherent(
-                       &dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys,
-                       gfp_flags);
-
-               if (!pd->port_rcvhdrq) {
-                       ipath_dev_err(dd, "attempt to allocate %d bytes "
-                                     "for port %u rcvhdrq failed\n",
-                                     amt, pd->port_port);
-                       ret = -ENOMEM;
-                       goto bail;
-               }
-
-               if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
-                       pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
-                               &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
-                               GFP_KERNEL);
-                       if (!pd->port_rcvhdrtail_kvaddr) {
-                               ipath_dev_err(dd, "attempt to allocate 1 page "
-                                       "for port %u rcvhdrqtailaddr "
-                                       "failed\n", pd->port_port);
-                               ret = -ENOMEM;
-                               dma_free_coherent(&dd->pcidev->dev, amt,
-                                       pd->port_rcvhdrq,
-                                       pd->port_rcvhdrq_phys);
-                               pd->port_rcvhdrq = NULL;
-                               goto bail;
-                       }
-                       pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
-                       ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx "
-                                  "physical\n", pd->port_port,
-                                  (unsigned long long) phys_hdrqtail);
-               }
-
-               pd->port_rcvhdrq_size = amt;
-
-               ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu "
-                          "for port %u rcvhdr Q\n",
-                          amt >> PAGE_SHIFT, pd->port_rcvhdrq,
-                          (unsigned long) pd->port_rcvhdrq_phys,
-                          (unsigned long) pd->port_rcvhdrq_size,
-                          pd->port_port);
-       } else {
-               ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
-                          "hdrtailaddr@%p %llx physical\n",
-                          pd->port_port, pd->port_rcvhdrq,
-                          (unsigned long long) pd->port_rcvhdrq_phys,
-                          pd->port_rcvhdrtail_kvaddr, (unsigned long long)
-                          pd->port_rcvhdrqtailaddr_phys);
-       }
-       /* clear for security and sanity on each use */
-       memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
-       if (pd->port_rcvhdrtail_kvaddr)
-               memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
-
-       /*
-        * tell chip each time we init it, even if we are re-using previous
-        * memory (we zero the register at process close)
-        */
-       ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
-                             pd->port_port, pd->port_rcvhdrqtailaddr_phys);
-       ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
-                             pd->port_port, pd->port_rcvhdrq_phys);
-
-bail:
-       return ret;
-}
-
-
-/*
- * Flush all sends that might be in the ready to send state, as well as any
- * that are in the process of being sent.   Used whenever we need to be
- * sure the send side is idle.  Cleans up all buffer state by canceling
- * all pio buffers, and issuing an abort, which cleans up anything in the
- * launch fifo.  The cancel is superfluous on some chip versions, but
- * it's safer to always do it.
- * PIOAvail bits are updated by the chip as if normal send had happened.
- */
-void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
-{
-       unsigned long flags;
-
-       if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) {
-               ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n");
-               goto bail;
-       }
-       /*
-        * If we have SDMA, and it's not disabled, we have to kick off the
-        * abort state machine, provided we aren't already aborting.
-        * If we are in the process of aborting SDMA (!DISABLED, but ABORTING),
-        * we skip the rest of this routine. It is already "in progress"
-        */
-       if (dd->ipath_flags & IPATH_HAS_SEND_DMA) {
-               int skip_cancel;
-               unsigned long *statp = &dd->ipath_sdma_status;
-
-               spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-               skip_cancel =
-                       test_and_set_bit(IPATH_SDMA_ABORTING, statp)
-                       && !test_bit(IPATH_SDMA_DISABLED, statp);
-               spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-               if (skip_cancel)
-                       goto bail;
-       }
-
-       ipath_dbg("Cancelling all in-progress send buffers\n");
-
-       /* skip armlaunch errs for a while */
-       dd->ipath_lastcancel = jiffies + HZ / 2;
-
-       /*
-        * The abort bit is auto-clearing.  We also don't want pioavail
-        * update happening during this, and we don't want any other
-        * sends going out, so turn those off for the duration.  We read
-        * the scratch register to be sure that cancels and the abort
-        * have taken effect in the chip.  Otherwise two parts are same
-        * as ipath_force_pio_avail_update()
-        */
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD
-               | INFINIPATH_S_PIOENABLE);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-               dd->ipath_sendctrl | INFINIPATH_S_ABORT);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-       /* disarm all send buffers */
-       ipath_disarm_piobufs(dd, 0,
-               dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
-
-       if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-               set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
-
-       if (restore_sendctrl) {
-               /* else done by caller later if needed */
-               spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-               dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD |
-                       INFINIPATH_S_PIOENABLE;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                       dd->ipath_sendctrl);
-               /* and again, be sure all have hit the chip */
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-       }
-
-       if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) &&
-           !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) &&
-           test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) {
-               spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-               /* only wait so long for intr */
-               dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
-               dd->ipath_sdma_reset_wait = 200;
-               if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-                       tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
-               spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-       }
-bail:;
-}
-
-/*
- * Force an update of in-memory copy of the pioavail registers, when
- * needed for any of a variety of reasons.  We read the scratch register
- * to make it highly likely that the update will have happened by the
- * time we return.  If already off (as in cancel_sends above), this
- * routine is a nop, on the assumption that the caller will "do the
- * right thing".
- */
-void ipath_force_pio_avail_update(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) {
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                       dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                       dd->ipath_sendctrl);
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       }
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-}
-
-static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd,
-                               int linitcmd)
-{
-       u64 mod_wd;
-       static const char *what[4] = {
-               [0] = "NOP",
-               [INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN",
-               [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
-               [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
-       };
-
-       if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) {
-               /*
-                * If we are told to disable, note that so link-recovery
-                * code does not attempt to bring us back up.
-                */
-               preempt_disable();
-               dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
-               preempt_enable();
-       } else if (linitcmd) {
-               /*
-                * Any other linkinitcmd will lead to LINKDOWN and then
-                * to INIT (if all is well), so clear flag to let
-                * link-recovery code attempt to bring us back up.
-                */
-               preempt_disable();
-               dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
-               preempt_enable();
-       }
-
-       mod_wd = (linkcmd << dd->ibcc_lc_shift) |
-               (linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT);
-       ipath_cdbg(VERBOSE,
-               "Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n",
-               dd->ipath_unit, what[linkcmd], linitcmd,
-               ipath_ibcstatus_str[ipath_ib_linktrstate(dd,
-                       ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]);
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                        dd->ipath_ibcctrl | mod_wd);
-       /* read from chip so write is flushed */
-       (void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-}
-
-int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
-{
-       u32 lstate;
-       int ret;
-
-       switch (newstate) {
-       case IPATH_IB_LINKDOWN_ONLY:
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       case IPATH_IB_LINKDOWN:
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
-                                       INFINIPATH_IBCC_LINKINITCMD_POLL);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       case IPATH_IB_LINKDOWN_SLEEP:
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
-                                       INFINIPATH_IBCC_LINKINITCMD_SLEEP);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       case IPATH_IB_LINKDOWN_DISABLE:
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
-                                       INFINIPATH_IBCC_LINKINITCMD_DISABLE);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       case IPATH_IB_LINKARM:
-               if (dd->ipath_flags & IPATH_LINKARMED) {
-                       ret = 0;
-                       goto bail;
-               }
-               if (!(dd->ipath_flags &
-                     (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
-                       ret = -EINVAL;
-                       goto bail;
-               }
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0);
-
-               /*
-                * Since the port can transition to ACTIVE by receiving
-                * a non VL 15 packet, wait for either state.
-                */
-               lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
-               break;
-
-       case IPATH_IB_LINKACTIVE:
-               if (dd->ipath_flags & IPATH_LINKACTIVE) {
-                       ret = 0;
-                       goto bail;
-               }
-               if (!(dd->ipath_flags & IPATH_LINKARMED)) {
-                       ret = -EINVAL;
-                       goto bail;
-               }
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0);
-               lstate = IPATH_LINKACTIVE;
-               break;
-
-       case IPATH_IB_LINK_LOOPBACK:
-               dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
-               dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                                dd->ipath_ibcctrl);
-
-               /* turn heartbeat off, as it causes loopback to fail */
-               dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-                                      IPATH_IB_HRTBT_OFF);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       case IPATH_IB_LINK_EXTERNAL:
-               dev_info(&dd->pcidev->dev,
-                       "Disabling IB local loopback (normal)\n");
-               dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-                                      IPATH_IB_HRTBT_ON);
-               dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                                dd->ipath_ibcctrl);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       /*
-        * Heartbeat can be explicitly enabled by the user via
-        * "hrtbt_enable" "file", and if disabled, trying to enable here
-        * will have no effect.  Implicit changes (heartbeat off when
-        * loopback on, and vice versa) are included to ease testing.
-        */
-       case IPATH_IB_LINK_HRTBT:
-               ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-                       IPATH_IB_HRTBT_ON);
-               goto bail;
-
-       case IPATH_IB_LINK_NO_HRTBT:
-               ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-                       IPATH_IB_HRTBT_OFF);
-               goto bail;
-
-       default:
-               ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
-               ret = -EINVAL;
-               goto bail;
-       }
-       ret = ipath_wait_linkstate(dd, lstate, 2000);
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_set_mtu - set the MTU
- * @dd: the infinipath device
- * @arg: the new MTU
- *
- * we can handle "any" incoming size, the issue here is whether we
- * need to restrict our outgoing size.   For now, we don't do any
- * sanity checking on this, and we don't deal with what happens to
- * programs that are already running when the size changes.
- * NOTE: changing the MTU will usually cause the IBC to go back to
- * link INIT state...
- */
-int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
-{
-       u32 piosize;
-       int changed = 0;
-       int ret;
-
-       /*
-        * mtu is IB data payload max.  It's the largest power of 2 less
-        * than piosize (or even larger, since it only really controls the
-        * largest we can receive; we can send the max of the mtu and
-        * piosize).  We check that it's one of the valid IB sizes.
-        */
-       if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
-           (arg != 4096 || !ipath_mtu4096)) {
-               ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
-               ret = -EINVAL;
-               goto bail;
-       }
-       if (dd->ipath_ibmtu == arg) {
-               ret = 0;        /* same as current */
-               goto bail;
-       }
-
-       piosize = dd->ipath_ibmaxlen;
-       dd->ipath_ibmtu = arg;
-
-       if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
-               /* Only if it's not the initial value (or reset to it) */
-               if (piosize != dd->ipath_init_ibmaxlen) {
-                       if (arg > piosize && arg <= dd->ipath_init_ibmaxlen)
-                               piosize = dd->ipath_init_ibmaxlen;
-                       dd->ipath_ibmaxlen = piosize;
-                       changed = 1;
-               }
-       } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
-               piosize = arg + IPATH_PIO_MAXIBHDR;
-               ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
-                          "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
-                          arg);
-               dd->ipath_ibmaxlen = piosize;
-               changed = 1;
-       }
-
-       if (changed) {
-               u64 ibc = dd->ipath_ibcctrl, ibdw;
-               /*
-                * update our housekeeping variables, and set IBC max
-                * size, same as init code; max IBC is max we allow in
-                * buffer, less the qword pbc, plus 1 for ICRC, in dwords
-                */
-               dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32);
-               ibdw = (dd->ipath_ibmaxlen >> 2) + 1;
-               ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
-                        dd->ibcc_mpl_shift);
-               ibc |= ibdw << dd->ibcc_mpl_shift;
-               dd->ipath_ibcctrl = ibc;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                                dd->ipath_ibcctrl);
-               dd->ipath_f_tidtemplate(dd);
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-int ipath_set_lid(struct ipath_devdata *dd, u32 lid, u8 lmc)
-{
-       dd->ipath_lid = lid;
-       dd->ipath_lmc = lmc;
-
-       dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LIDLMC, lid |
-               (~((1U << lmc) - 1)) << 16);
-
-       dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", lid);
-
-       return 0;
-}
-
-
-/**
- * ipath_write_kreg_port - write a device's per-port 64-bit kernel register
- * @dd: the infinipath device
- * @regno: the register number to write
- * @port: the port containing the register
- * @value: the value to write
- *
- * Registers that vary with the chip implementation constants (port)
- * use this routine.
- */
-void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
-                         unsigned port, u64 value)
-{
-       u16 where;
-
-       if (port < dd->ipath_portcnt &&
-           (regno == dd->ipath_kregs->kr_rcvhdraddr ||
-            regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
-               where = regno + port;
-       else
-               where = -1;
-
-       ipath_write_kreg(dd, where, value);
-}
-
-/*
- * Following deal with the "obviously simple" task of overriding the state
- * of the LEDS, which normally indicate link physical and logical status.
- * The complications arise in dealing with different hardware mappings
- * and the board-dependent routine being called from interrupts.
- * and then there's the requirement to _flash_ them.
- */
-#define LED_OVER_FREQ_SHIFT 8
-#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
-/* Below is "non-zero" to force override, but both actual LEDs are off */
-#define LED_OVER_BOTH_OFF (8)
-
-static void ipath_run_led_override(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-       int timeoff;
-       int pidx;
-       u64 lstate, ltstate, val;
-
-       if (!(dd->ipath_flags & IPATH_INITTED))
-               return;
-
-       pidx = dd->ipath_led_override_phase++ & 1;
-       dd->ipath_led_override = dd->ipath_led_override_vals[pidx];
-       timeoff = dd->ipath_led_override_timeoff;
-
-       /*
-        * below potentially restores the LED values per current status,
-        * should also possibly setup the traffic-blink register,
-        * but leave that to per-chip functions.
-        */
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-       ltstate = ipath_ib_linktrstate(dd, val);
-       lstate = ipath_ib_linkstate(dd, val);
-
-       dd->ipath_f_setextled(dd, lstate, ltstate);
-       mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
-}
-
-void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val)
-{
-       int timeoff, freq;
-
-       if (!(dd->ipath_flags & IPATH_INITTED))
-               return;
-
-       /* First check if we are blinking. If not, use 1HZ polling */
-       timeoff = HZ;
-       freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
-
-       if (freq) {
-               /* For blink, set each phase from one nybble of val */
-               dd->ipath_led_override_vals[0] = val & 0xF;
-               dd->ipath_led_override_vals[1] = (val >> 4) & 0xF;
-               timeoff = (HZ << 4)/freq;
-       } else {
-               /* Non-blink set both phases the same. */
-               dd->ipath_led_override_vals[0] = val & 0xF;
-               dd->ipath_led_override_vals[1] = val & 0xF;
-       }
-       dd->ipath_led_override_timeoff = timeoff;
-
-       /*
-        * If the timer has not already been started, do so. Use a "quick"
-        * timeout so the function will be called soon, to look at our request.
-        */
-       if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) {
-               /* Need to start timer */
-               setup_timer(&dd->ipath_led_override_timer,
-                               ipath_run_led_override, (unsigned long)dd);
-
-               dd->ipath_led_override_timer.expires = jiffies + 1;
-               add_timer(&dd->ipath_led_override_timer);
-       } else
-               atomic_dec(&dd->ipath_led_override_timer_active);
-}
-
-/**
- * ipath_shutdown_device - shut down a device
- * @dd: the infinipath device
- *
- * This is called to make the device quiet when we are about to
- * unload the driver, and also when the device is administratively
- * disabled.   It does not free any data structures.
- * Everything it does has to be setup again by ipath_init_chip(dd,1)
- */
-void ipath_shutdown_device(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-
-       ipath_dbg("Shutting down the device\n");
-
-       ipath_hol_up(dd); /* make sure user processes aren't suspended */
-
-       dd->ipath_flags |= IPATH_LINKUNK;
-       dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
-                            IPATH_LINKINIT | IPATH_LINKARMED |
-                            IPATH_LINKACTIVE);
-       *dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF |
-                               IPATH_STATUS_IB_READY);
-
-       /* mask interrupts, but not errors */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-
-       dd->ipath_rcvctrl = 0;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                        dd->ipath_rcvctrl);
-
-       if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-               teardown_sdma(dd);
-
-       /*
-        * gracefully stop all sends allowing any in progress to trickle out
-        * first.
-        */
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       dd->ipath_sendctrl = 0;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-       /* flush it */
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-       /*
-        * enough for anything that's going to trickle out to have actually
-        * done so.
-        */
-       udelay(5);
-
-       dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */
-
-       ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE);
-       ipath_cancel_sends(dd, 0);
-
-       /*
-        * we are shutting down, so tell components that care.  We don't do
-        * this on just a link state change, much like ethernet, a cable
-        * unplug, etc. doesn't change driver state
-        */
-       signal_ib_event(dd, IB_EVENT_PORT_ERR);
-
-       /* disable IBC */
-       dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                        dd->ipath_control | INFINIPATH_C_FREEZEMODE);
-
-       /*
-        * clear SerdesEnable and turn the leds off; do this here because
-        * we are unloading, so don't count on interrupts to move along
-        * Turn the LEDs off explicitly for the same reason.
-        */
-       dd->ipath_f_quiet_serdes(dd);
-
-       /* stop all the timers that might still be running */
-       del_timer_sync(&dd->ipath_hol_timer);
-       if (dd->ipath_stats_timer_active) {
-               del_timer_sync(&dd->ipath_stats_timer);
-               dd->ipath_stats_timer_active = 0;
-       }
-       if (dd->ipath_intrchk_timer.data) {
-               del_timer_sync(&dd->ipath_intrchk_timer);
-               dd->ipath_intrchk_timer.data = 0;
-       }
-       if (atomic_read(&dd->ipath_led_override_timer_active)) {
-               del_timer_sync(&dd->ipath_led_override_timer);
-               atomic_set(&dd->ipath_led_override_timer_active, 0);
-       }
-
-       /*
-        * clear all interrupts and errors, so that the next time the driver
-        * is loaded or device is enabled, we know that whatever is set
-        * happened while we were unloaded
-        */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-                        ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
-
-       ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
-       ipath_update_eeprom_log(dd);
-}
-
-/**
- * ipath_free_pddata - free a port's allocated data
- * @dd: the infinipath device
- * @pd: the portdata structure
- *
- * free up any allocated data for a port
- * This should not touch anything that would affect a simultaneous
- * re-allocation of port data, because it is called after ipath_mutex
- * is released (and can be called from reinit as well).
- * It should never change any chip state, or global driver state.
- * (The only exception to global state is freeing the port0 port0_skbs.)
- */
-void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
-{
-       if (!pd)
-               return;
-
-       if (pd->port_rcvhdrq) {
-               ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p "
-                          "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq,
-                          (unsigned long) pd->port_rcvhdrq_size);
-               dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size,
-                                 pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
-               pd->port_rcvhdrq = NULL;
-               if (pd->port_rcvhdrtail_kvaddr) {
-                       dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-                                        pd->port_rcvhdrtail_kvaddr,
-                                        pd->port_rcvhdrqtailaddr_phys);
-                       pd->port_rcvhdrtail_kvaddr = NULL;
-               }
-       }
-       if (pd->port_port && pd->port_rcvegrbuf) {
-               unsigned e;
-
-               for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
-                       void *base = pd->port_rcvegrbuf[e];
-                       size_t size = pd->port_rcvegrbuf_size;
-
-                       ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "
-                                  "chunk %u/%u\n", base,
-                                  (unsigned long) size,
-                                  e, pd->port_rcvegrbuf_chunks);
-                       dma_free_coherent(&dd->pcidev->dev, size,
-                               base, pd->port_rcvegrbuf_phys[e]);
-               }
-               kfree(pd->port_rcvegrbuf);
-               pd->port_rcvegrbuf = NULL;
-               kfree(pd->port_rcvegrbuf_phys);
-               pd->port_rcvegrbuf_phys = NULL;
-               pd->port_rcvegrbuf_chunks = 0;
-       } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
-               unsigned e;
-               struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;
-
-               dd->ipath_port0_skbinfo = NULL;
-               ipath_cdbg(VERBOSE, "free closed port %d "
-                          "ipath_port0_skbinfo @ %p\n", pd->port_port,
-                          skbinfo);
-               for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++)
-                       if (skbinfo[e].skb) {
-                               pci_unmap_single(dd->pcidev, skbinfo[e].phys,
-                                                dd->ipath_ibmaxlen,
-                                                PCI_DMA_FROMDEVICE);
-                               dev_kfree_skb(skbinfo[e].skb);
-                       }
-               vfree(skbinfo);
-       }
-       kfree(pd->port_tid_pg_list);
-       vfree(pd->subport_uregbase);
-       vfree(pd->subport_rcvegrbuf);
-       vfree(pd->subport_rcvhdr_base);
-       kfree(pd);
-}
-
-static int __init infinipath_init(void)
-{
-       int ret;
-
-       if (ipath_debug & __IPATH_DBG)
-               printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
-
-       /*
-        * These must be called before the driver is registered with
-        * the PCI subsystem.
-        */
-       idr_init(&unit_table);
-
-       ret = pci_register_driver(&ipath_driver);
-       if (ret < 0) {
-               printk(KERN_ERR IPATH_DRV_NAME
-                      ": Unable to register driver: error %d\n", -ret);
-               goto bail_unit;
-       }
-
-       ret = ipath_init_ipathfs();
-       if (ret < 0) {
-               printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
-                      "ipathfs: error %d\n", -ret);
-               goto bail_pci;
-       }
-
-       goto bail;
-
-bail_pci:
-       pci_unregister_driver(&ipath_driver);
-
-bail_unit:
-       idr_destroy(&unit_table);
-
-bail:
-       return ret;
-}
-
-static void __exit infinipath_cleanup(void)
-{
-       ipath_exit_ipathfs();
-
-       ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
-       pci_unregister_driver(&ipath_driver);
-
-       idr_destroy(&unit_table);
-}
-
-/**
- * ipath_reset_device - reset the chip if possible
- * @unit: the device to reset
- *
- * Whether or not reset is successful, we attempt to re-initialize the chip
- * (that is, much like a driver unload/reload).  We clear the INITTED flag
- * so that the various entry points will fail until we reinitialize.  For
- * now, we only allow this if no user ports are open that use chip resources
- */
-int ipath_reset_device(int unit)
-{
-       int ret, i;
-       struct ipath_devdata *dd = ipath_lookup(unit);
-       unsigned long flags;
-
-       if (!dd) {
-               ret = -ENODEV;
-               goto bail;
-       }
-
-       if (atomic_read(&dd->ipath_led_override_timer_active)) {
-               /* Need to stop LED timer, _then_ shut off LEDs */
-               del_timer_sync(&dd->ipath_led_override_timer);
-               atomic_set(&dd->ipath_led_override_timer_active, 0);
-       }
-
-       /* Shut off LEDs after we are sure timer is not running */
-       dd->ipath_led_override = LED_OVER_BOTH_OFF;
-       dd->ipath_f_setextled(dd, 0, 0);
-
-       dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);
-
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {
-               dev_info(&dd->pcidev->dev, "Invalid unit number %u or "
-                        "not initialized or not present\n", unit);
-               ret = -ENXIO;
-               goto bail;
-       }
-
-       spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-       if (dd->ipath_pd)
-               for (i = 1; i < dd->ipath_cfgports; i++) {
-                       if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
-                               continue;
-                       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-                       ipath_dbg("unit %u port %d is in use "
-                                 "(PID %u cmd %s), can't reset\n",
-                                 unit, i,
-                                 pid_nr(dd->ipath_pd[i]->port_pid),
-                                 dd->ipath_pd[i]->port_comm);
-                       ret = -EBUSY;
-                       goto bail;
-               }
-       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-
-       if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-               teardown_sdma(dd);
-
-       dd->ipath_flags &= ~IPATH_INITTED;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-       ret = dd->ipath_f_reset(dd);
-       if (ret == 1) {
-               ipath_dbg("Reinitializing unit %u after reset attempt\n",
-                         unit);
-               ret = ipath_init_chip(dd, 1);
-       } else
-               ret = -EAGAIN;
-       if (ret)
-               ipath_dev_err(dd, "Reinitialize unit %u after "
-                             "reset failed with %d\n", unit, ret);
-       else
-               dev_info(&dd->pcidev->dev, "Reinitialized unit %u after "
-                        "resetting\n", unit);
-
-bail:
-       return ret;
-}
-
-/*
- * send a signal to all the processes that have the driver open
- * through the normal interfaces (i.e., everything other than diags
- * interface).  Returns number of signalled processes.
- */
-static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
-{
-       int i, sub, any = 0;
-       struct pid *pid;
-       unsigned long flags;
-
-       if (!dd->ipath_pd)
-               return 0;
-
-       spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-       for (i = 1; i < dd->ipath_cfgports; i++) {
-               if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
-                       continue;
-               pid = dd->ipath_pd[i]->port_pid;
-               if (!pid)
-                       continue;
-
-               dev_info(&dd->pcidev->dev, "context %d in use "
-                         "(PID %u), sending signal %d\n",
-                         i, pid_nr(pid), sig);
-               kill_pid(pid, sig, 1);
-               any++;
-               for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
-                       pid = dd->ipath_pd[i]->port_subpid[sub];
-                       if (!pid)
-                               continue;
-                       dev_info(&dd->pcidev->dev, "sub-context "
-                               "%d:%d in use (PID %u), sending "
-                               "signal %d\n", i, sub, pid_nr(pid), sig);
-                       kill_pid(pid, sig, 1);
-                       any++;
-               }
-       }
-       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-       return any;
-}
-
-static void ipath_hol_signal_down(struct ipath_devdata *dd)
-{
-       if (ipath_signal_procs(dd, SIGSTOP))
-               ipath_dbg("Stopped some processes\n");
-       ipath_cancel_sends(dd, 1);
-}
-
-
-static void ipath_hol_signal_up(struct ipath_devdata *dd)
-{
-       if (ipath_signal_procs(dd, SIGCONT))
-               ipath_dbg("Continued some processes\n");
-}
-
-/*
- * link is down, stop any users processes, and flush pending sends
- * to prevent HoL blocking, then start the HoL timer that
- * periodically continues, then stop procs, so they can detect
- * link down if they want, and do something about it.
- * Timer may already be running, so use mod_timer, not add_timer.
- */
-void ipath_hol_down(struct ipath_devdata *dd)
-{
-       dd->ipath_hol_state = IPATH_HOL_DOWN;
-       ipath_hol_signal_down(dd);
-       dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
-       dd->ipath_hol_timer.expires = jiffies +
-               msecs_to_jiffies(ipath_hol_timeout_ms);
-       mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
-}
-
-/*
- * link is up, continue any user processes, and ensure timer
- * is a nop, if running.  Let timer keep running, if set; it
- * will nop when it sees the link is up
- */
-void ipath_hol_up(struct ipath_devdata *dd)
-{
-       ipath_hol_signal_up(dd);
-       dd->ipath_hol_state = IPATH_HOL_UP;
-}
-
-/*
- * toggle the running/not running state of user proceses
- * to prevent HoL blocking on chip resources, but still allow
- * user processes to do link down special case handling.
- * Should only be called via the timer
- */
-void ipath_hol_event(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-
-       if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP
-               && dd->ipath_hol_state != IPATH_HOL_UP) {
-               dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
-               ipath_dbg("Stopping processes\n");
-               ipath_hol_signal_down(dd);
-       } else { /* may do "extra" if also in ipath_hol_up() */
-               dd->ipath_hol_next = IPATH_HOL_DOWNSTOP;
-               ipath_dbg("Continuing processes\n");
-               ipath_hol_signal_up(dd);
-       }
-       if (dd->ipath_hol_state == IPATH_HOL_UP)
-               ipath_dbg("link's up, don't resched timer\n");
-       else {
-               dd->ipath_hol_timer.expires = jiffies +
-                       msecs_to_jiffies(ipath_hol_timeout_ms);
-               mod_timer(&dd->ipath_hol_timer,
-                       dd->ipath_hol_timer.expires);
-       }
-}
-
-int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
-{
-       u64 val;
-
-       if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK)
-               return -1;
-       if (dd->ipath_rx_pol_inv != new_pol_inv) {
-               dd->ipath_rx_pol_inv = new_pol_inv;
-               val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
-               val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
-                        INFINIPATH_XGXS_RX_POL_SHIFT);
-               val |= ((u64)dd->ipath_rx_pol_inv) <<
-                       INFINIPATH_XGXS_RX_POL_SHIFT;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-       }
-       return 0;
-}
-
-/*
- * Disable and enable the armlaunch error.  Used for PIO bandwidth testing on
- * the 7220, which is count-based, rather than trigger-based.  Safe for the
- * driver check, since it's at init.   Not completely safe when used for
- * user-mode checking, since some error checking can be lost, but not
- * particularly risky, and only has problematic side-effects in the face of
- * very buggy user code.  There is no reference counting, but that's also
- * fine, given the intended use.
- */
-void ipath_enable_armlaunch(struct ipath_devdata *dd)
-{
-       dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
-               INFINIPATH_E_SPIOARMLAUNCH);
-       dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-               dd->ipath_errormask);
-}
-
-void ipath_disable_armlaunch(struct ipath_devdata *dd)
-{
-       /* so don't re-enable if already set */
-       dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH;
-       dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-               dd->ipath_errormask);
-}
-
-module_init(infinipath_init);
-module_exit(infinipath_cleanup);
diff --git a/drivers/staging/rdma/ipath/ipath_eeprom.c b/drivers/staging/rdma/ipath/ipath_eeprom.c
deleted file mode 100644 (file)
index ef84107..0000000
+++ /dev/null
@@ -1,1183 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_kernel.h"
-
-/*
- * InfiniPath I2C driver for a serial eeprom.  This is not a generic
- * I2C interface.  For a start, the device we're using (Atmel AT24C11)
- * doesn't work like a regular I2C device.  It looks like one
- * electrically, but not logically.  Normal I2C devices have a single
- * 7-bit or 10-bit I2C address that they respond to.  Valid 7-bit
- * addresses range from 0x03 to 0x77.  Addresses 0x00 to 0x02 and 0x78
- * to 0x7F are special reserved addresses (e.g. 0x00 is the "general
- * call" address.)  The Atmel device, on the other hand, responds to ALL
- * 7-bit addresses.  It's designed to be the only device on a given I2C
- * bus.  A 7-bit address corresponds to the memory address within the
- * Atmel device itself.
- *
- * Also, the timing requirements mean more than simple software
- * bitbanging, with readbacks from chip to ensure timing (simple udelay
- * is not enough).
- *
- * This all means that accessing the device is specialized enough
- * that using the standard kernel I2C bitbanging interface would be
- * impossible.  For example, the core I2C eeprom driver expects to find
- * a device at one or more of a limited set of addresses only.  It doesn't
- * allow writing to an eeprom.  It also doesn't provide any means of
- * accessing eeprom contents from within the kernel, only via sysfs.
- */
-
-/* Added functionality for IBA7220-based cards */
-#define IPATH_EEPROM_DEV_V1 0xA0
-#define IPATH_EEPROM_DEV_V2 0xA2
-#define IPATH_TEMP_DEV 0x98
-#define IPATH_BAD_DEV (IPATH_EEPROM_DEV_V2+2)
-#define IPATH_NO_DEV (0xFF)
-
-/*
- * The number of I2C chains is proliferating. Table below brings
- * some order to the madness. The basic principle is that the
- * table is scanned from the top, and a "probe" is made to the
- * device probe_dev. If that succeeds, the chain is considered
- * to be of that type, and dd->i2c_chain_type is set to the index+1
- * of the entry.
- * The +1 is so static initialization can mean "unknown, do probe."
- */
-static struct i2c_chain_desc {
-       u8 probe_dev;   /* If seen at probe, chain is this type */
-       u8 eeprom_dev;  /* Dev addr (if any) for EEPROM */
-       u8 temp_dev;    /* Dev Addr (if any) for Temp-sense */
-} i2c_chains[] = {
-       { IPATH_BAD_DEV, IPATH_NO_DEV, IPATH_NO_DEV }, /* pre-iba7220 bds */
-       { IPATH_EEPROM_DEV_V1, IPATH_EEPROM_DEV_V1, IPATH_TEMP_DEV}, /* V1 */
-       { IPATH_EEPROM_DEV_V2, IPATH_EEPROM_DEV_V2, IPATH_TEMP_DEV}, /* V2 */
-       { IPATH_NO_DEV }
-};
-
-enum i2c_type {
-       i2c_line_scl = 0,
-       i2c_line_sda
-};
-
-enum i2c_state {
-       i2c_line_low = 0,
-       i2c_line_high
-};
-
-#define READ_CMD 1
-#define WRITE_CMD 0
-
-/**
- * i2c_gpio_set - set a GPIO line
- * @dd: the infinipath device
- * @line: the line to set
- * @new_line_state: the state to set
- *
- * Returns 0 if the line was set to the new state successfully, non-zero
- * on error.
- */
-static int i2c_gpio_set(struct ipath_devdata *dd,
-                       enum i2c_type line,
-                       enum i2c_state new_line_state)
-{
-       u64 out_mask, dir_mask, *gpioval;
-       unsigned long flags = 0;
-
-       gpioval = &dd->ipath_gpio_out;
-
-       if (line == i2c_line_scl) {
-               dir_mask = dd->ipath_gpio_scl;
-               out_mask = (1UL << dd->ipath_gpio_scl_num);
-       } else {
-               dir_mask = dd->ipath_gpio_sda;
-               out_mask = (1UL << dd->ipath_gpio_sda_num);
-       }
-
-       spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-       if (new_line_state == i2c_line_high) {
-               /* tri-state the output rather than force high */
-               dd->ipath_extctrl &= ~dir_mask;
-       } else {
-               /* config line to be an output */
-               dd->ipath_extctrl |= dir_mask;
-       }
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
-
-       /* set output as well (no real verify) */
-       if (new_line_state == i2c_line_high)
-               *gpioval |= out_mask;
-       else
-               *gpioval &= ~out_mask;
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
-       spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-
-       return 0;
-}
-
-/**
- * i2c_gpio_get - get a GPIO line state
- * @dd: the infinipath device
- * @line: the line to get
- * @curr_statep: where to put the line state
- *
- * Returns 0 if the line was set to the new state successfully, non-zero
- * on error.  curr_state is not set on error.
- */
-static int i2c_gpio_get(struct ipath_devdata *dd,
-                       enum i2c_type line,
-                       enum i2c_state *curr_statep)
-{
-       u64 read_val, mask;
-       int ret;
-       unsigned long flags = 0;
-
-       /* check args */
-       if (curr_statep == NULL) {
-               ret = 1;
-               goto bail;
-       }
-
-       /* config line to be an input */
-       if (line == i2c_line_scl)
-               mask = dd->ipath_gpio_scl;
-       else
-               mask = dd->ipath_gpio_sda;
-
-       spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-       dd->ipath_extctrl &= ~mask;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
-       /*
-        * Below is very unlikely to reflect true input state if Output
-        * Enable actually changed.
-        */
-       read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
-       spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-
-       if (read_val & mask)
-               *curr_statep = i2c_line_high;
-       else
-               *curr_statep = i2c_line_low;
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * i2c_wait_for_writes - wait for a write
- * @dd: the infinipath device
- *
- * We use this instead of udelay directly, so we can make sure
- * that previous register writes have been flushed all the way
- * to the chip.  Since we are delaying anyway, the cost doesn't
- * hurt, and makes the bit twiddling more regular
- */
-static void i2c_wait_for_writes(struct ipath_devdata *dd)
-{
-       (void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
-       rmb();
-}
-
-static void scl_out(struct ipath_devdata *dd, u8 bit)
-{
-       udelay(1);
-       i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low);
-
-       i2c_wait_for_writes(dd);
-}
-
-static void sda_out(struct ipath_devdata *dd, u8 bit)
-{
-       i2c_gpio_set(dd, i2c_line_sda, bit ? i2c_line_high : i2c_line_low);
-
-       i2c_wait_for_writes(dd);
-}
-
-static u8 sda_in(struct ipath_devdata *dd, int wait)
-{
-       enum i2c_state bit;
-
-       if (i2c_gpio_get(dd, i2c_line_sda, &bit))
-               ipath_dbg("get bit failed!\n");
-
-       if (wait)
-               i2c_wait_for_writes(dd);
-
-       return bit == i2c_line_high ? 1U : 0;
-}
-
-/**
- * i2c_ackrcv - see if ack following write is true
- * @dd: the infinipath device
- */
-static int i2c_ackrcv(struct ipath_devdata *dd)
-{
-       u8 ack_received;
-
-       /* AT ENTRY SCL = LOW */
-       /* change direction, ignore data */
-       ack_received = sda_in(dd, 1);
-       scl_out(dd, i2c_line_high);
-       ack_received = sda_in(dd, 1) == 0;
-       scl_out(dd, i2c_line_low);
-       return ack_received;
-}
-
-/**
- * rd_byte - read a byte, leaving ACK, STOP, etc up to caller
- * @dd: the infinipath device
- *
- * Returns byte shifted out of device
- */
-static int rd_byte(struct ipath_devdata *dd)
-{
-       int bit_cntr, data;
-
-       data = 0;
-
-       for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) {
-               data <<= 1;
-               scl_out(dd, i2c_line_high);
-               data |= sda_in(dd, 0);
-               scl_out(dd, i2c_line_low);
-       }
-       return data;
-}
-
-/**
- * wr_byte - write a byte, one bit at a time
- * @dd: the infinipath device
- * @data: the byte to write
- *
- * Returns 0 if we got the following ack, otherwise 1
- */
-static int wr_byte(struct ipath_devdata *dd, u8 data)
-{
-       int bit_cntr;
-       u8 bit;
-
-       for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) {
-               bit = (data >> bit_cntr) & 1;
-               sda_out(dd, bit);
-               scl_out(dd, i2c_line_high);
-               scl_out(dd, i2c_line_low);
-       }
-       return (!i2c_ackrcv(dd)) ? 1 : 0;
-}
-
-static void send_ack(struct ipath_devdata *dd)
-{
-       sda_out(dd, i2c_line_low);
-       scl_out(dd, i2c_line_high);
-       scl_out(dd, i2c_line_low);
-       sda_out(dd, i2c_line_high);
-}
-
-/**
- * i2c_startcmd - transmit the start condition, followed by address/cmd
- * @dd: the infinipath device
- * @offset_dir: direction byte
- *
- *      (both clock/data high, clock high, data low while clock is high)
- */
-static int i2c_startcmd(struct ipath_devdata *dd, u8 offset_dir)
-{
-       int res;
-
-       /* issue start sequence */
-       sda_out(dd, i2c_line_high);
-       scl_out(dd, i2c_line_high);
-       sda_out(dd, i2c_line_low);
-       scl_out(dd, i2c_line_low);
-
-       /* issue length and direction byte */
-       res = wr_byte(dd, offset_dir);
-
-       if (res)
-               ipath_cdbg(VERBOSE, "No ack to complete start\n");
-
-       return res;
-}
-
-/**
- * stop_cmd - transmit the stop condition
- * @dd: the infinipath device
- *
- * (both clock/data low, clock high, data high while clock is high)
- */
-static void stop_cmd(struct ipath_devdata *dd)
-{
-       scl_out(dd, i2c_line_low);
-       sda_out(dd, i2c_line_low);
-       scl_out(dd, i2c_line_high);
-       sda_out(dd, i2c_line_high);
-       udelay(2);
-}
-
-/**
- * eeprom_reset - reset I2C communication
- * @dd: the infinipath device
- */
-
-static int eeprom_reset(struct ipath_devdata *dd)
-{
-       int clock_cycles_left = 9;
-       u64 *gpioval = &dd->ipath_gpio_out;
-       int ret;
-       unsigned long flags;
-
-       spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-       /* Make sure shadows are consistent */
-       dd->ipath_extctrl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
-       *gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out);
-       spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-
-       ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg "
-                  "is %llx\n", (unsigned long long) *gpioval);
-
-       /*
-        * This is to get the i2c into a known state, by first going low,
-        * then tristate sda (and then tristate scl as first thing
-        * in loop)
-        */
-       scl_out(dd, i2c_line_low);
-       sda_out(dd, i2c_line_high);
-
-       /* Clock up to 9 cycles looking for SDA hi, then issue START and STOP */
-       while (clock_cycles_left--) {
-               scl_out(dd, i2c_line_high);
-
-               /* SDA seen high, issue START by dropping it while SCL high */
-               if (sda_in(dd, 0)) {
-                       sda_out(dd, i2c_line_low);
-                       scl_out(dd, i2c_line_low);
-                       /* ATMEL spec says must be followed by STOP. */
-                       scl_out(dd, i2c_line_high);
-                       sda_out(dd, i2c_line_high);
-                       ret = 0;
-                       goto bail;
-               }
-
-               scl_out(dd, i2c_line_low);
-       }
-
-       ret = 1;
-
-bail:
-       return ret;
-}
-
-/*
- * Probe for I2C device at specified address. Returns 0 for "success"
- * to match rest of this file.
- * Leave bus in "reasonable" state for further commands.
- */
-static int i2c_probe(struct ipath_devdata *dd, int devaddr)
-{
-       int ret;
-
-       ret = eeprom_reset(dd);
-       if (ret) {
-               ipath_dev_err(dd, "Failed reset probing device 0x%02X\n",
-                             devaddr);
-               return ret;
-       }
-       /*
-        * Reset no longer leaves bus in start condition, so normal
-        * i2c_startcmd() will do.
-        */
-       ret = i2c_startcmd(dd, devaddr | READ_CMD);
-       if (ret)
-               ipath_cdbg(VERBOSE, "Failed startcmd for device 0x%02X\n",
-                          devaddr);
-       else {
-               /*
-                * Device did respond. Complete a single-byte read, because some
-                * devices apparently cannot handle STOP immediately after they
-                * ACK the start-cmd.
-                */
-               int data;
-               data = rd_byte(dd);
-               stop_cmd(dd);
-               ipath_cdbg(VERBOSE, "Response from device 0x%02X\n", devaddr);
-       }
-       return ret;
-}
-
-/*
- * Returns the "i2c type". This is a pointer to a struct that describes
- * the I2C chain on this board. To minimize impact on struct ipath_devdata,
- * the (small integer) index into the table is actually memoized, rather
- * then the pointer.
- * Memoization is because the type is determined on the first call per chip.
- * An alternative would be to move type determination to early
- * init code.
- */
-static struct i2c_chain_desc *ipath_i2c_type(struct ipath_devdata *dd)
-{
-       int idx;
-
-       /* Get memoized index, from previous successful probes */
-       idx = dd->ipath_i2c_chain_type - 1;
-       if (idx >= 0 && idx < (ARRAY_SIZE(i2c_chains) - 1))
-               goto done;
-
-       idx = 0;
-       while (i2c_chains[idx].probe_dev != IPATH_NO_DEV) {
-               /* if probe succeeds, this is type */
-               if (!i2c_probe(dd, i2c_chains[idx].probe_dev))
-                       break;
-               ++idx;
-       }
-
-       /*
-        * Old EEPROM (first entry) may require a reset after probe,
-        * rather than being able to "start" after "stop"
-        */
-       if (idx == 0)
-               eeprom_reset(dd);
-
-       if (i2c_chains[idx].probe_dev == IPATH_NO_DEV)
-               idx = -1;
-       else
-               dd->ipath_i2c_chain_type = idx + 1;
-done:
-       return (idx >= 0) ? i2c_chains + idx : NULL;
-}
-
-static int ipath_eeprom_internal_read(struct ipath_devdata *dd,
-                                       u8 eeprom_offset, void *buffer, int len)
-{
-       int ret;
-       struct i2c_chain_desc *icd;
-       u8 *bp = buffer;
-
-       ret = 1;
-       icd = ipath_i2c_type(dd);
-       if (!icd)
-               goto bail;
-
-       if (icd->eeprom_dev == IPATH_NO_DEV) {
-               /* legacy not-really-I2C */
-               ipath_cdbg(VERBOSE, "Start command only address\n");
-               eeprom_offset = (eeprom_offset << 1) | READ_CMD;
-               ret = i2c_startcmd(dd, eeprom_offset);
-       } else {
-               /* Actual I2C */
-               ipath_cdbg(VERBOSE, "Start command uses devaddr\n");
-               if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) {
-                       ipath_dbg("Failed EEPROM startcmd\n");
-                       stop_cmd(dd);
-                       ret = 1;
-                       goto bail;
-               }
-               ret = wr_byte(dd, eeprom_offset);
-               stop_cmd(dd);
-               if (ret) {
-                       ipath_dev_err(dd, "Failed to write EEPROM address\n");
-                       ret = 1;
-                       goto bail;
-               }
-               ret = i2c_startcmd(dd, icd->eeprom_dev | READ_CMD);
-       }
-       if (ret) {
-               ipath_dbg("Failed startcmd for dev %02X\n", icd->eeprom_dev);
-               stop_cmd(dd);
-               ret = 1;
-               goto bail;
-       }
-
-       /*
-        * eeprom keeps clocking data out as long as we ack, automatically
-        * incrementing the address.
-        */
-       while (len-- > 0) {
-               /* get and store data */
-               *bp++ = rd_byte(dd);
-               /* send ack if not the last byte */
-               if (len)
-                       send_ack(dd);
-       }
-
-       stop_cmd(dd);
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-static int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset,
-                                      const void *buffer, int len)
-{
-       int sub_len;
-       const u8 *bp = buffer;
-       int max_wait_time, i;
-       int ret;
-       struct i2c_chain_desc *icd;
-
-       ret = 1;
-       icd = ipath_i2c_type(dd);
-       if (!icd)
-               goto bail;
-
-       while (len > 0) {
-               if (icd->eeprom_dev == IPATH_NO_DEV) {
-                       if (i2c_startcmd(dd,
-                                        (eeprom_offset << 1) | WRITE_CMD)) {
-                               ipath_dbg("Failed to start cmd offset %u\n",
-                                       eeprom_offset);
-                               goto failed_write;
-                       }
-               } else {
-                       /* Real I2C */
-                       if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) {
-                               ipath_dbg("Failed EEPROM startcmd\n");
-                               goto failed_write;
-                       }
-                       ret = wr_byte(dd, eeprom_offset);
-                       if (ret) {
-                               ipath_dev_err(dd, "Failed to write EEPROM "
-                                             "address\n");
-                               goto failed_write;
-                       }
-               }
-
-               sub_len = min(len, 4);
-               eeprom_offset += sub_len;
-               len -= sub_len;
-
-               for (i = 0; i < sub_len; i++) {
-                       if (wr_byte(dd, *bp++)) {
-                               ipath_dbg("no ack after byte %u/%u (%u "
-                                         "total remain)\n", i, sub_len,
-                                         len + sub_len - i);
-                               goto failed_write;
-                       }
-               }
-
-               stop_cmd(dd);
-
-               /*
-                * wait for write complete by waiting for a successful
-                * read (the chip replies with a zero after the write
-                * cmd completes, and before it writes to the eeprom.
-                * The startcmd for the read will fail the ack until
-                * the writes have completed.   We do this inline to avoid
-                * the debug prints that are in the real read routine
-                * if the startcmd fails.
-                * We also use the proper device address, so it doesn't matter
-                * whether we have real eeprom_dev. legacy likes any address.
-                */
-               max_wait_time = 100;
-               while (i2c_startcmd(dd, icd->eeprom_dev | READ_CMD)) {
-                       stop_cmd(dd);
-                       if (!--max_wait_time) {
-                               ipath_dbg("Did not get successful read to "
-                                         "complete write\n");
-                               goto failed_write;
-                       }
-               }
-               /* now read (and ignore) the resulting byte */
-               rd_byte(dd);
-               stop_cmd(dd);
-       }
-
-       ret = 0;
-       goto bail;
-
-failed_write:
-       stop_cmd(dd);
-       ret = 1;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_eeprom_read - receives bytes from the eeprom via I2C
- * @dd: the infinipath device
- * @eeprom_offset: address to read from
- * @buffer: where to store result
- * @len: number of bytes to receive
- */
-int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
-                       void *buff, int len)
-{
-       int ret;
-
-       ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-       if (!ret) {
-               ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len);
-               mutex_unlock(&dd->ipath_eep_lock);
-       }
-
-       return ret;
-}
-
-/**
- * ipath_eeprom_write - writes data to the eeprom via I2C
- * @dd: the infinipath device
- * @eeprom_offset: where to place data
- * @buffer: data to write
- * @len: number of bytes to write
- */
-int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
-                       const void *buff, int len)
-{
-       int ret;
-
-       ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-       if (!ret) {
-               ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len);
-               mutex_unlock(&dd->ipath_eep_lock);
-       }
-
-       return ret;
-}
-
-static u8 flash_csum(struct ipath_flash *ifp, int adjust)
-{
-       u8 *ip = (u8 *) ifp;
-       u8 csum = 0, len;
-
-       /*
-        * Limit length checksummed to max length of actual data.
-        * Checksum of erased eeprom will still be bad, but we avoid
-        * reading past the end of the buffer we were passed.
-        */
-       len = ifp->if_length;
-       if (len > sizeof(struct ipath_flash))
-               len = sizeof(struct ipath_flash);
-       while (len--)
-               csum += *ip++;
-       csum -= ifp->if_csum;
-       csum = ~csum;
-       if (adjust)
-               ifp->if_csum = csum;
-
-       return csum;
-}
-
-/**
- * ipath_get_guid - get the GUID from the i2c device
- * @dd: the infinipath device
- *
- * We have the capability to use the ipath_nguid field, and get
- * the guid from the first chip's flash, to use for all of them.
- */
-void ipath_get_eeprom_info(struct ipath_devdata *dd)
-{
-       void *buf;
-       struct ipath_flash *ifp;
-       __be64 guid;
-       int len, eep_stat;
-       u8 csum, *bguid;
-       int t = dd->ipath_unit;
-       struct ipath_devdata *dd0 = ipath_lookup(0);
-
-       if (t && dd0->ipath_nguid > 1 && t <= dd0->ipath_nguid) {
-               u8 oguid;
-               dd->ipath_guid = dd0->ipath_guid;
-               bguid = (u8 *) & dd->ipath_guid;
-
-               oguid = bguid[7];
-               bguid[7] += t;
-               if (oguid > bguid[7]) {
-                       if (bguid[6] == 0xff) {
-                               if (bguid[5] == 0xff) {
-                                       ipath_dev_err(
-                                               dd,
-                                               "Can't set %s GUID from "
-                                               "base, wraps to OUI!\n",
-                                               ipath_get_unit_name(t));
-                                       dd->ipath_guid = 0;
-                                       goto bail;
-                               }
-                               bguid[5]++;
-                       }
-                       bguid[6]++;
-               }
-               dd->ipath_nguid = 1;
-
-               ipath_dbg("nguid %u, so adding %u to device 0 guid, "
-                         "for %llx\n",
-                         dd0->ipath_nguid, t,
-                         (unsigned long long) be64_to_cpu(dd->ipath_guid));
-               goto bail;
-       }
-
-       /*
-        * read full flash, not just currently used part, since it may have
-        * been written with a newer definition
-        * */
-       len = sizeof(struct ipath_flash);
-       buf = vmalloc(len);
-       if (!buf) {
-               ipath_dev_err(dd, "Couldn't allocate memory to read %u "
-                             "bytes from eeprom for GUID\n", len);
-               goto bail;
-       }
-
-       mutex_lock(&dd->ipath_eep_lock);
-       eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len);
-       mutex_unlock(&dd->ipath_eep_lock);
-
-       if (eep_stat) {
-               ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
-               goto done;
-       }
-       ifp = (struct ipath_flash *)buf;
-
-       csum = flash_csum(ifp, 0);
-       if (csum != ifp->if_csum) {
-               dev_info(&dd->pcidev->dev, "Bad I2C flash checksum: "
-                        "0x%x, not 0x%x\n", csum, ifp->if_csum);
-               goto done;
-       }
-       if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) ||
-           *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) {
-               ipath_dev_err(dd, "Invalid GUID %llx from flash; "
-                             "ignoring\n",
-                             *(unsigned long long *) ifp->if_guid);
-               /* don't allow GUID if all 0 or all 1's */
-               goto done;
-       }
-
-       /* complain, but allow it */
-       if (*(u64 *) ifp->if_guid == 0x100007511000000ULL)
-               dev_info(&dd->pcidev->dev, "Warning, GUID %llx is "
-                        "default, probably not correct!\n",
-                        *(unsigned long long *) ifp->if_guid);
-
-       bguid = ifp->if_guid;
-       if (!bguid[0] && !bguid[1] && !bguid[2]) {
-               /* original incorrect GUID format in flash; fix in
-                * core copy, by shifting up 2 octets; don't need to
-                * change top octet, since both it and shifted are
-                * 0.. */
-               bguid[1] = bguid[3];
-               bguid[2] = bguid[4];
-               bguid[3] = bguid[4] = 0;
-               guid = *(__be64 *) ifp->if_guid;
-               ipath_cdbg(VERBOSE, "Old GUID format in flash, top 3 zero, "
-                          "shifting 2 octets\n");
-       } else
-               guid = *(__be64 *) ifp->if_guid;
-       dd->ipath_guid = guid;
-       dd->ipath_nguid = ifp->if_numguid;
-       /*
-        * Things are slightly complicated by the desire to transparently
-        * support both the Pathscale 10-digit serial number and the QLogic
-        * 13-character version.
-        */
-       if ((ifp->if_fversion > 1) && ifp->if_sprefix[0]
-               && ((u8 *)ifp->if_sprefix)[0] != 0xFF) {
-               /* This board has a Serial-prefix, which is stored
-                * elsewhere for backward-compatibility.
-                */
-               char *snp = dd->ipath_serial;
-               memcpy(snp, ifp->if_sprefix, sizeof ifp->if_sprefix);
-               snp[sizeof ifp->if_sprefix] = '\0';
-               len = strlen(snp);
-               snp += len;
-               len = (sizeof dd->ipath_serial) - len;
-               if (len > sizeof ifp->if_serial) {
-                       len = sizeof ifp->if_serial;
-               }
-               memcpy(snp, ifp->if_serial, len);
-       } else
-               memcpy(dd->ipath_serial, ifp->if_serial,
-                      sizeof ifp->if_serial);
-       if (!strstr(ifp->if_comment, "Tested successfully"))
-               ipath_dev_err(dd, "Board SN %s did not pass functional "
-                       "test: %s\n", dd->ipath_serial,
-                       ifp->if_comment);
-
-       ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
-                  (unsigned long long) be64_to_cpu(dd->ipath_guid));
-
-       memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT);
-       /*
-        * Power-on (actually "active") hours are kept as little-endian value
-        * in EEPROM, but as seconds in a (possibly as small as 24-bit)
-        * atomic_t while running.
-        */
-       atomic_set(&dd->ipath_active_time, 0);
-       dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8);
-
-done:
-       vfree(buf);
-
-bail:;
-}
-
-/**
- * ipath_update_eeprom_log - copy active-time and error counters to eeprom
- * @dd: the infinipath device
- *
- * Although the time is kept as seconds in the ipath_devdata struct, it is
- * rounded to hours for re-write, as we have only 16 bits in EEPROM.
- * First-cut code reads whole (expected) struct ipath_flash, modifies,
- * re-writes. Future direction: read/write only what we need, assuming
- * that the EEPROM had to have been "good enough" for driver init, and
- * if not, we aren't making it worse.
- *
- */
-
-int ipath_update_eeprom_log(struct ipath_devdata *dd)
-{
-       void *buf;
-       struct ipath_flash *ifp;
-       int len, hi_water;
-       uint32_t new_time, new_hrs;
-       u8 csum;
-       int ret, idx;
-       unsigned long flags;
-
-       /* first, check if we actually need to do anything. */
-       ret = 0;
-       for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
-               if (dd->ipath_eep_st_new_errs[idx]) {
-                       ret = 1;
-                       break;
-               }
-       }
-       new_time = atomic_read(&dd->ipath_active_time);
-
-       if (ret == 0 && new_time < 3600)
-               return 0;
-
-       /*
-        * The quick-check above determined that there is something worthy
-        * of logging, so get current contents and do a more detailed idea.
-        * read full flash, not just currently used part, since it may have
-        * been written with a newer definition
-        */
-       len = sizeof(struct ipath_flash);
-       buf = vmalloc(len);
-       ret = 1;
-       if (!buf) {
-               ipath_dev_err(dd, "Couldn't allocate memory to read %u "
-                               "bytes from eeprom for logging\n", len);
-               goto bail;
-       }
-
-       /* Grab semaphore and read current EEPROM. If we get an
-        * error, let go, but if not, keep it until we finish write.
-        */
-       ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-       if (ret) {
-               ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n");
-               goto free_bail;
-       }
-       ret = ipath_eeprom_internal_read(dd, 0, buf, len);
-       if (ret) {
-               mutex_unlock(&dd->ipath_eep_lock);
-               ipath_dev_err(dd, "Unable read EEPROM for logging\n");
-               goto free_bail;
-       }
-       ifp = (struct ipath_flash *)buf;
-
-       csum = flash_csum(ifp, 0);
-       if (csum != ifp->if_csum) {
-               mutex_unlock(&dd->ipath_eep_lock);
-               ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
-                               csum, ifp->if_csum);
-               ret = 1;
-               goto free_bail;
-       }
-       hi_water = 0;
-       spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
-       for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
-               int new_val = dd->ipath_eep_st_new_errs[idx];
-               if (new_val) {
-                       /*
-                        * If we have seen any errors, add to EEPROM values
-                        * We need to saturate at 0xFF (255) and we also
-                        * would need to adjust the checksum if we were
-                        * trying to minimize EEPROM traffic
-                        * Note that we add to actual current count in EEPROM,
-                        * in case it was altered while we were running.
-                        */
-                       new_val += ifp->if_errcntp[idx];
-                       if (new_val > 0xFF)
-                               new_val = 0xFF;
-                       if (ifp->if_errcntp[idx] != new_val) {
-                               ifp->if_errcntp[idx] = new_val;
-                               hi_water = offsetof(struct ipath_flash,
-                                               if_errcntp) + idx;
-                       }
-                       /*
-                        * update our shadow (used to minimize EEPROM
-                        * traffic), to match what we are about to write.
-                        */
-                       dd->ipath_eep_st_errs[idx] = new_val;
-                       dd->ipath_eep_st_new_errs[idx] = 0;
-               }
-       }
-       /*
-        * now update active-time. We would like to round to the nearest hour
-        * but unless atomic_t are sure to be proper signed ints we cannot,
-        * because we need to account for what we "transfer" to EEPROM and
-        * if we log an hour at 31 minutes, then we would need to set
-        * active_time to -29 to accurately count the _next_ hour.
-        */
-       if (new_time >= 3600) {
-               new_hrs = new_time / 3600;
-               atomic_sub((new_hrs * 3600), &dd->ipath_active_time);
-               new_hrs += dd->ipath_eep_hrs;
-               if (new_hrs > 0xFFFF)
-                       new_hrs = 0xFFFF;
-               dd->ipath_eep_hrs = new_hrs;
-               if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) {
-                       ifp->if_powerhour[0] = new_hrs & 0xFF;
-                       hi_water = offsetof(struct ipath_flash, if_powerhour);
-               }
-               if ((new_hrs >> 8) != ifp->if_powerhour[1]) {
-                       ifp->if_powerhour[1] = new_hrs >> 8;
-                       hi_water = offsetof(struct ipath_flash, if_powerhour)
-                                       + 1;
-               }
-       }
-       /*
-        * There is a tiny possibility that we could somehow fail to write
-        * the EEPROM after updating our shadows, but problems from holding
-        * the spinlock too long are a much bigger issue.
-        */
-       spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
-       if (hi_water) {
-               /* we made some change to the data, uopdate cksum and write */
-               csum = flash_csum(ifp, 1);
-               ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1);
-       }
-       mutex_unlock(&dd->ipath_eep_lock);
-       if (ret)
-               ipath_dev_err(dd, "Failed updating EEPROM\n");
-
-free_bail:
-       vfree(buf);
-bail:
-       return ret;
-
-}
-
-/**
- * ipath_inc_eeprom_err - increment one of the four error counters
- * that are logged to EEPROM.
- * @dd: the infinipath device
- * @eidx: 0..3, the counter to increment
- * @incr: how much to add
- *
- * Each counter is 8-bits, and saturates at 255 (0xFF). They
- * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log()
- * is called, but it can only be called in a context that allows sleep.
- * This function can be called even at interrupt level.
- */
-
-void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr)
-{
-       uint new_val;
-       unsigned long flags;
-
-       spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
-       new_val = dd->ipath_eep_st_new_errs[eidx] + incr;
-       if (new_val > 255)
-               new_val = 255;
-       dd->ipath_eep_st_new_errs[eidx] = new_val;
-       spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
-       return;
-}
-
-static int ipath_tempsense_internal_read(struct ipath_devdata *dd, u8 regnum)
-{
-       int ret;
-       struct i2c_chain_desc *icd;
-
-       ret = -ENOENT;
-
-       icd = ipath_i2c_type(dd);
-       if (!icd)
-               goto bail;
-
-       if (icd->temp_dev == IPATH_NO_DEV) {
-               /* tempsense only exists on new, real-I2C boards */
-               ret = -ENXIO;
-               goto bail;
-       }
-
-       if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) {
-               ipath_dbg("Failed tempsense startcmd\n");
-               stop_cmd(dd);
-               ret = -ENXIO;
-               goto bail;
-       }
-       ret = wr_byte(dd, regnum);
-       stop_cmd(dd);
-       if (ret) {
-               ipath_dev_err(dd, "Failed tempsense WR command %02X\n",
-                             regnum);
-               ret = -ENXIO;
-               goto bail;
-       }
-       if (i2c_startcmd(dd, icd->temp_dev | READ_CMD)) {
-               ipath_dbg("Failed tempsense RD startcmd\n");
-               stop_cmd(dd);
-               ret = -ENXIO;
-               goto bail;
-       }
-       /*
-        * We can only clock out one byte per command, sensibly
-        */
-       ret = rd_byte(dd);
-       stop_cmd(dd);
-
-bail:
-       return ret;
-}
-
-#define VALID_TS_RD_REG_MASK 0xBF
-
-/**
- * ipath_tempsense_read - read register of temp sensor via I2C
- * @dd: the infinipath device
- * @regnum: register to read from
- *
- * returns reg contents (0..255) or < 0 for error
- */
-int ipath_tempsense_read(struct ipath_devdata *dd, u8 regnum)
-{
-       int ret;
-
-       if (regnum > 7)
-               return -EINVAL;
-
-       /* return a bogus value for (the one) register we do not have */
-       if (!((1 << regnum) & VALID_TS_RD_REG_MASK))
-               return 0;
-
-       ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-       if (!ret) {
-               ret = ipath_tempsense_internal_read(dd, regnum);
-               mutex_unlock(&dd->ipath_eep_lock);
-       }
-
-       /*
-        * There are three possibilities here:
-        * ret is actual value (0..255)
-        * ret is -ENXIO or -EINVAL from code in this file
-        * ret is -EINTR from mutex_lock_interruptible.
-        */
-       return ret;
-}
-
-static int ipath_tempsense_internal_write(struct ipath_devdata *dd,
-                                         u8 regnum, u8 data)
-{
-       int ret = -ENOENT;
-       struct i2c_chain_desc *icd;
-
-       icd = ipath_i2c_type(dd);
-       if (!icd)
-               goto bail;
-
-       if (icd->temp_dev == IPATH_NO_DEV) {
-               /* tempsense only exists on new, real-I2C boards */
-               ret = -ENXIO;
-               goto bail;
-       }
-       if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) {
-               ipath_dbg("Failed tempsense startcmd\n");
-               stop_cmd(dd);
-               ret = -ENXIO;
-               goto bail;
-       }
-       ret = wr_byte(dd, regnum);
-       if (ret) {
-               stop_cmd(dd);
-               ipath_dev_err(dd, "Failed to write tempsense command %02X\n",
-                             regnum);
-               ret = -ENXIO;
-               goto bail;
-       }
-       ret = wr_byte(dd, data);
-       stop_cmd(dd);
-       ret = i2c_startcmd(dd, icd->temp_dev | READ_CMD);
-       if (ret) {
-               ipath_dev_err(dd, "Failed tempsense data wrt to %02X\n",
-                             regnum);
-               ret = -ENXIO;
-       }
-
-bail:
-       return ret;
-}
-
-#define VALID_TS_WR_REG_MASK ((1 << 9) | (1 << 0xB) | (1 << 0xD))
-
-/**
- * ipath_tempsense_write - write register of temp sensor via I2C
- * @dd: the infinipath device
- * @regnum: register to write
- * @data: data to write
- *
- * returns 0 for success or < 0 for error
- */
-int ipath_tempsense_write(struct ipath_devdata *dd, u8 regnum, u8 data)
-{
-       int ret;
-
-       if (regnum > 15 || !((1 << regnum) & VALID_TS_WR_REG_MASK))
-               return -EINVAL;
-
-       ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-       if (!ret) {
-               ret = ipath_tempsense_internal_write(dd, regnum, data);
-               mutex_unlock(&dd->ipath_eep_lock);
-       }
-
-       /*
-        * There are three possibilities here:
-        * ret is 0 for success
-        * ret is -ENXIO or -EINVAL from code in this file
-        * ret is -EINTR from mutex_lock_interruptible.
-        */
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_file_ops.c b/drivers/staging/rdma/ipath/ipath_file_ops.c
deleted file mode 100644 (file)
index 6187b84..0000000
+++ /dev/null
@@ -1,2619 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/pci.h>
-#include <linux/poll.h>
-#include <linux/cdev.h>
-#include <linux/swap.h>
-#include <linux/export.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/io.h>
-#include <linux/jiffies.h>
-#include <linux/cpu.h>
-#include <linux/uio.h>
-#include <asm/pgtable.h>
-
-#include "ipath_kernel.h"
-#include "ipath_common.h"
-#include "ipath_user_sdma.h"
-
-static int ipath_open(struct inode *, struct file *);
-static int ipath_close(struct inode *, struct file *);
-static ssize_t ipath_write(struct file *, const char __user *, size_t,
-                          loff_t *);
-static ssize_t ipath_write_iter(struct kiocb *, struct iov_iter *from);
-static unsigned int ipath_poll(struct file *, struct poll_table_struct *);
-static int ipath_mmap(struct file *, struct vm_area_struct *);
-
-/*
- * This is really, really weird shit - write() and writev() here
- * have completely unrelated semantics.  Sucky userland ABI,
- * film at 11.
- */
-static const struct file_operations ipath_file_ops = {
-       .owner = THIS_MODULE,
-       .write = ipath_write,
-       .write_iter = ipath_write_iter,
-       .open = ipath_open,
-       .release = ipath_close,
-       .poll = ipath_poll,
-       .mmap = ipath_mmap,
-       .llseek = noop_llseek,
-};
-
-/*
- * Convert kernel virtual addresses to physical addresses so they don't
- * potentially conflict with the chip addresses used as mmap offsets.
- * It doesn't really matter what mmap offset we use as long as we can
- * interpret it correctly.
- */
-static u64 cvt_kvaddr(void *p)
-{
-       struct page *page;
-       u64 paddr = 0;
-
-       page = vmalloc_to_page(p);
-       if (page)
-               paddr = page_to_pfn(page) << PAGE_SHIFT;
-
-       return paddr;
-}
-
-static int ipath_get_base_info(struct file *fp,
-                              void __user *ubase, size_t ubase_size)
-{
-       struct ipath_portdata *pd = port_fp(fp);
-       int ret = 0;
-       struct ipath_base_info *kinfo = NULL;
-       struct ipath_devdata *dd = pd->port_dd;
-       unsigned subport_cnt;
-       int shared, master;
-       size_t sz;
-
-       subport_cnt = pd->port_subport_cnt;
-       if (!subport_cnt) {
-               shared = 0;
-               master = 0;
-               subport_cnt = 1;
-       } else {
-               shared = 1;
-               master = !subport_fp(fp);
-       }
-
-       sz = sizeof(*kinfo);
-       /* If port sharing is not requested, allow the old size structure */
-       if (!shared)
-               sz -= 7 * sizeof(u64);
-       if (ubase_size < sz) {
-               ipath_cdbg(PROC,
-                          "Base size %zu, need %zu (version mismatch?)\n",
-                          ubase_size, sz);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL);
-       if (kinfo == NULL) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       ret = dd->ipath_f_get_base_info(pd, kinfo);
-       if (ret < 0)
-               goto bail;
-
-       kinfo->spi_rcvhdr_cnt = dd->ipath_rcvhdrcnt;
-       kinfo->spi_rcvhdrent_size = dd->ipath_rcvhdrentsize;
-       kinfo->spi_tidegrcnt = dd->ipath_rcvegrcnt;
-       kinfo->spi_rcv_egrbufsize = dd->ipath_rcvegrbufsize;
-       /*
-        * have to mmap whole thing
-        */
-       kinfo->spi_rcv_egrbuftotlen =
-               pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
-       kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
-       kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
-               pd->port_rcvegrbuf_chunks;
-       kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt;
-       if (master)
-               kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt;
-       /*
-        * for this use, may be ipath_cfgports summed over all chips that
-        * are are configured and present
-        */
-       kinfo->spi_nports = dd->ipath_cfgports;
-       /* unit (chip/board) our port is on */
-       kinfo->spi_unit = dd->ipath_unit;
-       /* for now, only a single page */
-       kinfo->spi_tid_maxsize = PAGE_SIZE;
-
-       /*
-        * Doing this per port, and based on the skip value, etc.  This has
-        * to be the actual buffer size, since the protocol code treats it
-        * as an array.
-        *
-        * These have to be set to user addresses in the user code via mmap.
-        * These values are used on return to user code for the mmap target
-        * addresses only.  For 32 bit, same 44 bit address problem, so use
-        * the physical address, not virtual.  Before 2.6.11, using the
-        * page_address() macro worked, but in 2.6.11, even that returns the
-        * full 64 bit address (upper bits all 1's).  So far, using the
-        * physical addresses (or chip offsets, for chip mapping) works, but
-        * no doubt some future kernel release will change that, and we'll be
-        * on to yet another method of dealing with this.
-        */
-       kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
-       kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys;
-       kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
-       kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
-       kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
-               (void *) dd->ipath_statusp -
-               (void *) dd->ipath_pioavailregs_dma;
-       if (!shared) {
-               kinfo->spi_piocnt = pd->port_piocnt;
-               kinfo->spi_piobufbase = (u64) pd->port_piobufs;
-               kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
-                       dd->ipath_ureg_align * pd->port_port;
-       } else if (master) {
-               kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
-                                   (pd->port_piocnt % subport_cnt);
-               /* Master's PIO buffers are after all the slave's */
-               kinfo->spi_piobufbase = (u64) pd->port_piobufs +
-                       dd->ipath_palign *
-                       (pd->port_piocnt - kinfo->spi_piocnt);
-       } else {
-               unsigned slave = subport_fp(fp) - 1;
-
-               kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
-               kinfo->spi_piobufbase = (u64) pd->port_piobufs +
-                       dd->ipath_palign * kinfo->spi_piocnt * slave;
-       }
-
-       if (shared) {
-               kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
-                       dd->ipath_ureg_align * pd->port_port;
-               kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
-               kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
-               kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;
-
-               kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
-                       PAGE_SIZE * subport_fp(fp));
-
-               kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
-                       pd->port_rcvhdrq_size * subport_fp(fp));
-               kinfo->spi_rcvhdr_tailaddr = 0;
-               kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
-                       pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size *
-                       subport_fp(fp));
-
-               kinfo->spi_subport_uregbase =
-                       cvt_kvaddr(pd->subport_uregbase);
-               kinfo->spi_subport_rcvegrbuf =
-                       cvt_kvaddr(pd->subport_rcvegrbuf);
-               kinfo->spi_subport_rcvhdr_base =
-                       cvt_kvaddr(pd->subport_rcvhdr_base);
-               ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
-                       kinfo->spi_port, kinfo->spi_runtime_flags,
-                       (unsigned long long) kinfo->spi_subport_uregbase,
-                       (unsigned long long) kinfo->spi_subport_rcvegrbuf,
-                       (unsigned long long) kinfo->spi_subport_rcvhdr_base);
-       }
-
-       /*
-        * All user buffers are 2KB buffers.  If we ever support
-        * giving 4KB buffers to user processes, this will need some
-        * work.
-        */
-       kinfo->spi_pioindex = (kinfo->spi_piobufbase -
-               (dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign;
-       kinfo->spi_pioalign = dd->ipath_palign;
-
-       kinfo->spi_qpair = IPATH_KD_QP;
-       /*
-        * user mode PIO buffers are always 2KB, even when 4KB can
-        * be received, and sent via the kernel; this is ibmaxlen
-        * for 2K MTU.
-        */
-       kinfo->spi_piosize = dd->ipath_piosize2k - 2 * sizeof(u32);
-       kinfo->spi_mtu = dd->ipath_ibmaxlen;    /* maxlen, not ibmtu */
-       kinfo->spi_port = pd->port_port;
-       kinfo->spi_subport = subport_fp(fp);
-       kinfo->spi_sw_version = IPATH_KERN_SWVERSION;
-       kinfo->spi_hw_version = dd->ipath_revision;
-
-       if (master) {
-               kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
-       }
-
-       sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
-       if (copy_to_user(ubase, kinfo, sz))
-               ret = -EFAULT;
-
-bail:
-       kfree(kinfo);
-       return ret;
-}
-
-/**
- * ipath_tid_update - update a port TID
- * @pd: the port
- * @fp: the ipath device file
- * @ti: the TID information
- *
- * The new implementation as of Oct 2004 is that the driver assigns
- * the tid and returns it to the caller.   To make it easier to
- * catch bugs, and to reduce search time, we keep a cursor for
- * each port, walking the shadow tid array to find one that's not
- * in use.
- *
- * For now, if we can't allocate the full list, we fail, although
- * in the long run, we'll allocate as many as we can, and the
- * caller will deal with that by trying the remaining pages later.
- * That means that when we fail, we have to mark the tids as not in
- * use again, in our shadow copy.
- *
- * It's up to the caller to free the tids when they are done.
- * We'll unlock the pages as they free them.
- *
- * Also, right now we are locking one page at a time, but since
- * the intended use of this routine is for a single group of
- * virtually contiguous pages, that should change to improve
- * performance.
- */
-static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
-                           const struct ipath_tid_info *ti)
-{
-       int ret = 0, ntids;
-       u32 tid, porttid, cnt, i, tidcnt, tidoff;
-       u16 *tidlist;
-       struct ipath_devdata *dd = pd->port_dd;
-       u64 physaddr;
-       unsigned long vaddr;
-       u64 __iomem *tidbase;
-       unsigned long tidmap[8];
-       struct page **pagep = NULL;
-       unsigned subport = subport_fp(fp);
-
-       if (!dd->ipath_pageshadow) {
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       cnt = ti->tidcnt;
-       if (!cnt) {
-               ipath_dbg("After copyin, tidcnt 0, tidlist %llx\n",
-                         (unsigned long long) ti->tidlist);
-               /*
-                * Should we treat as success?  likely a bug
-                */
-               ret = -EFAULT;
-               goto done;
-       }
-       porttid = pd->port_port * dd->ipath_rcvtidcnt;
-       if (!pd->port_subport_cnt) {
-               tidcnt = dd->ipath_rcvtidcnt;
-               tid = pd->port_tidcursor;
-               tidoff = 0;
-       } else if (!subport) {
-               tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
-                        (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
-               tidoff = dd->ipath_rcvtidcnt - tidcnt;
-               porttid += tidoff;
-               tid = tidcursor_fp(fp);
-       } else {
-               tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
-               tidoff = tidcnt * (subport - 1);
-               porttid += tidoff;
-               tid = tidcursor_fp(fp);
-       }
-       if (cnt > tidcnt) {
-               /* make sure it all fits in port_tid_pg_list */
-               dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
-                        "TIDs, only trying max (%u)\n", cnt, tidcnt);
-               cnt = tidcnt;
-       }
-       pagep = &((struct page **) pd->port_tid_pg_list)[tidoff];
-       tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff];
-
-       memset(tidmap, 0, sizeof(tidmap));
-       /* before decrement; chip actual # */
-       ntids = tidcnt;
-       tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
-                                  dd->ipath_rcvtidbase +
-                                  porttid * sizeof(*tidbase));
-
-       ipath_cdbg(VERBOSE, "Port%u %u tids, cursor %u, tidbase %p\n",
-                  pd->port_port, cnt, tid, tidbase);
-
-       /* virtual address of first page in transfer */
-       vaddr = ti->tidvaddr;
-       if (!access_ok(VERIFY_WRITE, (void __user *) vaddr,
-                      cnt * PAGE_SIZE)) {
-               ipath_dbg("Fail vaddr %p, %u pages, !access_ok\n",
-                         (void *)vaddr, cnt);
-               ret = -EFAULT;
-               goto done;
-       }
-       ret = ipath_get_user_pages(vaddr, cnt, pagep);
-       if (ret) {
-               if (ret == -EBUSY) {
-                       ipath_dbg("Failed to lock addr %p, %u pages "
-                                 "(already locked)\n",
-                                 (void *) vaddr, cnt);
-                       /*
-                        * for now, continue, and see what happens but with
-                        * the new implementation, this should never happen,
-                        * unless perhaps the user has mpin'ed the pages
-                        * themselves (something we need to test)
-                        */
-                       ret = 0;
-               } else {
-                       dev_info(&dd->pcidev->dev,
-                                "Failed to lock addr %p, %u pages: "
-                                "errno %d\n", (void *) vaddr, cnt, -ret);
-                       goto done;
-               }
-       }
-       for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
-               for (; ntids--; tid++) {
-                       if (tid == tidcnt)
-                               tid = 0;
-                       if (!dd->ipath_pageshadow[porttid + tid])
-                               break;
-               }
-               if (ntids < 0) {
-                       /*
-                        * oops, wrapped all the way through their TIDs,
-                        * and didn't have enough free; see comments at
-                        * start of routine
-                        */
-                       ipath_dbg("Not enough free TIDs for %u pages "
-                                 "(index %d), failing\n", cnt, i);
-                       i--;    /* last tidlist[i] not filled in */
-                       ret = -ENOMEM;
-                       break;
-               }
-               tidlist[i] = tid + tidoff;
-               ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
-                          "vaddr %lx\n", i, tid + tidoff, vaddr);
-               /* we "know" system pages and TID pages are same size */
-               dd->ipath_pageshadow[porttid + tid] = pagep[i];
-               dd->ipath_physshadow[porttid + tid] = ipath_map_page(
-                       dd->pcidev, pagep[i], 0, PAGE_SIZE,
-                       PCI_DMA_FROMDEVICE);
-               /*
-                * don't need atomic or it's overhead
-                */
-               __set_bit(tid, tidmap);
-               physaddr = dd->ipath_physshadow[porttid + tid];
-               ipath_stats.sps_pagelocks++;
-               ipath_cdbg(VERBOSE,
-                          "TID %u, vaddr %lx, physaddr %llx pgp %p\n",
-                          tid, vaddr, (unsigned long long) physaddr,
-                          pagep[i]);
-               dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED,
-                                   physaddr);
-               /*
-                * don't check this tid in ipath_portshadow, since we
-                * just filled it in; start with the next one.
-                */
-               tid++;
-       }
-
-       if (ret) {
-               u32 limit;
-       cleanup:
-               /* jump here if copy out of updated info failed... */
-               ipath_dbg("After failure (ret=%d), undo %d of %d entries\n",
-                         -ret, i, cnt);
-               /* same code that's in ipath_free_tid() */
-               limit = sizeof(tidmap) * BITS_PER_BYTE;
-               if (limit > tidcnt)
-                       /* just in case size changes in future */
-                       limit = tidcnt;
-               tid = find_first_bit((const unsigned long *)tidmap, limit);
-               for (; tid < limit; tid++) {
-                       if (!test_bit(tid, tidmap))
-                               continue;
-                       if (dd->ipath_pageshadow[porttid + tid]) {
-                               ipath_cdbg(VERBOSE, "Freeing TID %u\n",
-                                          tid);
-                               dd->ipath_f_put_tid(dd, &tidbase[tid],
-                                                   RCVHQ_RCV_TYPE_EXPECTED,
-                                                   dd->ipath_tidinvalid);
-                               pci_unmap_page(dd->pcidev,
-                                       dd->ipath_physshadow[porttid + tid],
-                                       PAGE_SIZE, PCI_DMA_FROMDEVICE);
-                               dd->ipath_pageshadow[porttid + tid] = NULL;
-                               ipath_stats.sps_pageunlocks++;
-                       }
-               }
-               ipath_release_user_pages(pagep, cnt);
-       } else {
-               /*
-                * Copy the updated array, with ipath_tid's filled in, back
-                * to user.  Since we did the copy in already, this "should
-                * never fail" If it does, we have to clean up...
-                */
-               if (copy_to_user((void __user *)
-                                (unsigned long) ti->tidlist,
-                                tidlist, cnt * sizeof(*tidlist))) {
-                       ret = -EFAULT;
-                       goto cleanup;
-               }
-               if (copy_to_user((void __user *) (unsigned long) ti->tidmap,
-                                tidmap, sizeof tidmap)) {
-                       ret = -EFAULT;
-                       goto cleanup;
-               }
-               if (tid == tidcnt)
-                       tid = 0;
-               if (!pd->port_subport_cnt)
-                       pd->port_tidcursor = tid;
-               else
-                       tidcursor_fp(fp) = tid;
-       }
-
-done:
-       if (ret)
-               ipath_dbg("Failed to map %u TID pages, failing with %d\n",
-                         ti->tidcnt, -ret);
-       return ret;
-}
-
-/**
- * ipath_tid_free - free a port TID
- * @pd: the port
- * @subport: the subport
- * @ti: the TID info
- *
- * right now we are unlocking one page at a time, but since
- * the intended use of this routine is for a single group of
- * virtually contiguous pages, that should change to improve
- * performance.  We check that the TID is in range for this port
- * but otherwise don't check validity; if user has an error and
- * frees the wrong tid, it's only their own data that can thereby
- * be corrupted.  We do check that the TID was in use, for sanity
- * We always use our idea of the saved address, not the address that
- * they pass in to us.
- */
-
-static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
-                         const struct ipath_tid_info *ti)
-{
-       int ret = 0;
-       u32 tid, porttid, cnt, limit, tidcnt;
-       struct ipath_devdata *dd = pd->port_dd;
-       u64 __iomem *tidbase;
-       unsigned long tidmap[8];
-
-       if (!dd->ipath_pageshadow) {
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap,
-                          sizeof tidmap)) {
-               ret = -EFAULT;
-               goto done;
-       }
-
-       porttid = pd->port_port * dd->ipath_rcvtidcnt;
-       if (!pd->port_subport_cnt)
-               tidcnt = dd->ipath_rcvtidcnt;
-       else if (!subport) {
-               tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
-                        (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
-               porttid += dd->ipath_rcvtidcnt - tidcnt;
-       } else {
-               tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
-               porttid += tidcnt * (subport - 1);
-       }
-       tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
-                                  dd->ipath_rcvtidbase +
-                                  porttid * sizeof(*tidbase));
-
-       limit = sizeof(tidmap) * BITS_PER_BYTE;
-       if (limit > tidcnt)
-               /* just in case size changes in future */
-               limit = tidcnt;
-       tid = find_first_bit(tidmap, limit);
-       ipath_cdbg(VERBOSE, "Port%u free %u tids; first bit (max=%d) "
-                  "set is %d, porttid %u\n", pd->port_port, ti->tidcnt,
-                  limit, tid, porttid);
-       for (cnt = 0; tid < limit; tid++) {
-               /*
-                * small optimization; if we detect a run of 3 or so without
-                * any set, use find_first_bit again.  That's mainly to
-                * accelerate the case where we wrapped, so we have some at
-                * the beginning, and some at the end, and a big gap
-                * in the middle.
-                */
-               if (!test_bit(tid, tidmap))
-                       continue;
-               cnt++;
-               if (dd->ipath_pageshadow[porttid + tid]) {
-                       struct page *p;
-                       p = dd->ipath_pageshadow[porttid + tid];
-                       dd->ipath_pageshadow[porttid + tid] = NULL;
-                       ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
-                                  pid_nr(pd->port_pid), tid);
-                       dd->ipath_f_put_tid(dd, &tidbase[tid],
-                                           RCVHQ_RCV_TYPE_EXPECTED,
-                                           dd->ipath_tidinvalid);
-                       pci_unmap_page(dd->pcidev,
-                               dd->ipath_physshadow[porttid + tid],
-                               PAGE_SIZE, PCI_DMA_FROMDEVICE);
-                       ipath_release_user_pages(&p, 1);
-                       ipath_stats.sps_pageunlocks++;
-               } else
-                       ipath_dbg("Unused tid %u, ignoring\n", tid);
-       }
-       if (cnt != ti->tidcnt)
-               ipath_dbg("passed in tidcnt %d, only %d bits set in map\n",
-                         ti->tidcnt, cnt);
-done:
-       if (ret)
-               ipath_dbg("Failed to unmap %u TID pages, failing with %d\n",
-                         ti->tidcnt, -ret);
-       return ret;
-}
-
-/**
- * ipath_set_part_key - set a partition key
- * @pd: the port
- * @key: the key
- *
- * We can have up to 4 active at a time (other than the default, which is
- * always allowed).  This is somewhat tricky, since multiple ports may set
- * the same key, so we reference count them, and clean up at exit.  All 4
- * partition keys are packed into a single infinipath register.  It's an
- * error for a process to set the same pkey multiple times.  We provide no
- * mechanism to de-allocate a pkey at this time, we may eventually need to
- * do that.  I've used the atomic operations, and no locking, and only make
- * a single pass through what's available.  This should be more than
- * adequate for some time. I'll think about spinlocks or the like if and as
- * it's necessary.
- */
-static int ipath_set_part_key(struct ipath_portdata *pd, u16 key)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       int i, any = 0, pidx = -1;
-       u16 lkey = key & 0x7FFF;
-       int ret;
-
-       if (lkey == (IPATH_DEFAULT_P_KEY & 0x7FFF)) {
-               /* nothing to do; this key always valid */
-               ret = 0;
-               goto bail;
-       }
-
-       ipath_cdbg(VERBOSE, "p%u try to set pkey %hx, current keys "
-                  "%hx:%x %hx:%x %hx:%x %hx:%x\n",
-                  pd->port_port, key, dd->ipath_pkeys[0],
-                  atomic_read(&dd->ipath_pkeyrefs[0]), dd->ipath_pkeys[1],
-                  atomic_read(&dd->ipath_pkeyrefs[1]), dd->ipath_pkeys[2],
-                  atomic_read(&dd->ipath_pkeyrefs[2]), dd->ipath_pkeys[3],
-                  atomic_read(&dd->ipath_pkeyrefs[3]));
-
-       if (!lkey) {
-               ipath_cdbg(PROC, "p%u tries to set key 0, not allowed\n",
-                          pd->port_port);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       /*
-        * Set the full membership bit, because it has to be
-        * set in the register or the packet, and it seems
-        * cleaner to set in the register than to force all
-        * callers to set it. (see bug 4331)
-        */
-       key |= 0x8000;
-
-       for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
-               if (!pd->port_pkeys[i] && pidx == -1)
-                       pidx = i;
-               if (pd->port_pkeys[i] == key) {
-                       ipath_cdbg(VERBOSE, "p%u tries to set same pkey "
-                                  "(%x) more than once\n",
-                                  pd->port_port, key);
-                       ret = -EEXIST;
-                       goto bail;
-               }
-       }
-       if (pidx == -1) {
-               ipath_dbg("All pkeys for port %u already in use, "
-                         "can't set %x\n", pd->port_port, key);
-               ret = -EBUSY;
-               goto bail;
-       }
-       for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-               if (!dd->ipath_pkeys[i]) {
-                       any++;
-                       continue;
-               }
-               if (dd->ipath_pkeys[i] == key) {
-                       atomic_t *pkrefs = &dd->ipath_pkeyrefs[i];
-
-                       if (atomic_inc_return(pkrefs) > 1) {
-                               pd->port_pkeys[pidx] = key;
-                               ipath_cdbg(VERBOSE, "p%u set key %x "
-                                          "matches #%d, count now %d\n",
-                                          pd->port_port, key, i,
-                                          atomic_read(pkrefs));
-                               ret = 0;
-                               goto bail;
-                       } else {
-                               /*
-                                * lost race, decrement count, catch below
-                                */
-                               atomic_dec(pkrefs);
-                               ipath_cdbg(VERBOSE, "Lost race, count was "
-                                          "0, after dec, it's %d\n",
-                                          atomic_read(pkrefs));
-                               any++;
-                       }
-               }
-               if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
-                       /*
-                        * It makes no sense to have both the limited and
-                        * full membership PKEY set at the same time since
-                        * the unlimited one will disable the limited one.
-                        */
-                       ret = -EEXIST;
-                       goto bail;
-               }
-       }
-       if (!any) {
-               ipath_dbg("port %u, all pkeys already in use, "
-                         "can't set %x\n", pd->port_port, key);
-               ret = -EBUSY;
-               goto bail;
-       }
-       for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-               if (!dd->ipath_pkeys[i] &&
-                   atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
-                       u64 pkey;
-
-                       /* for ipathstats, etc. */
-                       ipath_stats.sps_pkeys[i] = lkey;
-                       pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key;
-                       pkey =
-                               (u64) dd->ipath_pkeys[0] |
-                               ((u64) dd->ipath_pkeys[1] << 16) |
-                               ((u64) dd->ipath_pkeys[2] << 32) |
-                               ((u64) dd->ipath_pkeys[3] << 48);
-                       ipath_cdbg(PROC, "p%u set key %x in #%d, "
-                                  "portidx %d, new pkey reg %llx\n",
-                                  pd->port_port, key, i, pidx,
-                                  (unsigned long long) pkey);
-                       ipath_write_kreg(
-                               dd, dd->ipath_kregs->kr_partitionkey, pkey);
-
-                       ret = 0;
-                       goto bail;
-               }
-       }
-       ipath_dbg("port %u, all pkeys already in use 2nd pass, "
-                 "can't set %x\n", pd->port_port, key);
-       ret = -EBUSY;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_manage_rcvq - manage a port's receive queue
- * @pd: the port
- * @subport: the subport
- * @start_stop: action to carry out
- *
- * start_stop == 0 disables receive on the port, for use in queue
- * overflow conditions.  start_stop==1 re-enables, to be used to
- * re-init the software copy of the head register
- */
-static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport,
-                            int start_stop)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-
-       ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n",
-                  start_stop ? "en" : "dis", dd->ipath_unit,
-                  pd->port_port, subport);
-       if (subport)
-               goto bail;
-       /* atomically clear receive enable port. */
-       if (start_stop) {
-               /*
-                * On enable, force in-memory copy of the tail register to
-                * 0, so that protocol code doesn't have to worry about
-                * whether or not the chip has yet updated the in-memory
-                * copy or not on return from the system call. The chip
-                * always resets it's tail register back to 0 on a
-                * transition from disabled to enabled.  This could cause a
-                * problem if software was broken, and did the enable w/o
-                * the disable, but eventually the in-memory copy will be
-                * updated and correct itself, even in the face of software
-                * bugs.
-                */
-               if (pd->port_rcvhdrtail_kvaddr)
-                       ipath_clear_rcvhdrtail(pd);
-               set_bit(dd->ipath_r_portenable_shift + pd->port_port,
-                       &dd->ipath_rcvctrl);
-       } else
-               clear_bit(dd->ipath_r_portenable_shift + pd->port_port,
-                         &dd->ipath_rcvctrl);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                        dd->ipath_rcvctrl);
-       /* now be sure chip saw it before we return */
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       if (start_stop) {
-               /*
-                * And try to be sure that tail reg update has happened too.
-                * This should in theory interlock with the RXE changes to
-                * the tail register.  Don't assign it to the tail register
-                * in memory copy, since we could overwrite an update by the
-                * chip if we did.
-                */
-               ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
-       }
-       /* always; new head should be equal to new tail; see above */
-bail:
-       return 0;
-}
-
-static void ipath_clean_part_key(struct ipath_portdata *pd,
-                                struct ipath_devdata *dd)
-{
-       int i, j, pchanged = 0;
-       u64 oldpkey;
-
-       /* for debugging only */
-       oldpkey = (u64) dd->ipath_pkeys[0] |
-               ((u64) dd->ipath_pkeys[1] << 16) |
-               ((u64) dd->ipath_pkeys[2] << 32) |
-               ((u64) dd->ipath_pkeys[3] << 48);
-
-       for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
-               if (!pd->port_pkeys[i])
-                       continue;
-               ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i,
-                          pd->port_pkeys[i]);
-               for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) {
-                       /* check for match independent of the global bit */
-                       if ((dd->ipath_pkeys[j] & 0x7fff) !=
-                           (pd->port_pkeys[i] & 0x7fff))
-                               continue;
-                       if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) {
-                               ipath_cdbg(VERBOSE, "p%u clear key "
-                                          "%x matches #%d\n",
-                                          pd->port_port,
-                                          pd->port_pkeys[i], j);
-                               ipath_stats.sps_pkeys[j] =
-                                       dd->ipath_pkeys[j] = 0;
-                               pchanged++;
-                       } else {
-                               ipath_cdbg(VERBOSE, "p%u key %x matches #%d, "
-                                          "but ref still %d\n", pd->port_port,
-                                          pd->port_pkeys[i], j,
-                                          atomic_read(&dd->ipath_pkeyrefs[j]));
-                               break;
-                       }
-               }
-               pd->port_pkeys[i] = 0;
-       }
-       if (pchanged) {
-               u64 pkey = (u64) dd->ipath_pkeys[0] |
-                       ((u64) dd->ipath_pkeys[1] << 16) |
-                       ((u64) dd->ipath_pkeys[2] << 32) |
-                       ((u64) dd->ipath_pkeys[3] << 48);
-               ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, "
-                          "new pkey reg %llx\n", pd->port_port,
-                          (unsigned long long) oldpkey,
-                          (unsigned long long) pkey);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
-                                pkey);
-       }
-}
-
-/*
- * Initialize the port data with the receive buffer sizes
- * so this can be done while the master port is locked.
- * Otherwise, there is a race with a slave opening the port
- * and seeing these fields uninitialized.
- */
-static void init_user_egr_sizes(struct ipath_portdata *pd)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       unsigned egrperchunk, egrcnt, size;
-
-       /*
-        * to avoid wasting a lot of memory, we allocate 32KB chunks of
-        * physically contiguous memory, advance through it until used up
-        * and then allocate more.  Of course, we need memory to store those
-        * extra pointers, now.  Started out with 256KB, but under heavy
-        * memory pressure (creating large files and then copying them over
-        * NFS while doing lots of MPI jobs), we hit some allocation
-        * failures, even though we can sleep...  (2.6.10) Still get
-        * failures at 64K.  32K is the lowest we can go without wasting
-        * additional memory.
-        */
-       size = 0x8000;
-       egrperchunk = size / dd->ipath_rcvegrbufsize;
-       egrcnt = dd->ipath_rcvegrcnt;
-       pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk;
-       pd->port_rcvegrbufs_perchunk = egrperchunk;
-       pd->port_rcvegrbuf_size = size;
-}
-
-/**
- * ipath_create_user_egr - allocate eager TID buffers
- * @pd: the port to allocate TID buffers for
- *
- * This routine is now quite different for user and kernel, because
- * the kernel uses skb's, for the accelerated network performance
- * This is the user port version
- *
- * Allocate the eager TID buffers and program them into infinipath
- * They are no longer completely contiguous, we do multiple allocation
- * calls.
- */
-static int ipath_create_user_egr(struct ipath_portdata *pd)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
-       size_t size;
-       int ret;
-       gfp_t gfp_flags;
-
-       /*
-        * GFP_USER, but without GFP_FS, so buffer cache can be
-        * coalesced (we hope); otherwise, even at order 4,
-        * heavy filesystem activity makes these fail, and we can
-        * use compound pages.
-        */
-       gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
-
-       egrcnt = dd->ipath_rcvegrcnt;
-       /* TID number offset for this port */
-       egroff = (pd->port_port - 1) * egrcnt + dd->ipath_p0_rcvegrcnt;
-       egrsize = dd->ipath_rcvegrbufsize;
-       ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
-                  "offset %x, egrsize %u\n", egrcnt, egroff, egrsize);
-
-       chunk = pd->port_rcvegrbuf_chunks;
-       egrperchunk = pd->port_rcvegrbufs_perchunk;
-       size = pd->port_rcvegrbuf_size;
-       pd->port_rcvegrbuf = kmalloc_array(chunk, sizeof(pd->port_rcvegrbuf[0]),
-                                          GFP_KERNEL);
-       if (!pd->port_rcvegrbuf) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-       pd->port_rcvegrbuf_phys =
-               kmalloc_array(chunk, sizeof(pd->port_rcvegrbuf_phys[0]),
-                             GFP_KERNEL);
-       if (!pd->port_rcvegrbuf_phys) {
-               ret = -ENOMEM;
-               goto bail_rcvegrbuf;
-       }
-       for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
-
-               pd->port_rcvegrbuf[e] = dma_alloc_coherent(
-                       &dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e],
-                       gfp_flags);
-
-               if (!pd->port_rcvegrbuf[e]) {
-                       ret = -ENOMEM;
-                       goto bail_rcvegrbuf_phys;
-               }
-       }
-
-       pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0];
-
-       for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) {
-               dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk];
-               unsigned i;
-
-               for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
-                       dd->ipath_f_put_tid(dd, e + egroff +
-                                           (u64 __iomem *)
-                                           ((char __iomem *)
-                                            dd->ipath_kregbase +
-                                            dd->ipath_rcvegrbase),
-                                           RCVHQ_RCV_TYPE_EAGER, pa);
-                       pa += egrsize;
-               }
-               cond_resched(); /* don't hog the cpu */
-       }
-
-       ret = 0;
-       goto bail;
-
-bail_rcvegrbuf_phys:
-       for (e = 0; e < pd->port_rcvegrbuf_chunks &&
-               pd->port_rcvegrbuf[e]; e++) {
-               dma_free_coherent(&dd->pcidev->dev, size,
-                                 pd->port_rcvegrbuf[e],
-                                 pd->port_rcvegrbuf_phys[e]);
-
-       }
-       kfree(pd->port_rcvegrbuf_phys);
-       pd->port_rcvegrbuf_phys = NULL;
-bail_rcvegrbuf:
-       kfree(pd->port_rcvegrbuf);
-       pd->port_rcvegrbuf = NULL;
-bail:
-       return ret;
-}
-
-
-/* common code for the mappings on dma_alloc_coherent mem */
-static int ipath_mmap_mem(struct vm_area_struct *vma,
-       struct ipath_portdata *pd, unsigned len, int write_ok,
-       void *kvaddr, char *what)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       unsigned long pfn;
-       int ret;
-
-       if ((vma->vm_end - vma->vm_start) > len) {
-               dev_info(&dd->pcidev->dev,
-                        "FAIL on %s: len %lx > %x\n", what,
-                        vma->vm_end - vma->vm_start, len);
-               ret = -EFAULT;
-               goto bail;
-       }
-
-       if (!write_ok) {
-               if (vma->vm_flags & VM_WRITE) {
-                       dev_info(&dd->pcidev->dev,
-                                "%s must be mapped readonly\n", what);
-                       ret = -EPERM;
-                       goto bail;
-               }
-
-               /* don't allow them to later change with mprotect */
-               vma->vm_flags &= ~VM_MAYWRITE;
-       }
-
-       pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
-       ret = remap_pfn_range(vma, vma->vm_start, pfn,
-                             len, vma->vm_page_prot);
-       if (ret)
-               dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x "
-                        "bytes r%c failed: %d\n", what, pd->port_port,
-                        pfn, len, write_ok?'w':'o', ret);
-       else
-               ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes "
-                          "r%c\n", what, pd->port_port, pfn, len,
-                          write_ok?'w':'o');
-bail:
-       return ret;
-}
-
-static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
-                    u64 ureg)
-{
-       unsigned long phys;
-       int ret;
-
-       /*
-        * This is real hardware, so use io_remap.  This is the mechanism
-        * for the user process to update the head registers for their port
-        * in the chip.
-        */
-       if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
-               dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen "
-                        "%lx > PAGE\n", vma->vm_end - vma->vm_start);
-               ret = -EFAULT;
-       } else {
-               phys = dd->ipath_physaddr + ureg;
-               vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-               vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
-               ret = io_remap_pfn_range(vma, vma->vm_start,
-                                        phys >> PAGE_SHIFT,
-                                        vma->vm_end - vma->vm_start,
-                                        vma->vm_page_prot);
-       }
-       return ret;
-}
-
-static int mmap_piobufs(struct vm_area_struct *vma,
-                       struct ipath_devdata *dd,
-                       struct ipath_portdata *pd,
-                       unsigned piobufs, unsigned piocnt)
-{
-       unsigned long phys;
-       int ret;
-
-       /*
-        * When we map the PIO buffers in the chip, we want to map them as
-        * writeonly, no read possible.   This prevents access to previous
-        * process data, and catches users who might try to read the i/o
-        * space due to a bug.
-        */
-       if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) {
-               dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
-                        "reqlen %lx > PAGE\n",
-                        vma->vm_end - vma->vm_start);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       phys = dd->ipath_physaddr + piobufs;
-
-#if defined(__powerpc__)
-       /* There isn't a generic way to specify writethrough mappings */
-       pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
-       pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
-       pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
-#endif
-
-       /*
-        * don't allow them to later change to readable with mprotect (for when
-        * not initially mapped readable, as is normally the case)
-        */
-       vma->vm_flags &= ~VM_MAYREAD;
-       vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
-
-       ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
-                                vma->vm_end - vma->vm_start,
-                                vma->vm_page_prot);
-bail:
-       return ret;
-}
-
-static int mmap_rcvegrbufs(struct vm_area_struct *vma,
-                          struct ipath_portdata *pd)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       unsigned long start, size;
-       size_t total_size, i;
-       unsigned long pfn;
-       int ret;
-
-       size = pd->port_rcvegrbuf_size;
-       total_size = pd->port_rcvegrbuf_chunks * size;
-       if ((vma->vm_end - vma->vm_start) > total_size) {
-               dev_info(&dd->pcidev->dev, "FAIL on egr bufs: "
-                        "reqlen %lx > actual %lx\n",
-                        vma->vm_end - vma->vm_start,
-                        (unsigned long) total_size);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       if (vma->vm_flags & VM_WRITE) {
-               dev_info(&dd->pcidev->dev, "Can't map eager buffers as "
-                        "writable (flags=%lx)\n", vma->vm_flags);
-               ret = -EPERM;
-               goto bail;
-       }
-       /* don't allow them to later change to writeable with mprotect */
-       vma->vm_flags &= ~VM_MAYWRITE;
-
-       start = vma->vm_start;
-
-       for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
-               pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT;
-               ret = remap_pfn_range(vma, start, pfn, size,
-                                     vma->vm_page_prot);
-               if (ret < 0)
-                       goto bail;
-       }
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/*
- * ipath_file_vma_fault - handle a VMA page fault.
- */
-static int ipath_file_vma_fault(struct vm_area_struct *vma,
-                                       struct vm_fault *vmf)
-{
-       struct page *page;
-
-       page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
-       if (!page)
-               return VM_FAULT_SIGBUS;
-       get_page(page);
-       vmf->page = page;
-
-       return 0;
-}
-
-static const struct vm_operations_struct ipath_file_vm_ops = {
-       .fault = ipath_file_vma_fault,
-};
-
-static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
-                      struct ipath_portdata *pd, unsigned subport)
-{
-       unsigned long len;
-       struct ipath_devdata *dd;
-       void *addr;
-       size_t size;
-       int ret = 0;
-
-       /* If the port is not shared, all addresses should be physical */
-       if (!pd->port_subport_cnt)
-               goto bail;
-
-       dd = pd->port_dd;
-       size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
-
-       /*
-        * Each process has all the subport uregbase, rcvhdrq, and
-        * rcvegrbufs mmapped - as an array for all the processes,
-        * and also separately for this process.
-        */
-       if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
-               addr = pd->subport_uregbase;
-               size = PAGE_SIZE * pd->port_subport_cnt;
-       } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
-               addr = pd->subport_rcvhdr_base;
-               size = pd->port_rcvhdrq_size * pd->port_subport_cnt;
-       } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
-               addr = pd->subport_rcvegrbuf;
-               size *= pd->port_subport_cnt;
-        } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
-                                        PAGE_SIZE * subport)) {
-                addr = pd->subport_uregbase + PAGE_SIZE * subport;
-                size = PAGE_SIZE;
-        } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
-                                pd->port_rcvhdrq_size * subport)) {
-                addr = pd->subport_rcvhdr_base +
-                        pd->port_rcvhdrq_size * subport;
-                size = pd->port_rcvhdrq_size;
-        } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
-                               size * subport)) {
-                addr = pd->subport_rcvegrbuf + size * subport;
-                /* rcvegrbufs are read-only on the slave */
-                if (vma->vm_flags & VM_WRITE) {
-                        dev_info(&dd->pcidev->dev,
-                                 "Can't map eager buffers as "
-                                 "writable (flags=%lx)\n", vma->vm_flags);
-                        ret = -EPERM;
-                        goto bail;
-                }
-                /*
-                 * Don't allow permission to later change to writeable
-                 * with mprotect.
-                 */
-                vma->vm_flags &= ~VM_MAYWRITE;
-       } else {
-               goto bail;
-       }
-       len = vma->vm_end - vma->vm_start;
-       if (len > size) {
-               ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
-       vma->vm_ops = &ipath_file_vm_ops;
-       vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
-       ret = 1;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_mmap - mmap various structures into user space
- * @fp: the file pointer
- * @vma: the VM area
- *
- * We use this to have a shared buffer between the kernel and the user code
- * for the rcvhdr queue, egr buffers, and the per-port user regs and pio
- * buffers in the chip.  We have the open and close entries so we can bump
- * the ref count and keep the driver from being unloaded while still mapped.
- */
-static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
-{
-       struct ipath_portdata *pd;
-       struct ipath_devdata *dd;
-       u64 pgaddr, ureg;
-       unsigned piobufs, piocnt;
-       int ret;
-
-       pd = port_fp(fp);
-       if (!pd) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       dd = pd->port_dd;
-
-       /*
-        * This is the ipath_do_user_init() code, mapping the shared buffers
-        * into the user process. The address referred to by vm_pgoff is the
-        * file offset passed via mmap().  For shared ports, this is the
-        * kernel vmalloc() address of the pages to share with the master.
-        * For non-shared or master ports, this is a physical address.
-        * We only do one mmap for each space mapped.
-        */
-       pgaddr = vma->vm_pgoff << PAGE_SHIFT;
-
-       /*
-        * Check for 0 in case one of the allocations failed, but user
-        * called mmap anyway.
-        */
-       if (!pgaddr)  {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n",
-                  (unsigned long long) pgaddr, vma->vm_start,
-                  vma->vm_end - vma->vm_start, dd->ipath_unit,
-                  pd->port_port, subport_fp(fp));
-
-       /*
-        * Physical addresses must fit in 40 bits for our hardware.
-        * Check for kernel virtual addresses first, anything else must
-        * match a HW or memory address.
-        */
-       ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
-       if (ret) {
-               if (ret > 0)
-                       ret = 0;
-               goto bail;
-       }
-
-       ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
-       if (!pd->port_subport_cnt) {
-               /* port is not shared */
-               piocnt = pd->port_piocnt;
-               piobufs = pd->port_piobufs;
-       } else if (!subport_fp(fp)) {
-               /* caller is the master */
-               piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
-                        (pd->port_piocnt % pd->port_subport_cnt);
-               piobufs = pd->port_piobufs +
-                       dd->ipath_palign * (pd->port_piocnt - piocnt);
-       } else {
-               unsigned slave = subport_fp(fp) - 1;
-
-               /* caller is a slave */
-               piocnt = pd->port_piocnt / pd->port_subport_cnt;
-               piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
-       }
-
-       if (pgaddr == ureg)
-               ret = mmap_ureg(vma, dd, ureg);
-       else if (pgaddr == piobufs)
-               ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt);
-       else if (pgaddr == dd->ipath_pioavailregs_phys)
-               /* in-memory copy of pioavail registers */
-               ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
-                                    (void *) dd->ipath_pioavailregs_dma,
-                                    "pioavail registers");
-       else if (pgaddr == pd->port_rcvegr_phys)
-               ret = mmap_rcvegrbufs(vma, pd);
-       else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
-               /*
-                * The rcvhdrq itself; readonly except on HT (so have
-                * to allow writable mapping), multiple pages, contiguous
-                * from an i/o perspective.
-                */
-               ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1,
-                                    pd->port_rcvhdrq,
-                                    "rcvhdrq");
-       else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys)
-               /* in-memory copy of rcvhdrq tail register */
-               ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
-                                    pd->port_rcvhdrtail_kvaddr,
-                                    "rcvhdrq tail");
-       else
-               ret = -EINVAL;
-
-       vma->vm_private_data = NULL;
-
-       if (ret < 0)
-               dev_info(&dd->pcidev->dev,
-                        "Failure %d on off %llx len %lx\n",
-                        -ret, (unsigned long long)pgaddr,
-                        vma->vm_end - vma->vm_start);
-bail:
-       return ret;
-}
-
-/*
- * Report a receive header queue overflow to poll().
- *
- * Returns POLLIN | POLLRDNORM when the port asked for overflow polling
- * and port_hdrqfull differs from the value recorded at the previous
- * report; the recorded value is then brought up to date.  Returns 0 in
- * every other case.
- */
-static unsigned ipath_poll_hdrqfull(struct ipath_portdata *pd)
-{
-       /* overflow events were not requested for this port */
-       if (!(pd->poll_type & IPATH_POLL_TYPE_OVERFLOW))
-               return 0;
-
-       /* nothing new since the last time we reported */
-       if (pd->port_hdrqfull == pd->port_hdrqfull_poll)
-               return 0;
-
-       pd->port_hdrqfull_poll = pd->port_hdrqfull;
-       return POLLIN | POLLRDNORM;
-}
-
-/*
- * poll() handler used when the port selected IPATH_POLL_TYPE_URGENT.
- *
- * @pd: port being polled
- * @fp: file the poll was issued on
- * @pt: poll table to register the wait queue with
- *
- * Returns POLLIN | POLLRDNORM when a header queue overflow is pending
- * (see ipath_poll_hdrqfull()) or when port_urgent has changed since the
- * last report.  Otherwise marks the port as waiting for an urgent event,
- * registers on pd->port_wait and returns 0.
- */
-static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
-                                     struct file *fp,
-                                     struct poll_table_struct *pt)
-{
-       unsigned pollflag;
-
-       /* variable access in ipath_poll_hdrqfull() needs this */
-       rmb();
-       pollflag = ipath_poll_hdrqfull(pd);
-
-       if (pd->port_urgent != pd->port_urgent_poll) {
-               pollflag |= POLLIN | POLLRDNORM;
-               pd->port_urgent_poll = pd->port_urgent;
-       }
-
-       if (!pollflag) {
-               /* this saves a spin_lock/unlock in interrupt handler... */
-               set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag);
-               /* flush waiting flag so don't miss an event... */
-               wmb();
-               poll_wait(fp, &pd->port_wait, pt);
-       }
-
-       return pollflag;
-}
-
-/*
- * poll() handler used when the port did not select urgent polling.
- *
- * @pd: port being polled
- * @fp: file the poll was issued on
- * @pt: poll table to register the wait queue with
- *
- * Returns POLLIN | POLLRDNORM (possibly already set by
- * ipath_poll_hdrqfull()) when the receive header queue head and tail
- * differ.  When they are equal, the port is marked as waiting for
- * receive, the port's interrupt-available bit is set in rcvctrl and
- * written to the chip, and the caller is registered on pd->port_wait.
- */
-static unsigned int ipath_poll_next(struct ipath_portdata *pd,
-                                   struct file *fp,
-                                   struct poll_table_struct *pt)
-{
-       u32 head;
-       u32 tail;
-       unsigned pollflag = 0;
-       struct ipath_devdata *dd;
-
-       dd = pd->port_dd;
-
-       /* variable access in ipath_poll_hdrqfull() needs this */
-       rmb();
-       pollflag = ipath_poll_hdrqfull(pd);
-
-       head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
-       /* use the in-memory tail copy when there is one, else the register */
-       if (pd->port_rcvhdrtail_kvaddr)
-               tail = ipath_get_rcvhdrtail(pd);
-       else
-               tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
-
-       if (head != tail)
-               pollflag |= POLLIN | POLLRDNORM;
-       else {
-               /* this saves a spin_lock/unlock in interrupt handler */
-               set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
-               /* flush waiting flag so we don't miss an event */
-               wmb();
-
-               /* set this port's interrupt-available bit and push it out */
-               set_bit(pd->port_port + dd->ipath_r_intravail_shift,
-                       &dd->ipath_rcvctrl);
-
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                                dd->ipath_rcvctrl);
-
-               if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
-                       ipath_write_ureg(dd, ur_rcvhdrhead,
-                                        dd->ipath_rhdrhead_intr_off | head,
-                                        pd->port_port);
-
-               poll_wait(fp, &pd->port_wait, pt);
-       }
-
-       return pollflag;
-}
-
-/*
- * poll() entry point: dispatch to the urgent or the regular poll
- * handler depending on the port's poll_type.  A file with no port
- * assigned yet reports no events.
- */
-static unsigned int ipath_poll(struct file *fp,
-                              struct poll_table_struct *pt)
-{
-       struct ipath_portdata *pd = port_fp(fp);
-
-       if (!pd)
-               return 0;
-
-       if (pd->poll_type & IPATH_POLL_TYPE_URGENT)
-               return ipath_poll_urgent(pd, fp, pt);
-
-       return ipath_poll_next(pd, fp, pt);
-}
-
-/*
- * Tell whether a user library of the given version implements subports.
- *
- * @user_swmajor: major number of the user library version
- * @user_swminor: minor number of the user library version
- *
- * There is no subport implementation prior to software version 1.3.
- * Returns non-zero for 1.3 and anything newer, zero otherwise.
- */
-static int ipath_supports_subports(int user_swmajor, int user_swminor)
-{
-       /* any later major release is newer than 1.3, whatever its minor */
-       if (user_swmajor > 1)
-               return 1;
-
-       /*
-        * The minor number is only meaningful within major 1; without the
-        * major check a 0.x library with x >= 3 was reported as capable.
-        */
-       return user_swmajor == 1 && user_swminor >= 3;
-}
-
-/*
- * Decide whether a user library version may share a port with this
- * driver version (IPATH_USER_SWMAJOR.IPATH_USER_SWMINOR).
- *
- * Returns 1 when the two are compatible, 0 when they are not.
- */
-static int ipath_compatible_subports(int user_swmajor, int user_swminor)
-{
-       /* no promise of compatibility if the major numbers differ */
-       if (user_swmajor != IPATH_USER_SWMAJOR)
-               return 0;
-
-       /* make no promises yet for driver majors other than 1 */
-       if (IPATH_USER_SWMAJOR != 1)
-               return 0;
-
-       /* driver minors 0..2 have no subports, so nothing is compatible */
-       if (IPATH_USER_SWMINOR >= 0 && IPATH_USER_SWMINOR <= 2)
-               return 0;
-
-       /* a 1.3 driver is only compatible with a 1.3 library */
-       if (IPATH_USER_SWMINOR == 3)
-               return user_swminor == 3;
-
-       /* every other driver minor accepts library minors from 4 upwards */
-       return user_swminor >= 4;
-}
-
-/*
- * Set up the shared-port (subport) state of a port that is being opened.
- *
- * @dd: device the port belongs to
- * @pd: port being opened
- * @uinfo: open parameters supplied by the user
- *
- * Returns 0 on success, and also when no sharing is set up (zero
- * subports requested, or one of the version checks below fails);
- * -EINVAL when more than INFINIPATH_MAX_SUBPORT subports are requested;
- * -ENOMEM when one of the three per-subport areas cannot be allocated.
- * On -ENOMEM every area allocated so far is freed and its pointer in
- * @pd is reset to NULL.
- */
-static int init_subports(struct ipath_devdata *dd,
-                        struct ipath_portdata *pd,
-                        const struct ipath_user_info *uinfo)
-{
-       int ret = 0;
-       unsigned num_subports;
-       size_t size;
-
-       /*
-        * If the user is requesting zero subports,
-        * skip the subport allocation.
-        */
-       if (uinfo->spu_subport_cnt <= 0)
-               goto bail;
-
-       /* Self-consistency check for ipath_compatible_subports() */
-       if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) &&
-           !ipath_compatible_subports(IPATH_USER_SWMAJOR,
-                                      IPATH_USER_SWMINOR)) {
-               dev_info(&dd->pcidev->dev,
-                        "Inconsistent ipath_compatible_subports()\n");
-               goto bail;
-       }
-
-       /* Check for subport compatibility */
-       if (!ipath_compatible_subports(uinfo->spu_userversion >> 16,
-                                      uinfo->spu_userversion & 0xffff)) {
-               dev_info(&dd->pcidev->dev,
-                        "Mismatched user version (%d.%d) and driver "
-                        "version (%d.%d) while port sharing. Ensure "
-                        "that driver and library are from the same "
-                        "release.\n",
-                        (int) (uinfo->spu_userversion >> 16),
-                        (int) (uinfo->spu_userversion & 0xffff),
-                        IPATH_USER_SWMAJOR,
-                        IPATH_USER_SWMINOR);
-               goto bail;
-       }
-       if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       num_subports = uinfo->spu_subport_cnt;
-       pd->subport_uregbase = vzalloc(PAGE_SIZE * num_subports);
-       if (!pd->subport_uregbase) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-       /* Note: pd->port_rcvhdrq_size isn't initialized yet. */
-       size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
-                    sizeof(u32), PAGE_SIZE) * num_subports;
-       pd->subport_rcvhdr_base = vzalloc(size);
-       if (!pd->subport_rcvhdr_base) {
-               ret = -ENOMEM;
-               goto bail_ureg;
-       }
-
-       pd->subport_rcvegrbuf = vzalloc(pd->port_rcvegrbuf_chunks *
-                                       pd->port_rcvegrbuf_size *
-                                       num_subports);
-       if (!pd->subport_rcvegrbuf) {
-               ret = -ENOMEM;
-               goto bail_rhdr;
-       }
-
-       pd->port_subport_cnt = uinfo->spu_subport_cnt;
-       pd->port_subport_id = uinfo->spu_subport_id;
-       pd->active_slaves = 1;
-       set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
-       goto bail;
-
-bail_rhdr:
-       vfree(pd->subport_rcvhdr_base);
-       /* pd outlives this call: don't leave a pointer to freed memory */
-       pd->subport_rcvhdr_base = NULL;
-bail_ureg:
-       vfree(pd->subport_uregbase);
-       pd->subport_uregbase = NULL;
-bail:
-       return ret;
-}
-
-/*
- * Try to claim user port @port of device @dd for the opener of @fp.
- *
- * The port data is allocated the first time a given port is tried and
- * is kept in dd->ipath_pd[port] afterwards.
- *
- * Returns 0 when the port was claimed, -EBUSY when it is already in
- * use, -ENOMEM when the port data cannot be allocated, or the error
- * returned by init_subports().
- */
-static int try_alloc_port(struct ipath_devdata *dd, int port,
-                         struct file *fp,
-                         const struct ipath_user_info *uinfo)
-{
-       struct ipath_portdata *pd = dd->ipath_pd[port];
-       int ret;
-
-       if (!pd) {
-               void *tidbuf;
-
-               pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
-
-               /*
-                * Scratch memory for ipath_tid_update(), obtained once at
-                * open rather than on every call, which keeps expected
-                * send setup cheap.
-                */
-               tidbuf = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
-                                dd->ipath_rcvtidcnt * sizeof(struct page **),
-                                GFP_KERNEL);
-               if (!pd || !tidbuf) {
-                       ipath_dev_err(dd, "Unable to allocate portdata "
-                                     "memory, failing open\n");
-                       kfree(pd);
-                       kfree(tidbuf);
-                       return -ENOMEM;
-               }
-               dd->ipath_pd[port] = pd;
-               pd->port_port = port;
-               pd->port_dd = dd;
-               pd->port_tid_pg_list = tidbuf;
-               init_waitqueue_head(&pd->port_wait);
-       }
-
-       /* somebody else already holds this port */
-       if (pd->port_cnt)
-               return -EBUSY;
-
-       pd->userversion = uinfo->spu_userversion;
-       init_user_egr_sizes(pd);
-       ret = init_subports(dd, pd, uinfo);
-       if (ret != 0)
-               return ret;
-
-       ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
-                  current->comm, current->pid, dd->ipath_unit,
-                  port);
-       pd->port_cnt = 1;
-       port_fp(fp) = pd;
-       pd->port_pid = get_pid(task_pid(current));
-       strlcpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
-       ipath_stats.sps_ports++;
-
-       return 0;
-}
-
-/*
- * Return 1 when @dd can be handed to a user: it exists, is flagged
- * present, has its kernel registers mapped, has a LID, and is neither
- * link-down, disabled nor in an unknown link state.  Return 0 otherwise.
- */
-static inline int usable(struct ipath_devdata *dd)
-{
-       if (!dd)
-               return 0;
-       if (!(dd->ipath_flags & IPATH_PRESENT))
-               return 0;
-       if (!dd->ipath_kregbase)
-               return 0;
-       if (!dd->ipath_lid)
-               return 0;
-       if (dd->ipath_flags & (IPATH_LINKDOWN | IPATH_DISABLED
-                              | IPATH_LINKUNK))
-               return 0;
-
-       return 1;
-}
-
-/*
- * Claim the first free user port of one specific unit.
- *
- * Returns 0 on success, -ENODEV when the unit does not exist,
- * -ENETDOWN when it is not usable, -EBUSY when every user port is
- * taken, or the first error other than -EBUSY from try_alloc_port().
- */
-static int find_free_port(int unit, struct file *fp,
-                         const struct ipath_user_info *uinfo)
-{
-       struct ipath_devdata *dd = ipath_lookup(unit);
-       int port;
-
-       if (!dd)
-               return -ENODEV;
-
-       if (!usable(dd))
-               return -ENETDOWN;
-
-       /* user ports start at 1 */
-       for (port = 1; port < dd->ipath_cfgports; port++) {
-               int rc = try_alloc_port(dd, port, fp, uinfo);
-
-               /* success or a hard error: either way, stop looking */
-               if (rc != -EBUSY)
-                       return rc;
-       }
-
-       return -EBUSY;
-}
-
-/*
- * Pick a unit and claim a free user port on it for the opener of @fp.
- *
- * When the calling task has a CPU affinity that is neither empty nor
- * full, a preferred unit is derived from the allowed CPU and tried
- * first; if that fails and the preferred unit was above 0, the search
- * is repeated over all units.
- *
- * Returns 0 on success, -ENETDOWN when units are present but none is
- * up, -EBUSY when no port is free, and -ENXIO when no unit is present.
- */
-static int find_best_unit(struct file *fp,
-                         const struct ipath_user_info *uinfo)
-{
-       int ret = 0, i, prefunit = -1, devmax;
-       int maxofallports, npresent, nup;
-       int ndev;
-
-       devmax = ipath_count_units(&npresent, &nup, &maxofallports);
-
-       /*
-        * This code is present to allow a knowledgeable person to
-        * specify the layout of processes to processors before opening
-        * this driver, and then we'll assign the process to the "closest"
-        * InfiniPath chip to that processor (we assume reasonable connectivity,
-        * for now).  This code assumes that if affinity has been set
-        * before this point, that at most one cpu is set; for now this
-        * is reasonable.  I check for both cpumask_empty() and cpumask_full(),
-        * in case some kernel variant sets none of the bits when no
-        * affinity is set.  2.6.11 and 12 kernels have all present
-        * cpus set.  Some day we'll have to fix it up further to handle
-        * a cpu subset.  This algorithm fails for two HT chips connected
-        * in tunnel fashion.  Eventually this needs real topology
-        * information.  There may be some issues with dual core numbering
-        * as well.  This needs more work prior to release.
-        */
-       if (!cpumask_empty(tsk_cpus_allowed(current)) &&
-           !cpumask_full(tsk_cpus_allowed(current))) {
-               int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
-               get_online_cpus();
-               for_each_online_cpu(i)
-                       if (cpumask_test_cpu(i, tsk_cpus_allowed(current))) {
-                               ipath_cdbg(PROC, "%s[%u] affinity set for "
-                                          "cpu %d/%d\n", current->comm,
-                                          current->pid, i, ncpus);
-                               curcpu = i;
-                               nset++;
-                       }
-               put_online_cpus();
-               if (curcpu != -1 && nset != ncpus && npresent) {
-                       int cpus_per_chip = ncpus / npresent;
-
-                       /*
-                        * With more chips than online cpus the quotient
-                        * is 0 and the division below would fault; leave
-                        * prefunit unset and fall back to the plain
-                        * round-robin search in that case.
-                        */
-                       if (cpus_per_chip > 0) {
-                               prefunit = curcpu / cpus_per_chip;
-                               ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, "
-                                         "%d cpus/chip, select unit %d\n",
-                                         current->comm, current->pid,
-                                         npresent, ncpus, cpus_per_chip,
-                                         prefunit);
-                       }
-               }
-       }
-
-       /*
-        * user ports start at 1, kernel port is 0
-        * For now, we do round-robin access across all chips
-        */
-
-       if (prefunit != -1)
-               devmax = prefunit + 1;
-recheck:
-       for (i = 1; i < maxofallports; i++) {
-               for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
-                    ndev++) {
-                       struct ipath_devdata *dd = ipath_lookup(ndev);
-
-                       if (!usable(dd))
-                               continue; /* can't use this unit */
-                       if (i >= dd->ipath_cfgports)
-                               /*
-                                * Maxed out on users of this unit. Try
-                                * next.
-                                */
-                               continue;
-                       ret = try_alloc_port(dd, i, fp, uinfo);
-                       if (!ret)
-                               goto done;
-               }
-       }
-
-       if (npresent) {
-               if (nup == 0) {
-                       ret = -ENETDOWN;
-                       ipath_dbg("No ports available (none initialized "
-                                 "and ready)\n");
-               } else {
-                       if (prefunit > 0) {
-                               /* if started above 0, retry from 0 */
-                               ipath_cdbg(PROC,
-                                          "%s[%u] no ports on prefunit "
-                                          "%d, clear and re-check\n",
-                                          current->comm, current->pid,
-                                          prefunit);
-                               devmax = ipath_count_units(NULL, NULL,
-                                                          NULL);
-                               prefunit = -1;
-                               goto recheck;
-                       }
-                       ret = -EBUSY;
-                       ipath_dbg("No ports available\n");
-               }
-       } else {
-               ret = -ENXIO;
-               ipath_dbg("No boards found\n");
-       }
-
-done:
-       return ret;
-}
-
-/*
- * Look for an already open port whose subport id matches the request
- * and attach the opener of @fp to it as an additional subport.
- *
- * Returns 1 when the file was attached, 0 when no port carries the
- * requested subport id, and -EINVAL when a port matches the id but the
- * subport count or user version differs from the master's, or no
- * subport slot is left.
- */
-static int find_shared_port(struct file *fp,
-                           const struct ipath_user_info *uinfo)
-{
-       int nunits = ipath_count_units(NULL, NULL, NULL);
-       int unit, port;
-
-       for (unit = 0; unit < nunits; unit++) {
-               struct ipath_devdata *dd = ipath_lookup(unit);
-
-               if (!usable(dd))
-                       continue;
-
-               for (port = 1; port < dd->ipath_cfgports; port++) {
-                       struct ipath_portdata *pd = dd->ipath_pd[port];
-
-                       /* only an open port with the requested id qualifies */
-                       if (!pd || !pd->port_cnt ||
-                           pd->port_subport_id != uinfo->spu_subport_id)
-                               continue;
-
-                       /* the sharer must agree with the master and fit in */
-                       if (pd->port_subport_cnt != uinfo->spu_subport_cnt ||
-                           pd->userversion != uinfo->spu_userversion ||
-                           pd->port_cnt >= pd->port_subport_cnt)
-                               return -EINVAL;
-
-                       port_fp(fp) = pd;
-                       subport_fp(fp) = pd->port_cnt++;
-                       pd->port_subpid[subport_fp(fp)] =
-                               get_pid(task_pid(current));
-                       tidcursor_fp(fp) = 0;
-                       pd->active_slaves |= 1 << subport_fp(fp);
-                       ipath_cdbg(PROC,
-                                  "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
-                                  current->comm, current->pid,
-                                  subport_fp(fp),
-                                  pd->port_comm, pid_nr(pd->port_pid),
-                                  dd->ipath_unit, pd->port_port);
-                       return 1;
-               }
-       }
-
-       return 0;
-}
-
-/*
- * open() only allocates the zeroed per-file data; the port itself is
- * chosen later, in ipath_assign_port().  Returns 0, or -ENOMEM when the
- * allocation fails.
- */
-static int ipath_open(struct inode *in, struct file *fp)
-{
-       struct ipath_filedata *fd;
-
-       fd = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
-       fp->private_data = fd;
-       if (!fd)
-               return -ENOMEM;
-
-       return 0;
-}
-
-/*
- * Get port early, so can set affinity prior to memory allocation.
- *
- * @fp: file being bound to a port
- * @uinfo: open parameters supplied by the user
- *
- * Joins an existing shared port when the request asks for subports and
- * a matching port is found; otherwise claims a free port, either on the
- * unit selected by the device minor or on the unit picked by
- * find_best_unit().  On success a user SDMA queue is created for the
- * file.
- *
- * Returns 0 on success, -EINVAL when the file already has a port,
- * -ENODEV on a major version mismatch, -ENOMEM when the SDMA queue
- * cannot be created, or the error from the port search.
- */
-static int ipath_assign_port(struct file *fp,
-                             const struct ipath_user_info *uinfo)
-{
-       int ret;
-       int i_minor;
-       unsigned swmajor, swminor;
-
-       /* Check to be sure we haven't already initialized this file */
-       if (port_fp(fp)) {
-               ret = -EINVAL;
-               goto done;
-       }
-
-       /* for now, if major version is different, bail */
-       swmajor = uinfo->spu_userversion >> 16;
-       if (swmajor != IPATH_USER_SWMAJOR) {
-               ipath_dbg("User major version %d not same as driver "
-                         "major %d\n", uinfo->spu_userversion >> 16,
-                         IPATH_USER_SWMAJOR);
-               ret = -ENODEV;
-               goto done;
-       }
-
-       /* a minor mismatch is only logged, not rejected */
-       swminor = uinfo->spu_userversion & 0xffff;
-       if (swminor != IPATH_USER_SWMINOR)
-               ipath_dbg("User minor version %d not same as driver "
-                         "minor %d\n", swminor, IPATH_USER_SWMINOR);
-
-       /* held from here until just before the "done" label */
-       mutex_lock(&ipath_mutex);
-
-       /*
-        * find_shared_port() returns 0 when nothing matched, in which case
-        * we fall through and look for a free port; a positive value means
-        * the file was attached, a negative one is an error.
-        */
-       if (ipath_compatible_subports(swmajor, swminor) &&
-           uinfo->spu_subport_cnt &&
-           (ret = find_shared_port(fp, uinfo))) {
-               if (ret > 0)
-                       ret = 0;
-               goto done_chk_sdma;
-       }
-
-       i_minor = iminor(file_inode(fp)) - IPATH_USER_MINOR_BASE;
-       ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
-                  (long)file_inode(fp)->i_rdev, i_minor);
-
-       /* a non-zero minor names unit (minor - 1); zero means "any unit" */
-       if (i_minor)
-               ret = find_free_port(i_minor - 1, fp, uinfo);
-       else
-               ret = find_best_unit(fp, uinfo);
-
-done_chk_sdma:
-       if (!ret) {
-               struct ipath_filedata *fd = fp->private_data;
-               const struct ipath_portdata *pd = fd->pd;
-               const struct ipath_devdata *dd = pd->port_dd;
-
-               fd->pq = ipath_user_sdma_queue_create(&dd->pcidev->dev,
-                                                     dd->ipath_unit,
-                                                     pd->port_port,
-                                                     fd->subport);
-
-               if (!fd->pq)
-                       ret = -ENOMEM;
-       }
-
-       mutex_unlock(&ipath_mutex);
-
-done:
-       return ret;
-}
-
-
-/*
- * Finish the initialization of a port after it has been assigned.
- *
- * @fp: file that owns the port (or one of its subports)
- * @uinfo: open parameters supplied by the user
- *
- * For a subport (non-zero subport_fp) this only waits, interruptibly,
- * until the master has cleared IPATH_PORT_MASTER_UNINIT.  For the master
- * it computes the PIO buffer range of the port, creates the receive
- * header queue and the eager buffers, resets the poll state, enables
- * the port for receive and finally wakes any waiting subports.
- *
- * Returns 0 on success or the error from the wait or from one of the
- * setup helpers.
- */
-static int ipath_do_user_init(struct file *fp,
-                             const struct ipath_user_info *uinfo)
-{
-       int ret;
-       struct ipath_portdata *pd = port_fp(fp);
-       struct ipath_devdata *dd;
-       u32 head32;
-
-       /* Subports don't need to initialize anything since master did it. */
-       if (subport_fp(fp)) {
-               ret = wait_event_interruptible(pd->port_wait,
-                       !test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));
-               goto done;
-       }
-
-       dd = pd->port_dd;
-
-       if (uinfo->spu_rcvhdrsize) {
-               ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
-               if (ret)
-                       goto done;
-       }
-
-       /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
-
-       /* some ports may get extra buffers, calculate that here */
-       if (pd->port_port <= dd->ipath_ports_extrabuf)
-               pd->port_piocnt = dd->ipath_pbufsport + 1;
-       else
-               pd->port_piocnt = dd->ipath_pbufsport;
-
-       /* for right now, kernel piobufs are at end, so port 1 is at 0 */
-       if (pd->port_port <= dd->ipath_ports_extrabuf)
-               pd->port_pio_base = (dd->ipath_pbufsport + 1)
-                       * (pd->port_port - 1);
-       else
-               pd->port_pio_base = dd->ipath_ports_extrabuf +
-                       dd->ipath_pbufsport * (pd->port_port - 1);
-       pd->port_piobufs = dd->ipath_piobufbase +
-               pd->port_pio_base * dd->ipath_palign;
-       ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
-               " first pio %u\n", pd->port_port, pd->port_piobufs,
-               pd->port_piocnt, pd->port_pio_base);
-       ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);
-
-       /*
-        * Now allocate the rcvhdr Q and eager TIDs; skip the TID
-        * array for time being.  If pd->port_port > chip-supported,
-        * we need to do extra stuff here to handle by handling overflow
-        * through port 0, someday
-        */
-       ret = ipath_create_rcvhdrq(dd, pd);
-       if (!ret)
-               ret = ipath_create_user_egr(pd);
-       if (ret)
-               goto done;
-
-       /*
-        * set the eager head register for this port to the current values
-        * of the tail pointers, since we don't know if they were
-        * updated on last use of the port.
-        */
-       head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
-       ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
-       pd->port_lastrcvhdrqtail = -1;
-       ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
-               pd->port_port, head32);
-       pd->port_tidcursor = 0; /* start at beginning after open */
-
-       /* initialize poll variables... */
-       pd->port_urgent = 0;
-       pd->port_urgent_poll = 0;
-       pd->port_hdrqfull_poll = pd->port_hdrqfull;
-
-       /*
-        * Now enable the port for receive.
-        * Some chips are set to DMA the tail register to memory when it
-        * changes (and when the update bit transitions from 0 to 1).
-        * So for those chips, we turn it off and then back on.
-        * This will (very briefly) affect any other open ports, but the
-        * duration is very short, and therefore isn't an issue.  We
-        * explicitly set the in-memory tail copy to 0 beforehand, so we
-        * don't have to wait to be sure the DMA update has happened
-        * (chip resets head/tail to 0 on transition to enable).
-        */
-       set_bit(dd->ipath_r_portenable_shift + pd->port_port,
-               &dd->ipath_rcvctrl);
-       if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
-               if (pd->port_rcvhdrtail_kvaddr)
-                       ipath_clear_rcvhdrtail(pd);
-               /* first write: rcvctrl with the tail-update bit masked off */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                       dd->ipath_rcvctrl &
-                       ~(1ULL << dd->ipath_r_tailupd_shift));
-       }
-       /* second write: the full rcvctrl value */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                        dd->ipath_rcvctrl);
-       /* Notify any waiting slaves */
-       if (pd->port_subport_cnt) {
-               clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
-               wake_up(&pd->port_wait);
-       }
-done:
-       return ret;
-}
-
-/**
- * unlock_expected_tids - unlock any expected TID entries port still had in use
- * @pd: port
- *
- * Walks the port's slice of dd->ipath_pageshadow, and for every entry
- * that is still set clears it, unmaps the page for DMA and releases it.
- *
- * We don't actually update the chip here, because ipath_close() does a
- * bulk update through dd->ipath_f_clear_tids.
- */
-static void unlock_expected_tids(struct ipath_portdata *pd)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       /* each port owns ipath_rcvtidcnt consecutive shadow entries */
-       int port_tidbase = pd->port_port * dd->ipath_rcvtidcnt;
-       int i, cnt = 0, maxtid = port_tidbase + dd->ipath_rcvtidcnt;
-
-       ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n",
-                  pd->port_port);
-       for (i = port_tidbase; i < maxtid; i++) {
-               struct page *ps = dd->ipath_pageshadow[i];
-
-               /* entry not in use */
-               if (!ps)
-                       continue;
-
-               dd->ipath_pageshadow[i] = NULL;
-               pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i],
-                       PAGE_SIZE, PCI_DMA_FROMDEVICE);
-               ipath_release_user_pages_on_close(&ps, 1);
-               cnt++;
-               ipath_stats.sps_pageunlocks++;
-       }
-       /* cnt is the number of entries that were still set and got released */
-       if (cnt)
-               ipath_cdbg(VERBOSE, "Port %u locked %u expTID entries\n",
-                          pd->port_port, cnt);
-
-       if (ipath_stats.sps_pagelocks || ipath_stats.sps_pageunlocks)
-               ipath_cdbg(VERBOSE, "%llu pages locked, %llu unlocked\n",
-                          (unsigned long long) ipath_stats.sps_pagelocks,
-                          (unsigned long long)
-                          ipath_stats.sps_pageunlocks);
-}
-
-/*
- * release() handler: detach the file from its port and, when this was
- * the last user of the port, shut the port down.
- *
- * @in: inode of the device node
- * @fp: file being closed
- *
- * A file that never got a port only has its per-file data freed.  A
- * file that is not the last user of a shared port drops its subport
- * bookkeeping and leaves the port running.  The last user removes the
- * port from dd->ipath_pd[], disables it in the chip (when the registers
- * are mapped), releases its PIO buffers and expected TID pages, and
- * frees the port data.
- *
- * Always returns 0.
- */
-static int ipath_close(struct inode *in, struct file *fp)
-{
-       struct ipath_filedata *fd;
-       struct ipath_portdata *pd;
-       struct ipath_devdata *dd;
-       unsigned long flags;
-       unsigned port;
-       struct pid *pid;
-
-       ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n",
-                  (long)in->i_rdev, fp->private_data);
-
-       mutex_lock(&ipath_mutex);
-
-       fd = fp->private_data;
-       fp->private_data = NULL;
-       pd = fd->pd;
-       /* no port was ever assigned to this file */
-       if (!pd) {
-               mutex_unlock(&ipath_mutex);
-               goto bail;
-       }
-
-       dd = pd->port_dd;
-
-       /* drain user sdma queue */
-       ipath_user_sdma_queue_drain(dd, fd->pq);
-       ipath_user_sdma_queue_destroy(fd->pq);
-
-       /* other users remain: only drop this file's subport state */
-       if (--pd->port_cnt) {
-               /*
-                * XXX If the master closes the port before the slave(s),
-                * revoke the mmap for the eager receive queue so
-                * the slave(s) don't wait for receive data forever.
-                */
-               pd->active_slaves &= ~(1 << fd->subport);
-               put_pid(pd->port_subpid[fd->subport]);
-               pd->port_subpid[fd->subport] = NULL;
-               mutex_unlock(&ipath_mutex);
-               goto bail;
-       }
-       /* early; no interrupt users after this */
-       spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-       port = pd->port_port;
-       dd->ipath_pd[port] = NULL;
-       /* keep the pid reference in a local; it is dropped near the end */
-       pid = pd->port_pid;
-       pd->port_pid = NULL;
-       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-
-       if (pd->port_rcvwait_to || pd->port_piowait_to
-           || pd->port_rcvnowait || pd->port_pionowait) {
-               ipath_cdbg(VERBOSE, "port%u, %u rcv, %u pio wait timeo; "
-                          "%u rcv %u, pio already\n",
-                          pd->port_port, pd->port_rcvwait_to,
-                          pd->port_piowait_to, pd->port_rcvnowait,
-                          pd->port_pionowait);
-               pd->port_rcvwait_to = pd->port_piowait_to =
-                       pd->port_rcvnowait = pd->port_pionowait = 0;
-       }
-       if (pd->port_flag) {
-               ipath_cdbg(PROC, "port %u port_flag set: 0x%lx\n",
-                         pd->port_port, pd->port_flag);
-               pd->port_flag = 0;
-       }
-
-       /* chip accesses only when the kernel register mapping exists */
-       if (dd->ipath_kregbase) {
-               /* atomically clear receive enable port and intr avail. */
-               clear_bit(dd->ipath_r_portenable_shift + port,
-                         &dd->ipath_rcvctrl);
-               clear_bit(pd->port_port + dd->ipath_r_intravail_shift,
-                         &dd->ipath_rcvctrl);
-               ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl,
-                       dd->ipath_rcvctrl);
-               /* and read back from chip to be sure that nothing
-                * else is in flight when we do the rest */
-               (void)ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-
-               /* clean up the pkeys for this port user */
-               ipath_clean_part_key(pd, dd);
-               /*
-                * be paranoid, and never write 0's to these, just use an
-                * unused part of the port 0 tail page.  Of course,
-                * rcvhdraddr points to a large chunk of memory, so this
-                * could still trash things, but at least it won't trash
-                * page 0, and by disabling the port, it should stop "soon",
-                * even if a packet or two is in already in flight after we
-                * disabled the port.
-                */
-               ipath_write_kreg_port(dd,
-                       dd->ipath_kregs->kr_rcvhdrtailaddr, port,
-                       dd->ipath_dummy_hdrq_phys);
-               ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
-                       pd->port_port, dd->ipath_dummy_hdrq_phys);
-
-               /* disarm the port's PIO buffers and pass them back (last arg 1) */
-               ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
-               ipath_chg_pioavailkernel(dd, pd->port_pio_base,
-                       pd->port_piocnt, 1);
-
-               dd->ipath_f_clear_tids(dd, pd->port_port);
-
-               if (dd->ipath_pageshadow)
-                       unlock_expected_tids(pd);
-               ipath_stats.sps_ports--;
-               ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
-                          pd->port_comm, pid_nr(pid),
-                          dd->ipath_unit, port);
-       }
-
-       put_pid(pid);
-       mutex_unlock(&ipath_mutex);
-       ipath_free_pddata(dd, pd); /* after releasing the mutex */
-
-bail:
-       /* the per-file data allocated in ipath_open() goes away on every path */
-       kfree(fd);
-       return 0;
-}
-
-/*
- * Copy information about the caller's port to user space.
- *
- * @pd: port the caller has open
- * @subport: subport number of the caller
- * @uinfo: user buffer receiving a struct ipath_port_info
- *
- * Libraries that predate subports (see ipath_supports_subports()) get
- * the structure without its last two u16 fields.
- *
- * Returns 0 on success or -EFAULT when the copy to user space fails.
- */
-static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
-                          struct ipath_port_info __user *uinfo)
-{
-       struct ipath_port_info info;
-       int nup;
-       size_t sz;
-
-       /*
-        * The structure lives on the kernel stack and is copied out
-        * wholesale; zero it first so that bytes no field assignment
-        * touches (for instance compiler padding) are not disclosed.
-        */
-       memset(&info, 0, sizeof(info));
-
-       (void) ipath_count_units(NULL, &nup, NULL);
-       info.num_active = nup;
-       info.unit = pd->port_dd->ipath_unit;
-       info.port = pd->port_port;
-       info.subport = subport;
-       /* Don't return new fields if old library opened the port. */
-       if (ipath_supports_subports(pd->userversion >> 16,
-                                   pd->userversion & 0xffff)) {
-               /* Number of user ports available for this device. */
-               info.num_ports = pd->port_dd->ipath_cfgports - 1;
-               info.num_subports = pd->port_subport_cnt;
-               sz = sizeof(info);
-       } else
-               sz = sizeof(info) - 2 * sizeof(u16);
-
-       if (copy_to_user(uinfo, &info, sz))
-               return -EFAULT;
-
-       return 0;
-}
-
-/*
- * Copy the port's active_slaves mask (sizeof(u32) bytes) to the user
- * address given.  Returns 0 on success, -EFAULT when the copy fails.
- */
-static int ipath_get_slave_info(struct ipath_portdata *pd,
-                               void __user *slave_mask_addr)
-{
-       if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32)))
-               return -EFAULT;
-
-       return 0;
-}
-
-/*
- * Store the user SDMA queue's inflight counter at the user address
- * @inflightp.  Returns 0 on success, -EFAULT when the store fails.
- */
-static int ipath_sdma_get_inflight(struct ipath_user_sdma_queue *pq,
-                                  u32 __user *inflightp)
-{
-       const u32 inflight = ipath_user_sdma_inflight_counter(pq);
-
-       return put_user(inflight, inflightp) ? -EFAULT : 0;
-}
-
-static int ipath_sdma_get_complete(struct ipath_devdata *dd,
-                                  struct ipath_user_sdma_queue *pq,
-                                  u32 __user *completep)
-{
-       u32 val;
-       int err;
-
-       err = ipath_user_sdma_make_progress(dd, pq);
-       if (err < 0)
-               return err;
-
-       val = ipath_user_sdma_complete_counter(pq);
-       if (put_user(val, completep))
-               return -EFAULT;
-
-       return 0;
-}
-
-static ssize_t ipath_write(struct file *fp, const char __user *data,
-                          size_t count, loff_t *off)
-{
-       const struct ipath_cmd __user *ucmd;
-       struct ipath_portdata *pd;
-       const void __user *src;
-       size_t consumed, copy;
-       struct ipath_cmd cmd;
-       ssize_t ret = 0;
-       void *dest;
-
-       if (count < sizeof(cmd.type)) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       ucmd = (const struct ipath_cmd __user *) data;
-
-       if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) {
-               ret = -EFAULT;
-               goto bail;
-       }
-
-       consumed = sizeof(cmd.type);
-
-       switch (cmd.type) {
-       case IPATH_CMD_ASSIGN_PORT:
-       case __IPATH_CMD_USER_INIT:
-       case IPATH_CMD_USER_INIT:
-               copy = sizeof(cmd.cmd.user_info);
-               dest = &cmd.cmd.user_info;
-               src = &ucmd->cmd.user_info;
-               break;
-       case IPATH_CMD_RECV_CTRL:
-               copy = sizeof(cmd.cmd.recv_ctrl);
-               dest = &cmd.cmd.recv_ctrl;
-               src = &ucmd->cmd.recv_ctrl;
-               break;
-       case IPATH_CMD_PORT_INFO:
-               copy = sizeof(cmd.cmd.port_info);
-               dest = &cmd.cmd.port_info;
-               src = &ucmd->cmd.port_info;
-               break;
-       case IPATH_CMD_TID_UPDATE:
-       case IPATH_CMD_TID_FREE:
-               copy = sizeof(cmd.cmd.tid_info);
-               dest = &cmd.cmd.tid_info;
-               src = &ucmd->cmd.tid_info;
-               break;
-       case IPATH_CMD_SET_PART_KEY:
-               copy = sizeof(cmd.cmd.part_key);
-               dest = &cmd.cmd.part_key;
-               src = &ucmd->cmd.part_key;
-               break;
-       case __IPATH_CMD_SLAVE_INFO:
-               copy = sizeof(cmd.cmd.slave_mask_addr);
-               dest = &cmd.cmd.slave_mask_addr;
-               src = &ucmd->cmd.slave_mask_addr;
-               break;
-       case IPATH_CMD_PIOAVAILUPD:     // force an update of PIOAvail reg
-               copy = 0;
-               src = NULL;
-               dest = NULL;
-               break;
-       case IPATH_CMD_POLL_TYPE:
-               copy = sizeof(cmd.cmd.poll_type);
-               dest = &cmd.cmd.poll_type;
-               src = &ucmd->cmd.poll_type;
-               break;
-       case IPATH_CMD_ARMLAUNCH_CTRL:
-               copy = sizeof(cmd.cmd.armlaunch_ctrl);
-               dest = &cmd.cmd.armlaunch_ctrl;
-               src = &ucmd->cmd.armlaunch_ctrl;
-               break;
-       case IPATH_CMD_SDMA_INFLIGHT:
-               copy = sizeof(cmd.cmd.sdma_inflight);
-               dest = &cmd.cmd.sdma_inflight;
-               src = &ucmd->cmd.sdma_inflight;
-               break;
-       case IPATH_CMD_SDMA_COMPLETE:
-               copy = sizeof(cmd.cmd.sdma_complete);
-               dest = &cmd.cmd.sdma_complete;
-               src = &ucmd->cmd.sdma_complete;
-               break;
-       default:
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       if (copy) {
-               if ((count - consumed) < copy) {
-                       ret = -EINVAL;
-                       goto bail;
-               }
-
-               if (copy_from_user(dest, src, copy)) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-
-               consumed += copy;
-       }
-
-       pd = port_fp(fp);
-       if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
-               cmd.type != IPATH_CMD_ASSIGN_PORT) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       switch (cmd.type) {
-       case IPATH_CMD_ASSIGN_PORT:
-               ret = ipath_assign_port(fp, &cmd.cmd.user_info);
-               if (ret)
-                       goto bail;
-               break;
-       case __IPATH_CMD_USER_INIT:
-               /* backwards compatibility, get port first */
-               ret = ipath_assign_port(fp, &cmd.cmd.user_info);
-               if (ret)
-                       goto bail;
-               /* and fall through to current version. */
-       case IPATH_CMD_USER_INIT:
-               ret = ipath_do_user_init(fp, &cmd.cmd.user_info);
-               if (ret)
-                       goto bail;
-               ret = ipath_get_base_info(
-                       fp, (void __user *) (unsigned long)
-                       cmd.cmd.user_info.spu_base_info,
-                       cmd.cmd.user_info.spu_base_info_size);
-               break;
-       case IPATH_CMD_RECV_CTRL:
-               ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl);
-               break;
-       case IPATH_CMD_PORT_INFO:
-               ret = ipath_port_info(pd, subport_fp(fp),
-                                     (struct ipath_port_info __user *)
-                                     (unsigned long) cmd.cmd.port_info);
-               break;
-       case IPATH_CMD_TID_UPDATE:
-               ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info);
-               break;
-       case IPATH_CMD_TID_FREE:
-               ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info);
-               break;
-       case IPATH_CMD_SET_PART_KEY:
-               ret = ipath_set_part_key(pd, cmd.cmd.part_key);
-               break;
-       case __IPATH_CMD_SLAVE_INFO:
-               ret = ipath_get_slave_info(pd,
-                                          (void __user *) (unsigned long)
-                                          cmd.cmd.slave_mask_addr);
-               break;
-       case IPATH_CMD_PIOAVAILUPD:
-               ipath_force_pio_avail_update(pd->port_dd);
-               break;
-       case IPATH_CMD_POLL_TYPE:
-               pd->poll_type = cmd.cmd.poll_type;
-               break;
-       case IPATH_CMD_ARMLAUNCH_CTRL:
-               if (cmd.cmd.armlaunch_ctrl)
-                       ipath_enable_armlaunch(pd->port_dd);
-               else
-                       ipath_disable_armlaunch(pd->port_dd);
-               break;
-       case IPATH_CMD_SDMA_INFLIGHT:
-               ret = ipath_sdma_get_inflight(user_sdma_queue_fp(fp),
-                                             (u32 __user *) (unsigned long)
-                                             cmd.cmd.sdma_inflight);
-               break;
-       case IPATH_CMD_SDMA_COMPLETE:
-               ret = ipath_sdma_get_complete(pd->port_dd,
-                                             user_sdma_queue_fp(fp),
-                                             (u32 __user *) (unsigned long)
-                                             cmd.cmd.sdma_complete);
-               break;
-       }
-
-       if (ret >= 0)
-               ret = consumed;
-
-bail:
-       return ret;
-}
-
-static ssize_t ipath_write_iter(struct kiocb *iocb, struct iov_iter *from)
-{
-       struct file *filp = iocb->ki_filp;
-       struct ipath_filedata *fp = filp->private_data;
-       struct ipath_portdata *pd = port_fp(filp);
-       struct ipath_user_sdma_queue *pq = fp->pq;
-
-       if (!iter_is_iovec(from) || !from->nr_segs)
-               return -EINVAL;
-
-       return ipath_user_sdma_writev(pd->port_dd, pq, from->iov, from->nr_segs);
-}
-
-static struct class *ipath_class;
-
-static int init_cdev(int minor, char *name, const struct file_operations *fops,
-                    struct cdev **cdevp, struct device **devp)
-{
-       const dev_t dev = MKDEV(IPATH_MAJOR, minor);
-       struct cdev *cdev = NULL;
-       struct device *device = NULL;
-       int ret;
-
-       cdev = cdev_alloc();
-       if (!cdev) {
-               printk(KERN_ERR IPATH_DRV_NAME
-                      ": Could not allocate cdev for minor %d, %s\n",
-                      minor, name);
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       cdev->owner = THIS_MODULE;
-       cdev->ops = fops;
-       kobject_set_name(&cdev->kobj, name);
-
-       ret = cdev_add(cdev, dev, 1);
-       if (ret < 0) {
-               printk(KERN_ERR IPATH_DRV_NAME
-                      ": Could not add cdev for minor %d, %s (err %d)\n",
-                      minor, name, -ret);
-               goto err_cdev;
-       }
-
-       device = device_create(ipath_class, NULL, dev, NULL, name);
-
-       if (IS_ERR(device)) {
-               ret = PTR_ERR(device);
-               printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
-                      "device for minor %d, %s (err %d)\n",
-                      minor, name, -ret);
-               goto err_cdev;
-       }
-
-       goto done;
-
-err_cdev:
-       cdev_del(cdev);
-       cdev = NULL;
-
-done:
-       if (ret >= 0) {
-               *cdevp = cdev;
-               *devp = device;
-       } else {
-               *cdevp = NULL;
-               *devp = NULL;
-       }
-
-       return ret;
-}
-
-int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
-                   struct cdev **cdevp, struct device **devp)
-{
-       return init_cdev(minor, name, fops, cdevp, devp);
-}
-
-static void cleanup_cdev(struct cdev **cdevp,
-                        struct device **devp)
-{
-       struct device *dev = *devp;
-
-       if (dev) {
-               device_unregister(dev);
-               *devp = NULL;
-       }
-
-       if (*cdevp) {
-               cdev_del(*cdevp);
-               *cdevp = NULL;
-       }
-}
-
-void ipath_cdev_cleanup(struct cdev **cdevp,
-                       struct device **devp)
-{
-       cleanup_cdev(cdevp, devp);
-}
-
-static struct cdev *wildcard_cdev;
-static struct device *wildcard_dev;
-
-static const dev_t dev = MKDEV(IPATH_MAJOR, 0);
-
-static int user_init(void)
-{
-       int ret;
-
-       ret = register_chrdev_region(dev, IPATH_NMINORS, IPATH_DRV_NAME);
-       if (ret < 0) {
-               printk(KERN_ERR IPATH_DRV_NAME ": Could not register "
-                      "chrdev region (err %d)\n", -ret);
-               goto done;
-       }
-
-       ipath_class = class_create(THIS_MODULE, IPATH_DRV_NAME);
-
-       if (IS_ERR(ipath_class)) {
-               ret = PTR_ERR(ipath_class);
-               printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
-                      "device class (err %d)\n", -ret);
-               goto bail;
-       }
-
-       goto done;
-bail:
-       unregister_chrdev_region(dev, IPATH_NMINORS);
-done:
-       return ret;
-}
-
-static void user_cleanup(void)
-{
-       if (ipath_class) {
-               class_destroy(ipath_class);
-               ipath_class = NULL;
-       }
-
-       unregister_chrdev_region(dev, IPATH_NMINORS);
-}
-
-static atomic_t user_count = ATOMIC_INIT(0);
-static atomic_t user_setup = ATOMIC_INIT(0);
-
-int ipath_user_add(struct ipath_devdata *dd)
-{
-       char name[10];
-       int ret;
-
-       if (atomic_inc_return(&user_count) == 1) {
-               ret = user_init();
-               if (ret < 0) {
-                       ipath_dev_err(dd, "Unable to set up user support: "
-                                     "error %d\n", -ret);
-                       goto bail;
-               }
-               ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev,
-                               &wildcard_dev);
-               if (ret < 0) {
-                       ipath_dev_err(dd, "Could not create wildcard "
-                                     "minor: error %d\n", -ret);
-                       goto bail_user;
-               }
-
-               atomic_set(&user_setup, 1);
-       }
-
-       snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit);
-
-       ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops,
-                       &dd->user_cdev, &dd->user_dev);
-       if (ret < 0)
-               ipath_dev_err(dd, "Could not create user minor %d, %s\n",
-                             dd->ipath_unit + 1, name);
-
-       goto bail;
-
-bail_user:
-       user_cleanup();
-bail:
-       return ret;
-}
-
-void ipath_user_remove(struct ipath_devdata *dd)
-{
-       cleanup_cdev(&dd->user_cdev, &dd->user_dev);
-
-       if (atomic_dec_return(&user_count) == 0) {
-               if (atomic_read(&user_setup) == 0)
-                       goto bail;
-
-               cleanup_cdev(&wildcard_cdev, &wildcard_dev);
-               user_cleanup();
-
-               atomic_set(&user_setup, 0);
-       }
-bail:
-       return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_fs.c b/drivers/staging/rdma/ipath/ipath_fs.c
deleted file mode 100644 (file)
index 796af68..0000000
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/mount.h>
-#include <linux/pagemap.h>
-#include <linux/init.h>
-#include <linux/namei.h>
-#include <linux/slab.h>
-
-#include "ipath_kernel.h"
-
-#define IPATHFS_MAGIC 0x726a77
-
-static struct super_block *ipath_super;
-
-static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
-                        umode_t mode, const struct file_operations *fops,
-                        void *data)
-{
-       int error;
-       struct inode *inode = new_inode(dir->i_sb);
-
-       if (!inode) {
-               error = -EPERM;
-               goto bail;
-       }
-
-       inode->i_ino = get_next_ino();
-       inode->i_mode = mode;
-       inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-       inode->i_private = data;
-       if (S_ISDIR(mode)) {
-               inode->i_op = &simple_dir_inode_operations;
-               inc_nlink(inode);
-               inc_nlink(dir);
-       }
-
-       inode->i_fop = fops;
-
-       d_instantiate(dentry, inode);
-       error = 0;
-
-bail:
-       return error;
-}
-
-static int create_file(const char *name, umode_t mode,
-                      struct dentry *parent, struct dentry **dentry,
-                      const struct file_operations *fops, void *data)
-{
-       int error;
-
-       mutex_lock(&d_inode(parent)->i_mutex);
-       *dentry = lookup_one_len(name, parent, strlen(name));
-       if (!IS_ERR(*dentry))
-               error = ipathfs_mknod(d_inode(parent), *dentry,
-                                     mode, fops, data);
-       else
-               error = PTR_ERR(*dentry);
-       mutex_unlock(&d_inode(parent)->i_mutex);
-
-       return error;
-}
-
-static ssize_t atomic_stats_read(struct file *file, char __user *buf,
-                                size_t count, loff_t *ppos)
-{
-       return simple_read_from_buffer(buf, count, ppos, &ipath_stats,
-                                      sizeof ipath_stats);
-}
-
-static const struct file_operations atomic_stats_ops = {
-       .read = atomic_stats_read,
-       .llseek = default_llseek,
-};
-
-static ssize_t atomic_counters_read(struct file *file, char __user *buf,
-                                   size_t count, loff_t *ppos)
-{
-       struct infinipath_counters counters;
-       struct ipath_devdata *dd;
-
-       dd = file_inode(file)->i_private;
-       dd->ipath_f_read_counters(dd, &counters);
-
-       return simple_read_from_buffer(buf, count, ppos, &counters,
-                                      sizeof counters);
-}
-
-static const struct file_operations atomic_counters_ops = {
-       .read = atomic_counters_read,
-       .llseek = default_llseek,
-};
-
-static ssize_t flash_read(struct file *file, char __user *buf,
-                         size_t count, loff_t *ppos)
-{
-       struct ipath_devdata *dd;
-       ssize_t ret;
-       loff_t pos;
-       char *tmp;
-
-       pos = *ppos;
-
-       if ( pos < 0) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       if (pos >= sizeof(struct ipath_flash)) {
-               ret = 0;
-               goto bail;
-       }
-
-       if (count > sizeof(struct ipath_flash) - pos)
-               count = sizeof(struct ipath_flash) - pos;
-
-       tmp = kmalloc(count, GFP_KERNEL);
-       if (!tmp) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       dd = file_inode(file)->i_private;
-       if (ipath_eeprom_read(dd, pos, tmp, count)) {
-               ipath_dev_err(dd, "failed to read from flash\n");
-               ret = -ENXIO;
-               goto bail_tmp;
-       }
-
-       if (copy_to_user(buf, tmp, count)) {
-               ret = -EFAULT;
-               goto bail_tmp;
-       }
-
-       *ppos = pos + count;
-       ret = count;
-
-bail_tmp:
-       kfree(tmp);
-
-bail:
-       return ret;
-}
-
-static ssize_t flash_write(struct file *file, const char __user *buf,
-                          size_t count, loff_t *ppos)
-{
-       struct ipath_devdata *dd;
-       ssize_t ret;
-       loff_t pos;
-       char *tmp;
-
-       pos = *ppos;
-
-       if (pos != 0) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       if (count != sizeof(struct ipath_flash)) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       tmp = memdup_user(buf, count);
-       if (IS_ERR(tmp))
-               return PTR_ERR(tmp);
-
-       dd = file_inode(file)->i_private;
-       if (ipath_eeprom_write(dd, pos, tmp, count)) {
-               ret = -ENXIO;
-               ipath_dev_err(dd, "failed to write to flash\n");
-               goto bail_tmp;
-       }
-
-       *ppos = pos + count;
-       ret = count;
-
-bail_tmp:
-       kfree(tmp);
-
-bail:
-       return ret;
-}
-
-static const struct file_operations flash_ops = {
-       .read = flash_read,
-       .write = flash_write,
-       .llseek = default_llseek,
-};
-
-static int create_device_files(struct super_block *sb,
-                              struct ipath_devdata *dd)
-{
-       struct dentry *dir, *tmp;
-       char unit[10];
-       int ret;
-
-       snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
-       ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir,
-                         &simple_dir_operations, dd);
-       if (ret) {
-               printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret);
-               goto bail;
-       }
-
-       ret = create_file("atomic_counters", S_IFREG|S_IRUGO, dir, &tmp,
-                         &atomic_counters_ops, dd);
-       if (ret) {
-               printk(KERN_ERR "create_file(%s/atomic_counters) "
-                      "failed: %d\n", unit, ret);
-               goto bail;
-       }
-
-       ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp,
-                         &flash_ops, dd);
-       if (ret) {
-               printk(KERN_ERR "create_file(%s/flash) "
-                      "failed: %d\n", unit, ret);
-               goto bail;
-       }
-
-bail:
-       return ret;
-}
-
-static int remove_file(struct dentry *parent, char *name)
-{
-       struct dentry *tmp;
-       int ret;
-
-       tmp = lookup_one_len(name, parent, strlen(name));
-
-       if (IS_ERR(tmp)) {
-               ret = PTR_ERR(tmp);
-               goto bail;
-       }
-
-       spin_lock(&tmp->d_lock);
-       if (simple_positive(tmp)) {
-               dget_dlock(tmp);
-               __d_drop(tmp);
-               spin_unlock(&tmp->d_lock);
-               simple_unlink(d_inode(parent), tmp);
-       } else
-               spin_unlock(&tmp->d_lock);
-
-       ret = 0;
-bail:
-       /*
-        * We don't expect clients to care about the return value, but
-        * it's there if they need it.
-        */
-       return ret;
-}
-
-static int remove_device_files(struct super_block *sb,
-                              struct ipath_devdata *dd)
-{
-       struct dentry *dir, *root;
-       char unit[10];
-       int ret;
-
-       root = dget(sb->s_root);
-       mutex_lock(&d_inode(root)->i_mutex);
-       snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
-       dir = lookup_one_len(unit, root, strlen(unit));
-
-       if (IS_ERR(dir)) {
-               ret = PTR_ERR(dir);
-               printk(KERN_ERR "Lookup of %s failed\n", unit);
-               goto bail;
-       }
-
-       remove_file(dir, "flash");
-       remove_file(dir, "atomic_counters");
-       d_delete(dir);
-       ret = simple_rmdir(d_inode(root), dir);
-
-bail:
-       mutex_unlock(&d_inode(root)->i_mutex);
-       dput(root);
-       return ret;
-}
-
-static int ipathfs_fill_super(struct super_block *sb, void *data,
-                             int silent)
-{
-       struct ipath_devdata *dd, *tmp;
-       unsigned long flags;
-       int ret;
-
-       static struct tree_descr files[] = {
-               [2] = {"atomic_stats", &atomic_stats_ops, S_IRUGO},
-               {""},
-       };
-
-       ret = simple_fill_super(sb, IPATHFS_MAGIC, files);
-       if (ret) {
-               printk(KERN_ERR "simple_fill_super failed: %d\n", ret);
-               goto bail;
-       }
-
-       spin_lock_irqsave(&ipath_devs_lock, flags);
-
-       list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
-               spin_unlock_irqrestore(&ipath_devs_lock, flags);
-               ret = create_device_files(sb, dd);
-               if (ret)
-                       goto bail;
-               spin_lock_irqsave(&ipath_devs_lock, flags);
-       }
-
-       spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-bail:
-       return ret;
-}
-
-static struct dentry *ipathfs_mount(struct file_system_type *fs_type,
-                       int flags, const char *dev_name, void *data)
-{
-       struct dentry *ret;
-       ret = mount_single(fs_type, flags, data, ipathfs_fill_super);
-       if (!IS_ERR(ret))
-               ipath_super = ret->d_sb;
-       return ret;
-}
-
-static void ipathfs_kill_super(struct super_block *s)
-{
-       kill_litter_super(s);
-       ipath_super = NULL;
-}
-
-int ipathfs_add_device(struct ipath_devdata *dd)
-{
-       int ret;
-
-       if (ipath_super == NULL) {
-               ret = 0;
-               goto bail;
-       }
-
-       ret = create_device_files(ipath_super, dd);
-
-bail:
-       return ret;
-}
-
-int ipathfs_remove_device(struct ipath_devdata *dd)
-{
-       int ret;
-
-       if (ipath_super == NULL) {
-               ret = 0;
-               goto bail;
-       }
-
-       ret = remove_device_files(ipath_super, dd);
-
-bail:
-       return ret;
-}
-
-static struct file_system_type ipathfs_fs_type = {
-       .owner =        THIS_MODULE,
-       .name =         "ipathfs",
-       .mount =        ipathfs_mount,
-       .kill_sb =      ipathfs_kill_super,
-};
-MODULE_ALIAS_FS("ipathfs");
-
-int __init ipath_init_ipathfs(void)
-{
-       return register_filesystem(&ipathfs_fs_type);
-}
-
-void __exit ipath_exit_ipathfs(void)
-{
-       unregister_filesystem(&ipathfs_fs_type);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_iba6110.c b/drivers/staging/rdma/ipath/ipath_iba6110.c
deleted file mode 100644 (file)
index 5f13572..0000000
+++ /dev/null
@@ -1,1939 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file contains all of the code that is specific to the InfiniPath
- * HT chip.
- */
-
-#include <linux/vmalloc.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/htirq.h>
-#include <rdma/ib_verbs.h>
-
-#include "ipath_kernel.h"
-#include "ipath_registers.h"
-
-static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64);
-
-
-/*
- * This lists the InfiniPath registers, in the actual chip layout.
- * This structure should never be directly accessed.
- *
- * The names are in InterCap form because they're taken straight from
- * the chip specification.  Since they're only used in this file, they
- * don't pollute the rest of the source.
-*/
-
-struct _infinipath_do_not_use_kernel_regs {
-       unsigned long long Revision;
-       unsigned long long Control;
-       unsigned long long PageAlign;
-       unsigned long long PortCnt;
-       unsigned long long DebugPortSelect;
-       unsigned long long DebugPort;
-       unsigned long long SendRegBase;
-       unsigned long long UserRegBase;
-       unsigned long long CounterRegBase;
-       unsigned long long Scratch;
-       unsigned long long ReservedMisc1;
-       unsigned long long InterruptConfig;
-       unsigned long long IntBlocked;
-       unsigned long long IntMask;
-       unsigned long long IntStatus;
-       unsigned long long IntClear;
-       unsigned long long ErrorMask;
-       unsigned long long ErrorStatus;
-       unsigned long long ErrorClear;
-       unsigned long long HwErrMask;
-       unsigned long long HwErrStatus;
-       unsigned long long HwErrClear;
-       unsigned long long HwDiagCtrl;
-       unsigned long long MDIO;
-       unsigned long long IBCStatus;
-       unsigned long long IBCCtrl;
-       unsigned long long ExtStatus;
-       unsigned long long ExtCtrl;
-       unsigned long long GPIOOut;
-       unsigned long long GPIOMask;
-       unsigned long long GPIOStatus;
-       unsigned long long GPIOClear;
-       unsigned long long RcvCtrl;
-       unsigned long long RcvBTHQP;
-       unsigned long long RcvHdrSize;
-       unsigned long long RcvHdrCnt;
-       unsigned long long RcvHdrEntSize;
-       unsigned long long RcvTIDBase;
-       unsigned long long RcvTIDCnt;
-       unsigned long long RcvEgrBase;
-       unsigned long long RcvEgrCnt;
-       unsigned long long RcvBufBase;
-       unsigned long long RcvBufSize;
-       unsigned long long RxIntMemBase;
-       unsigned long long RxIntMemSize;
-       unsigned long long RcvPartitionKey;
-       unsigned long long ReservedRcv[10];
-       unsigned long long SendCtrl;
-       unsigned long long SendPIOBufBase;
-       unsigned long long SendPIOSize;
-       unsigned long long SendPIOBufCnt;
-       unsigned long long SendPIOAvailAddr;
-       unsigned long long TxIntMemBase;
-       unsigned long long TxIntMemSize;
-       unsigned long long ReservedSend[9];
-       unsigned long long SendBufferError;
-       unsigned long long SendBufferErrorCONT1;
-       unsigned long long SendBufferErrorCONT2;
-       unsigned long long SendBufferErrorCONT3;
-       unsigned long long ReservedSBE[4];
-       unsigned long long RcvHdrAddr0;
-       unsigned long long RcvHdrAddr1;
-       unsigned long long RcvHdrAddr2;
-       unsigned long long RcvHdrAddr3;
-       unsigned long long RcvHdrAddr4;
-       unsigned long long RcvHdrAddr5;
-       unsigned long long RcvHdrAddr6;
-       unsigned long long RcvHdrAddr7;
-       unsigned long long RcvHdrAddr8;
-       unsigned long long ReservedRHA[7];
-       unsigned long long RcvHdrTailAddr0;
-       unsigned long long RcvHdrTailAddr1;
-       unsigned long long RcvHdrTailAddr2;
-       unsigned long long RcvHdrTailAddr3;
-       unsigned long long RcvHdrTailAddr4;
-       unsigned long long RcvHdrTailAddr5;
-       unsigned long long RcvHdrTailAddr6;
-       unsigned long long RcvHdrTailAddr7;
-       unsigned long long RcvHdrTailAddr8;
-       unsigned long long ReservedRHTA[7];
-       unsigned long long Sync;        /* Software only */
-       unsigned long long Dump;        /* Software only */
-       unsigned long long SimVer;      /* Software only */
-       unsigned long long ReservedSW[5];
-       unsigned long long SerdesConfig0;
-       unsigned long long SerdesConfig1;
-       unsigned long long SerdesStatus;
-       unsigned long long XGXSConfig;
-       unsigned long long ReservedSW2[4];
-};
-
-struct _infinipath_do_not_use_counters {
-       __u64 LBIntCnt;
-       __u64 LBFlowStallCnt;
-       __u64 Reserved1;
-       __u64 TxUnsupVLErrCnt;
-       __u64 TxDataPktCnt;
-       __u64 TxFlowPktCnt;
-       __u64 TxDwordCnt;
-       __u64 TxLenErrCnt;
-       __u64 TxMaxMinLenErrCnt;
-       __u64 TxUnderrunCnt;
-       __u64 TxFlowStallCnt;
-       __u64 TxDroppedPktCnt;
-       __u64 RxDroppedPktCnt;
-       __u64 RxDataPktCnt;
-       __u64 RxFlowPktCnt;
-       __u64 RxDwordCnt;
-       __u64 RxLenErrCnt;
-       __u64 RxMaxMinLenErrCnt;
-       __u64 RxICRCErrCnt;
-       __u64 RxVCRCErrCnt;
-       __u64 RxFlowCtrlErrCnt;
-       __u64 RxBadFormatCnt;
-       __u64 RxLinkProblemCnt;
-       __u64 RxEBPCnt;
-       __u64 RxLPCRCErrCnt;
-       __u64 RxBufOvflCnt;
-       __u64 RxTIDFullErrCnt;
-       __u64 RxTIDValidErrCnt;
-       __u64 RxPKeyMismatchCnt;
-       __u64 RxP0HdrEgrOvflCnt;
-       __u64 RxP1HdrEgrOvflCnt;
-       __u64 RxP2HdrEgrOvflCnt;
-       __u64 RxP3HdrEgrOvflCnt;
-       __u64 RxP4HdrEgrOvflCnt;
-       __u64 RxP5HdrEgrOvflCnt;
-       __u64 RxP6HdrEgrOvflCnt;
-       __u64 RxP7HdrEgrOvflCnt;
-       __u64 RxP8HdrEgrOvflCnt;
-       __u64 Reserved6;
-       __u64 Reserved7;
-       __u64 IBStatusChangeCnt;
-       __u64 IBLinkErrRecoveryCnt;
-       __u64 IBLinkDownedCnt;
-       __u64 IBSymbolErrCnt;
-};
-
-#define IPATH_KREG_OFFSET(field) (offsetof( \
-       struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
-#define IPATH_CREG_OFFSET(field) (offsetof( \
-       struct _infinipath_do_not_use_counters, field) / sizeof(u64))
-
-static const struct ipath_kregs ipath_ht_kregs = {
-       .kr_control = IPATH_KREG_OFFSET(Control),
-       .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase),
-       .kr_debugport = IPATH_KREG_OFFSET(DebugPort),
-       .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect),
-       .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear),
-       .kr_errormask = IPATH_KREG_OFFSET(ErrorMask),
-       .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus),
-       .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl),
-       .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus),
-       .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear),
-       .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask),
-       .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut),
-       .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus),
-       .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl),
-       .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear),
-       .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask),
-       .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus),
-       .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl),
-       .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus),
-       .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked),
-       .kr_intclear = IPATH_KREG_OFFSET(IntClear),
-       .kr_interruptconfig = IPATH_KREG_OFFSET(InterruptConfig),
-       .kr_intmask = IPATH_KREG_OFFSET(IntMask),
-       .kr_intstatus = IPATH_KREG_OFFSET(IntStatus),
-       .kr_mdio = IPATH_KREG_OFFSET(MDIO),
-       .kr_pagealign = IPATH_KREG_OFFSET(PageAlign),
-       .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey),
-       .kr_portcnt = IPATH_KREG_OFFSET(PortCnt),
-       .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP),
-       .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase),
-       .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize),
-       .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl),
-       .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase),
-       .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt),
-       .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt),
-       .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize),
-       .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize),
-       .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase),
-       .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize),
-       .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase),
-       .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt),
-       .kr_revision = IPATH_KREG_OFFSET(Revision),
-       .kr_scratch = IPATH_KREG_OFFSET(Scratch),
-       .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError),
-       .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl),
-       .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr),
-       .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase),
-       .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt),
-       .kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize),
-       .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase),
-       .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase),
-       .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize),
-       .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase),
-       .kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0),
-       .kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1),
-       .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
-       .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
-       /*
-        * These should not be used directly via ipath_write_kreg64(),
-        * use them with ipath_write_kreg64_port(),
-        */
-       .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
-       .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0)
-};
-
-static const struct ipath_cregs ipath_ht_cregs = {
-       .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt),
-       .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt),
-       .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt),
-       .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt),
-       .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt),
-       .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt),
-       .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt),
-       .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt),
-       .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt),
-       .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt),
-       .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt),
-       .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt),
-       /* calc from Reg_CounterRegBase + offset */
-       .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt),
-       .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt),
-       .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt),
-       .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt),
-       .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt),
-       .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt),
-       .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt),
-       .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt),
-       .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt),
-       .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt),
-       .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt),
-       .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt),
-       .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt),
-       .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt),
-       .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt),
-       .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt),
-       .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt),
-       .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt),
-       .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt),
-       .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt),
-       .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt)
-};
-
-/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1)
-#define INFINIPATH_I_RCVURG_SHIFT 0
-#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1)
-#define INFINIPATH_I_RCVAVAIL_SHIFT 12
-
-/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
-#define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0
-#define INFINIPATH_HWE_HTCMEMPARITYERR_MASK 0x3FFFFFULL
-#define INFINIPATH_HWE_HTCLNKABYTE0CRCERR   0x0000000000800000ULL
-#define INFINIPATH_HWE_HTCLNKABYTE1CRCERR   0x0000000001000000ULL
-#define INFINIPATH_HWE_HTCLNKBBYTE0CRCERR   0x0000000002000000ULL
-#define INFINIPATH_HWE_HTCLNKBBYTE1CRCERR   0x0000000004000000ULL
-#define INFINIPATH_HWE_HTCMISCERR4          0x0000000008000000ULL
-#define INFINIPATH_HWE_HTCMISCERR5          0x0000000010000000ULL
-#define INFINIPATH_HWE_HTCMISCERR6          0x0000000020000000ULL
-#define INFINIPATH_HWE_HTCMISCERR7          0x0000000040000000ULL
-#define INFINIPATH_HWE_HTCBUSTREQPARITYERR  0x0000000080000000ULL
-#define INFINIPATH_HWE_HTCBUSTRESPPARITYERR 0x0000000100000000ULL
-#define INFINIPATH_HWE_HTCBUSIREQPARITYERR  0x0000000200000000ULL
-#define INFINIPATH_HWE_COREPLL_FBSLIP       0x0080000000000000ULL
-#define INFINIPATH_HWE_COREPLL_RFSLIP       0x0100000000000000ULL
-#define INFINIPATH_HWE_HTBPLL_FBSLIP        0x0200000000000000ULL
-#define INFINIPATH_HWE_HTBPLL_RFSLIP        0x0400000000000000ULL
-#define INFINIPATH_HWE_HTAPLL_FBSLIP        0x0800000000000000ULL
-#define INFINIPATH_HWE_HTAPLL_RFSLIP        0x1000000000000000ULL
-#define INFINIPATH_HWE_SERDESPLLFAILED      0x2000000000000000ULL
-
-#define IBA6110_IBCS_LINKTRAININGSTATE_MASK 0xf
-#define IBA6110_IBCS_LINKSTATE_SHIFT 4
-
-/* kr_extstatus bits */
-#define INFINIPATH_EXTS_FREQSEL 0x2
-#define INFINIPATH_EXTS_SERDESSEL 0x4
-#define INFINIPATH_EXTS_MEMBIST_ENDTEST     0x0000000000004000
-#define INFINIPATH_EXTS_MEMBIST_CORRECT     0x0000000000008000
-
-
-/* TID entries (memory), HT-only */
-#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL        /* 40 bits valid */
-#define INFINIPATH_RT_VALID 0x8000000000000000ULL
-#define INFINIPATH_RT_ADDR_SHIFT 0
-#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL
-#define INFINIPATH_RT_BUFSIZE_SHIFT 48
-
-#define INFINIPATH_R_INTRAVAIL_SHIFT 16
-#define INFINIPATH_R_TAILUPD_SHIFT 31
-
-/* kr_xgxsconfig bits */
-#define INFINIPATH_XGXS_RESET          0x7ULL
-
-/*
- * masks and bits that are different in different chips, or present only
- * in one
- */
-static const ipath_err_t infinipath_hwe_htcmemparityerr_mask =
-    INFINIPATH_HWE_HTCMEMPARITYERR_MASK;
-static const ipath_err_t infinipath_hwe_htcmemparityerr_shift =
-    INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT;
-
-static const ipath_err_t infinipath_hwe_htclnkabyte0crcerr =
-    INFINIPATH_HWE_HTCLNKABYTE0CRCERR;
-static const ipath_err_t infinipath_hwe_htclnkabyte1crcerr =
-    INFINIPATH_HWE_HTCLNKABYTE1CRCERR;
-static const ipath_err_t infinipath_hwe_htclnkbbyte0crcerr =
-    INFINIPATH_HWE_HTCLNKBBYTE0CRCERR;
-static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr =
-    INFINIPATH_HWE_HTCLNKBBYTE1CRCERR;
-
-#define _IPATH_GPIO_SDA_NUM 1
-#define _IPATH_GPIO_SCL_NUM 0
-
-#define IPATH_GPIO_SDA \
-       (1ULL << (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-#define IPATH_GPIO_SCL \
-       (1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-
-/* keep the code below somewhat more readable; not used elsewhere */
-#define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr |    \
-                               infinipath_hwe_htclnkabyte1crcerr)
-#define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr |    \
-                               infinipath_hwe_htclnkbbyte1crcerr)
-#define _IPATH_HTLANE0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr |    \
-                               infinipath_hwe_htclnkbbyte0crcerr)
-#define _IPATH_HTLANE1_CRCBITS (infinipath_hwe_htclnkabyte1crcerr |    \
-                               infinipath_hwe_htclnkbbyte1crcerr)
-
-static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
-                         char *msg, size_t msgl)
-{
-       char bitsmsg[64];
-       ipath_err_t crcbits = hwerrs &
-               (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS);
-       /* don't check if 8bit HT */
-       if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
-               crcbits &= ~infinipath_hwe_htclnkabyte1crcerr;
-       /* don't check if 8bit HT */
-       if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
-               crcbits &= ~infinipath_hwe_htclnkbbyte1crcerr;
-       /*
-        * we'll want to ignore link errors on link that is
-        * not in use, if any.  For now, complain about both
-        */
-       if (crcbits) {
-               u16 ctrl0, ctrl1;
-               snprintf(bitsmsg, sizeof bitsmsg,
-                        "[HT%s lane %s CRC (%llx); powercycle to completely clear]",
-                        !(crcbits & _IPATH_HTLINK1_CRCBITS) ?
-                        "0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS)
-                                   ? "1 (B)" : "0+1 (A+B)"),
-                        !(crcbits & _IPATH_HTLANE1_CRCBITS) ? "0"
-                        : (!(crcbits & _IPATH_HTLANE0_CRCBITS) ? "1" :
-                           "0+1"), (unsigned long long) crcbits);
-               strlcat(msg, bitsmsg, msgl);
-
-               /*
-                * print extra info for debugging.  slave/primary
-                * config word 4, 8 (link control 0, 1)
-                */
-
-               if (pci_read_config_word(dd->pcidev,
-                                        dd->ipath_ht_slave_off + 0x4,
-                                        &ctrl0))
-                       dev_info(&dd->pcidev->dev, "Couldn't read "
-                                "linkctrl0 of slave/primary "
-                                "config block\n");
-               else if (!(ctrl0 & 1 << 6))
-                       /* not if EOC bit set */
-                       ipath_dbg("HT linkctrl0 0x%x%s%s\n", ctrl0,
-                                 ((ctrl0 >> 8) & 7) ? " CRC" : "",
-                                 ((ctrl0 >> 4) & 1) ? "linkfail" :
-                                 "");
-               if (pci_read_config_word(dd->pcidev,
-                                        dd->ipath_ht_slave_off + 0x8,
-                                        &ctrl1))
-                       dev_info(&dd->pcidev->dev, "Couldn't read "
-                                "linkctrl1 of slave/primary "
-                                "config block\n");
-               else if (!(ctrl1 & 1 << 6))
-                       /* not if EOC bit set */
-                       ipath_dbg("HT linkctrl1 0x%x%s%s\n", ctrl1,
-                                 ((ctrl1 >> 8) & 7) ? " CRC" : "",
-                                 ((ctrl1 >> 4) & 1) ? "linkfail" :
-                                 "");
-
-               /* disable until driver reloaded */
-               dd->ipath_hwerrmask &= ~crcbits;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                                dd->ipath_hwerrmask);
-               ipath_dbg("HT crc errs: %s\n", msg);
-       } else
-               ipath_dbg("ignoring HT crc errors 0x%llx, "
-                         "not in use\n", (unsigned long long)
-                         (hwerrs & (_IPATH_HTLINK0_CRCBITS |
-                                    _IPATH_HTLINK1_CRCBITS)));
-}
-
-/* 6110 specific hardware errors... */
-static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
-       INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"),
-       INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"),
-       INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"),
-       INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"),
-       INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"),
-       INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"),
-       INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
-       INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
-};
-
-#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
-                       INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
-                       << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
-#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
-                         << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
-
-static void ipath_ht_txe_recover(struct ipath_devdata *dd)
-{
-       ++ipath_stats.sps_txeparity;
-       dev_info(&dd->pcidev->dev,
-               "Recovering from TXE PIO parity error\n");
-}
-
-
-/**
- * ipath_ht_handle_hwerrors - display hardware errors.
- * @dd: the infinipath device
- * @msg: the output buffer
- * @msgl: the size of the output buffer
- *
- * Use same msg buffer as regular errors to avoid excessive stack
- * use.  Most hardware errors are catastrophic, but for right now,
- * we'll print them and continue.  We reuse the same message buffer as
- * ipath_handle_errors() to avoid excessive stack usage.
- */
-static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
-                                    size_t msgl)
-{
-       ipath_err_t hwerrs;
-       u32 bits, ctrl;
-       int isfatal = 0;
-       char bitsmsg[64];
-       int log_idx;
-
-       hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
-
-       if (!hwerrs) {
-               ipath_cdbg(VERBOSE, "Called but no hardware errors set\n");
-               /*
-                * better than printing cofusing messages
-                * This seems to be related to clearing the crc error, or
-                * the pll error during init.
-                */
-               goto bail;
-       } else if (hwerrs == -1LL) {
-               ipath_dev_err(dd, "Read of hardware error status failed "
-                             "(all bits set); ignoring\n");
-               goto bail;
-       }
-       ipath_stats.sps_hwerrs++;
-
-       /* Always clear the error status register, except MEMBISTFAIL,
-        * regardless of whether we continue or stop using the chip.
-        * We want that set so we know it failed, even across driver reload.
-        * We'll still ignore it in the hwerrmask.  We do this partly for
-        * diagnostics, but also for support */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-                        hwerrs&~INFINIPATH_HWE_MEMBISTFAILED);
-
-       hwerrs &= dd->ipath_hwerrmask;
-
-       /* We log some errors to EEPROM, check if we have any of those. */
-       for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
-               if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
-                       ipath_inc_eeprom_err(dd, log_idx, 1);
-
-       /*
-        * make sure we get this much out, unless told to be quiet,
-        * it's a parity error we may recover from,
-        * or it's occurred within the last 5 seconds
-        */
-       if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
-               RXE_EAGER_PARITY)) ||
-               (ipath_debug & __IPATH_VERBDBG))
-               dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
-                        "(cleared)\n", (unsigned long long) hwerrs);
-       dd->ipath_lasthwerror |= hwerrs;
-
-       if (hwerrs & ~dd->ipath_hwe_bitsextant)
-               ipath_dev_err(dd, "hwerror interrupt with unknown errors "
-                             "%llx set\n", (unsigned long long)
-                             (hwerrs & ~dd->ipath_hwe_bitsextant));
-
-       ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
-       if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
-               /*
-                * parity errors in send memory are recoverable,
-                * just cancel the send (if indicated in * sendbuffererror),
-                * count the occurrence, unfreeze (if no other handled
-                * hardware error bits are set), and continue. They can
-                * occur if a processor speculative read is done to the PIO
-                * buffer while we are sending a packet, for example.
-                */
-               if (hwerrs & TXE_PIO_PARITY) {
-                       ipath_ht_txe_recover(dd);
-                       hwerrs &= ~TXE_PIO_PARITY;
-               }
-
-               if (!hwerrs) {
-                       ipath_dbg("Clearing freezemode on ignored or "
-                                 "recovered hardware error\n");
-                       ipath_clear_freeze(dd);
-               }
-       }
-
-       *msg = '\0';
-
-       /*
-        * may someday want to decode into which bits are which
-        * functional area for parity errors, etc.
-        */
-       if (hwerrs & (infinipath_hwe_htcmemparityerr_mask
-                     << INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT)) {
-               bits = (u32) ((hwerrs >>
-                              INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) &
-                             INFINIPATH_HWE_HTCMEMPARITYERR_MASK);
-               snprintf(bitsmsg, sizeof bitsmsg, "[HTC Parity Errs %x] ",
-                        bits);
-               strlcat(msg, bitsmsg, msgl);
-       }
-
-       ipath_format_hwerrors(hwerrs,
-                             ipath_6110_hwerror_msgs,
-                             ARRAY_SIZE(ipath_6110_hwerror_msgs),
-                             msg, msgl);
-
-       if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
-               hwerr_crcbits(dd, hwerrs, msg, msgl);
-
-       if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
-               strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
-                       msgl);
-               /* ignore from now on, so disable until driver reloaded */
-               dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                                dd->ipath_hwerrmask);
-       }
-#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP |       \
-                        INFINIPATH_HWE_COREPLL_RFSLIP |        \
-                        INFINIPATH_HWE_HTBPLL_FBSLIP |         \
-                        INFINIPATH_HWE_HTBPLL_RFSLIP |         \
-                        INFINIPATH_HWE_HTAPLL_FBSLIP |         \
-                        INFINIPATH_HWE_HTAPLL_RFSLIP)
-
-       if (hwerrs & _IPATH_PLL_FAIL) {
-               snprintf(bitsmsg, sizeof bitsmsg,
-                        "[PLL failed (%llx), InfiniPath hardware unusable]",
-                        (unsigned long long) (hwerrs & _IPATH_PLL_FAIL));
-               strlcat(msg, bitsmsg, msgl);
-               /* ignore from now on, so disable until driver reloaded */
-               dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                                dd->ipath_hwerrmask);
-       }
-
-       if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
-               /*
-                * If it occurs, it is left masked since the eternal
-                * interface is unused
-                */
-               dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                                dd->ipath_hwerrmask);
-       }
-
-       if (hwerrs) {
-               /*
-                * if any set that we aren't ignoring; only
-                * make the complaint once, in case it's stuck
-                * or recurring, and we get here multiple
-                * times.
-                * force link down, so switch knows, and
-                * LEDs are turned off
-                */
-               if (dd->ipath_flags & IPATH_INITTED) {
-                       ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
-                       ipath_setup_ht_setextled(dd,
-                               INFINIPATH_IBCS_L_STATE_DOWN,
-                               INFINIPATH_IBCS_LT_STATE_DISABLED);
-                       ipath_dev_err(dd, "Fatal Hardware Error (freeze "
-                                         "mode), no longer usable, SN %.16s\n",
-                                         dd->ipath_serial);
-                       isfatal = 1;
-               }
-               *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-               /* mark as having had error */
-               *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
-               /*
-                * mark as not usable, at a minimum until driver
-                * is reloaded, probably until reboot, since no
-                * other reset is possible.
-                */
-               dd->ipath_flags &= ~IPATH_INITTED;
-       } else {
-               *msg = 0; /* recovered from all of them */
-       }
-       if (*msg)
-               ipath_dev_err(dd, "%s hardware error\n", msg);
-       if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
-               /*
-                * for status file; if no trailing brace is copied,
-                * we'll know it was truncated.
-                */
-               snprintf(dd->ipath_freezemsg,
-                        dd->ipath_freezelen, "{%s}", msg);
-
-bail:;
-}
-
-/**
- * ipath_ht_boardname - fill in the board name
- * @dd: the infinipath device
- * @name: the output buffer
- * @namelen: the size of the output buffer
- *
- * fill in the board name, based on the board revision register
- */
-static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
-                             size_t namelen)
-{
-       char *n = NULL;
-       u8 boardrev = dd->ipath_boardrev;
-       int ret = 0;
-
-       switch (boardrev) {
-       case 5:
-               /*
-                * original production board; two production levels, with
-                * different serial number ranges.   See ipath_ht_early_init() for
-                * case where we enable IPATH_GPIO_INTR for later serial # range.
-                * Original 112* serial number is no longer supported.
-                */
-               n = "InfiniPath_QHT7040";
-               break;
-       case 7:
-               /* small form factor production board */
-               n = "InfiniPath_QHT7140";
-               break;
-       default:                /* don't know, just print the number */
-               ipath_dev_err(dd, "Don't yet know about board "
-                             "with ID %u\n", boardrev);
-               snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u",
-                        boardrev);
-               break;
-       }
-       if (n)
-               snprintf(name, namelen, "%s", n);
-
-       if (ret) {
-               ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name);
-               goto bail;
-       }
-       if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
-               dd->ipath_minrev > 4)) {
-               /*
-                * This version of the driver only supports Rev 3.2 - 3.4
-                */
-               ipath_dev_err(dd,
-                             "Unsupported InfiniPath hardware revision %u.%u!\n",
-                             dd->ipath_majrev, dd->ipath_minrev);
-               ret = 1;
-               goto bail;
-       }
-       /*
-        * pkt/word counters are 32 bit, and therefore wrap fast enough
-        * that we snapshot them from a timer, and maintain 64 bit shadow
-        * copies
-        */
-       dd->ipath_flags |= IPATH_32BITCOUNTERS;
-       dd->ipath_flags |= IPATH_GPIO_INTR;
-       if (dd->ipath_lbus_speed != 800)
-               ipath_dev_err(dd,
-                             "Incorrectly configured for HT @ %uMHz\n",
-                             dd->ipath_lbus_speed);
-
-       /*
-        * set here, not in ipath_init_*_funcs because we have to do
-        * it after we can read chip registers.
-        */
-       dd->ipath_ureg_align =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
-
-bail:
-       return ret;
-}
-
-static void ipath_check_htlink(struct ipath_devdata *dd)
-{
-       u8 linkerr, link_off, i;
-
-       for (i = 0; i < 2; i++) {
-               link_off = dd->ipath_ht_slave_off + i * 4 + 0xd;
-               if (pci_read_config_byte(dd->pcidev, link_off, &linkerr))
-                       dev_info(&dd->pcidev->dev, "Couldn't read "
-                                "linkerror%d of HT slave/primary block\n",
-                                i);
-               else if (linkerr & 0xf0) {
-                       ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
-                                  "clearing\n", linkerr >> 4, i);
-                       /*
-                        * writing the linkerr bits that are set should
-                        * clear them
-                        */
-                       if (pci_write_config_byte(dd->pcidev, link_off,
-                                                 linkerr))
-                               ipath_dbg("Failed write to clear HT "
-                                         "linkerror%d\n", i);
-                       if (pci_read_config_byte(dd->pcidev, link_off,
-                                                &linkerr))
-                               dev_info(&dd->pcidev->dev,
-                                        "Couldn't reread linkerror%d of "
-                                        "HT slave/primary block\n", i);
-                       else if (linkerr & 0xf0)
-                               dev_info(&dd->pcidev->dev,
-                                        "HT linkerror%d bits 0x%x "
-                                        "couldn't be cleared\n",
-                                        i, linkerr >> 4);
-               }
-       }
-}
-
-static int ipath_setup_ht_reset(struct ipath_devdata *dd)
-{
-       ipath_dbg("No reset possible for this InfiniPath hardware\n");
-       return 0;
-}
-
-#define HT_INTR_DISC_CONFIG  0x80      /* HT interrupt and discovery cap */
-#define HT_INTR_REG_INDEX    2 /* intconfig requires indirect accesses */
-
-/*
- * Bits 13-15 of command==0 is slave/primary block.  Clear any HT CRC
- * errors.  We only bother to do this at load time, because it's OK if
- * it happened before we were loaded (first time after boot/reset),
- * but any time after that, it's fatal anyway.  Also need to not check
- * for upper byte errors if we are in 8 bit mode, so figure out
- * our width.  For now, at least, also complain if it's 8 bit.
- */
-static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev,
-                            int pos, u8 cap_type)
-{
-       u8 linkwidth = 0, linkerr, link_a_b_off, link_off;
-       u16 linkctrl = 0;
-       int i;
-
-       dd->ipath_ht_slave_off = pos;
-       /* command word, master_host bit */
-       /* master host || slave */
-       if ((cap_type >> 2) & 1)
-               link_a_b_off = 4;
-       else
-               link_a_b_off = 0;
-       ipath_cdbg(VERBOSE, "HT%u (Link %c) connected to processor\n",
-                  link_a_b_off ? 1 : 0,
-                  link_a_b_off ? 'B' : 'A');
-
-       link_a_b_off += pos;
-
-       /*
-        * check both link control registers; clear both HT CRC sets if
-        * necessary.
-        */
-       for (i = 0; i < 2; i++) {
-               link_off = pos + i * 4 + 0x4;
-               if (pci_read_config_word(pdev, link_off, &linkctrl))
-                       ipath_dev_err(dd, "Couldn't read HT link control%d "
-                                     "register\n", i);
-               else if (linkctrl & (0xf << 8)) {
-                       ipath_cdbg(VERBOSE, "Clear linkctrl%d CRC Error "
-                                  "bits %x\n", i, linkctrl & (0xf << 8));
-                       /*
-                        * now write them back to clear the error.
-                        */
-                       pci_write_config_word(pdev, link_off,
-                                             linkctrl & (0xf << 8));
-               }
-       }
-
-       /*
-        * As with HT CRC bits, same for protocol errors that might occur
-        * during boot.
-        */
-       for (i = 0; i < 2; i++) {
-               link_off = pos + i * 4 + 0xd;
-               if (pci_read_config_byte(pdev, link_off, &linkerr))
-                       dev_info(&pdev->dev, "Couldn't read linkerror%d "
-                                "of HT slave/primary block\n", i);
-               else if (linkerr & 0xf0) {
-                       ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
-                                  "clearing\n", linkerr >> 4, i);
-                       /*
-                        * writing the linkerr bits that are set will clear
-                        * them
-                        */
-                       if (pci_write_config_byte
-                           (pdev, link_off, linkerr))
-                               ipath_dbg("Failed write to clear HT "
-                                         "linkerror%d\n", i);
-                       if (pci_read_config_byte(pdev, link_off, &linkerr))
-                               dev_info(&pdev->dev, "Couldn't reread "
-                                        "linkerror%d of HT slave/primary "
-                                        "block\n", i);
-                       else if (linkerr & 0xf0)
-                               dev_info(&pdev->dev, "HT linkerror%d bits "
-                                        "0x%x couldn't be cleared\n",
-                                        i, linkerr >> 4);
-               }
-       }
-
-       /*
-        * this is just for our link to the host, not devices connected
-        * through tunnel.
-        */
-
-       if (pci_read_config_byte(pdev, link_a_b_off + 7, &linkwidth))
-               ipath_dev_err(dd, "Couldn't read HT link width "
-                             "config register\n");
-       else {
-               u32 width;
-               switch (linkwidth & 7) {
-               case 5:
-                       width = 4;
-                       break;
-               case 4:
-                       width = 2;
-                       break;
-               case 3:
-                       width = 32;
-                       break;
-               case 1:
-                       width = 16;
-                       break;
-               case 0:
-               default:        /* if wrong, assume 8 bit */
-                       width = 8;
-                       break;
-               }
-
-               dd->ipath_lbus_width = width;
-
-               if (linkwidth != 0x11) {
-                       ipath_dev_err(dd, "Not configured for 16 bit HT "
-                                     "(%x)\n", linkwidth);
-                       if (!(linkwidth & 0xf)) {
-                               ipath_dbg("Will ignore HT lane1 errors\n");
-                               dd->ipath_flags |= IPATH_8BIT_IN_HT0;
-                       }
-               }
-       }
-
-       /*
-        * this is just for our link to the host, not devices connected
-        * through tunnel.
-        */
-       if (pci_read_config_byte(pdev, link_a_b_off + 0xd, &linkwidth))
-               ipath_dev_err(dd, "Couldn't read HT link frequency "
-                             "config register\n");
-       else {
-               u32 speed;
-               switch (linkwidth & 0xf) {
-               case 6:
-                       speed = 1000;
-                       break;
-               case 5:
-                       speed = 800;
-                       break;
-               case 4:
-                       speed = 600;
-                       break;
-               case 3:
-                       speed = 500;
-                       break;
-               case 2:
-                       speed = 400;
-                       break;
-               case 1:
-                       speed = 300;
-                       break;
-               default:
-                       /*
-                        * assume reserved and vendor-specific are 200...
-                        */
-               case 0:
-                       speed = 200;
-                       break;
-               }
-               dd->ipath_lbus_speed = speed;
-       }
-
-       snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info),
-               "HyperTransport,%uMHz,x%u\n",
-               dd->ipath_lbus_speed,
-               dd->ipath_lbus_width);
-}
-
-static int ipath_ht_intconfig(struct ipath_devdata *dd)
-{
-       int ret;
-
-       if (dd->ipath_intconfig) {
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig,
-                                dd->ipath_intconfig);  /* interrupt address */
-               ret = 0;
-       } else {
-               ipath_dev_err(dd, "No interrupts enabled, couldn't setup "
-                             "interrupt address\n");
-               ret = -EINVAL;
-       }
-
-       return ret;
-}
-
-static void ipath_ht_irq_update(struct pci_dev *dev, int irq,
-                               struct ht_irq_msg *msg)
-{
-       struct ipath_devdata *dd = pci_get_drvdata(dev);
-       u64 prev_intconfig = dd->ipath_intconfig;
-
-       dd->ipath_intconfig = msg->address_lo;
-       dd->ipath_intconfig |= ((u64) msg->address_hi) << 32;
-
-       /*
-        * If the previous value of dd->ipath_intconfig is zero, we're
-        * getting configured for the first time, and must not program the
-        * intconfig register here (it will be programmed later, when the
-        * hardware is ready).  Otherwise, we should.
-        */
-       if (prev_intconfig)
-               ipath_ht_intconfig(dd);
-}
-
-/**
- * ipath_setup_ht_config - setup the interruptconfig register
- * @dd: the infinipath device
- * @pdev: the PCI device
- *
- * setup the interruptconfig register from the HT config info.
- * Also clear CRC errors in HT linkcontrol, if necessary.
- * This is done only for the real hardware.  It is done before
- * chip address space is initted, so can't touch infinipath registers
- */
-static int ipath_setup_ht_config(struct ipath_devdata *dd,
-                                struct pci_dev *pdev)
-{
-       int pos, ret;
-
-       ret = __ht_create_irq(pdev, 0, ipath_ht_irq_update);
-       if (ret < 0) {
-               ipath_dev_err(dd, "Couldn't create interrupt handler: "
-                             "err %d\n", ret);
-               goto bail;
-       }
-       dd->ipath_irq = ret;
-       ret = 0;
-
-       /*
-        * Handle clearing CRC errors in linkctrl register if necessary.  We
-        * do this early, before we ever enable errors or hardware errors,
-        * mostly to avoid causing the chip to enter freeze mode.
-        */
-       pos = pci_find_capability(pdev, PCI_CAP_ID_HT);
-       if (!pos) {
-               ipath_dev_err(dd, "Couldn't find HyperTransport "
-                             "capability; no interrupts\n");
-               ret = -ENODEV;
-               goto bail;
-       }
-       do {
-               u8 cap_type;
-
-               /*
-                * The HT capability type byte is 3 bytes after the
-                * capability byte.
-                */
-               if (pci_read_config_byte(pdev, pos + 3, &cap_type)) {
-                       dev_info(&pdev->dev, "Couldn't read config "
-                                "command @ %d\n", pos);
-                       continue;
-               }
-               if (!(cap_type & 0xE0))
-                       slave_or_pri_blk(dd, pdev, pos, cap_type);
-       } while ((pos = pci_find_next_capability(pdev, pos,
-                                                PCI_CAP_ID_HT)));
-
-       dd->ipath_flags |= IPATH_SWAP_PIOBUFS;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_setup_ht_cleanup - clean up any per-chip chip-specific stuff
- * @dd: the infinipath device
- *
- * Called during driver unload.
- * This is currently a nop for the HT chip, not for all chips
- */
-static void ipath_setup_ht_cleanup(struct ipath_devdata *dd)
-{
-}
-
-/**
- * ipath_setup_ht_setextled - set the state of the two external LEDs
- * @dd: the infinipath device
- * @lst: the L state
- * @ltst: the LT state
- *
- * Set the state of the two external LEDs, to indicate physical and
- * logical state of IB link.   For this chip (at least with recommended
- * board pinouts), LED1 is Green (physical state), and LED2 is Yellow
- * (logical state)
- *
- * Note:  We try to match the Mellanox HCA LED behavior as best
- * we can.  Green indicates physical link state is OK (something is
- * plugged in, and we can train).
- * Amber indicates the link is logically up (ACTIVE).
- * Mellanox further blinks the amber LED to indicate data packet
- * activity, but we have no hardware support for that, so it would
- * require waking up every 10-20 msecs and checking the counters
- * on the chip, and then turning the LED off if appropriate.  That's
- * visible overhead, so not something we will do.
- *
- */
-static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
-                                    u64 lst, u64 ltst)
-{
-       u64 extctl;
-       unsigned long flags = 0;
-
-       /* the diags use the LED to indicate diag info, so we leave
-        * the external LED alone when the diags are running */
-       if (ipath_diag_inuse)
-               return;
-
-       /* Allow override of LED display for, e.g. Locating system in rack */
-       if (dd->ipath_led_override) {
-               ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
-                       ? INFINIPATH_IBCS_LT_STATE_LINKUP
-                       : INFINIPATH_IBCS_LT_STATE_DISABLED;
-               lst = (dd->ipath_led_override & IPATH_LED_LOG)
-                       ? INFINIPATH_IBCS_L_STATE_ACTIVE
-                       : INFINIPATH_IBCS_L_STATE_DOWN;
-       }
-
-       spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-       /*
-        * start by setting both LED control bits to off, then turn
-        * on the appropriate bit(s).
-        */
-       if (dd->ipath_boardrev == 8) { /* LS/X-1 uses different pins */
-               /*
-                * major difference is that INFINIPATH_EXTC_LEDGBLERR_OFF
-                * is inverted,  because it is normally used to indicate
-                * a hardware fault at reset, if there were errors
-                */
-               extctl = (dd->ipath_extctrl & ~INFINIPATH_EXTC_LEDGBLOK_ON)
-                       | INFINIPATH_EXTC_LEDGBLERR_OFF;
-               if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
-                       extctl &= ~INFINIPATH_EXTC_LEDGBLERR_OFF;
-               if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
-                       extctl |= INFINIPATH_EXTC_LEDGBLOK_ON;
-       } else {
-               extctl = dd->ipath_extctrl &
-                       ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
-                         INFINIPATH_EXTC_LED2PRIPORT_ON);
-               if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
-                       extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
-               if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
-                       extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
-       }
-       dd->ipath_extctrl = extctl;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
-       spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-}
-
-static void ipath_init_ht_variables(struct ipath_devdata *dd)
-{
-       /*
-        * setup the register offsets, since they are different for each
-        * chip
-        */
-       dd->ipath_kregs = &ipath_ht_kregs;
-       dd->ipath_cregs = &ipath_ht_cregs;
-
-       dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
-       dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
-       dd->ipath_gpio_sda = IPATH_GPIO_SDA;
-       dd->ipath_gpio_scl = IPATH_GPIO_SCL;
-
-       /*
-        * Fill in data for field-values that change in newer chips.
-        * We dynamically specify only the mask for LINKTRAININGSTATE
-        * and only the shift for LINKSTATE, as they are the only ones
-        * that change.  Also precalculate the 3 link states of interest
-        * and the combined mask.
-        */
-       dd->ibcs_ls_shift = IBA6110_IBCS_LINKSTATE_SHIFT;
-       dd->ibcs_lts_mask = IBA6110_IBCS_LINKTRAININGSTATE_MASK;
-       dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK <<
-               dd->ibcs_ls_shift) | dd->ibcs_lts_mask;
-       dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
-               INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
-               (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift);
-       dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
-               INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
-               (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift);
-       dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
-               INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
-               (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift);
-
-       /*
-        * Fill in data for ibcc field-values that change in newer chips.
-        * We dynamically specify only the mask for LINKINITCMD
-        * and only the shift for LINKCMD and MAXPKTLEN, as they are
-        * the only ones that change.
-        */
-       dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK;
-       dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT;
-       dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
-
-       /* Fill in shifts for RcvCtrl. */
-       dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT;
-       dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT;
-       dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT;
-       dd->ipath_r_portcfg_shift = 0; /* Not on IBA6110 */
-
-       dd->ipath_i_bitsextant =
-               (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
-               (INFINIPATH_I_RCVAVAIL_MASK <<
-                INFINIPATH_I_RCVAVAIL_SHIFT) |
-               INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
-               INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
-
-       dd->ipath_e_bitsextant =
-               INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
-               INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
-               INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
-               INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
-               INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
-               INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
-               INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-               INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
-               INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
-               INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN |
-               INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN |
-               INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT |
-               INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
-               INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED |
-               INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
-               INFINIPATH_E_HARDWARE;
-
-       dd->ipath_hwe_bitsextant =
-               (INFINIPATH_HWE_HTCMEMPARITYERR_MASK <<
-                INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) |
-               (INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
-                INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
-               (INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
-                INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
-               INFINIPATH_HWE_HTCLNKABYTE0CRCERR |
-               INFINIPATH_HWE_HTCLNKABYTE1CRCERR |
-               INFINIPATH_HWE_HTCLNKBBYTE0CRCERR |
-               INFINIPATH_HWE_HTCLNKBBYTE1CRCERR |
-               INFINIPATH_HWE_HTCMISCERR4 |
-               INFINIPATH_HWE_HTCMISCERR5 | INFINIPATH_HWE_HTCMISCERR6 |
-               INFINIPATH_HWE_HTCMISCERR7 |
-               INFINIPATH_HWE_HTCBUSTREQPARITYERR |
-               INFINIPATH_HWE_HTCBUSTRESPPARITYERR |
-               INFINIPATH_HWE_HTCBUSIREQPARITYERR |
-               INFINIPATH_HWE_RXDSYNCMEMPARITYERR |
-               INFINIPATH_HWE_MEMBISTFAILED |
-               INFINIPATH_HWE_COREPLL_FBSLIP |
-               INFINIPATH_HWE_COREPLL_RFSLIP |
-               INFINIPATH_HWE_HTBPLL_FBSLIP |
-               INFINIPATH_HWE_HTBPLL_RFSLIP |
-               INFINIPATH_HWE_HTAPLL_FBSLIP |
-               INFINIPATH_HWE_HTAPLL_RFSLIP |
-               INFINIPATH_HWE_SERDESPLLFAILED |
-               INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
-               INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
-
-       dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
-       dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
-       dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT;
-       dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT;
-
-       /*
-        * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
-        * 2 is Some Misc, 3 is reserved for future.
-        */
-       dd->ipath_eep_st_masks[0].hwerrs_to_log =
-               INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
-               INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
-
-       dd->ipath_eep_st_masks[1].hwerrs_to_log =
-               INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
-               INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
-
-       dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET;
-
-       dd->delay_mult = 2; /* SDR, 4X, can't change */
-
-       dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
-       dd->ipath_link_speed_supported = IPATH_IB_SDR;
-       dd->ipath_link_width_enabled = IB_WIDTH_4X;
-       dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported;
-       /* these can't change for this chip, so set once */
-       dd->ipath_link_width_active = dd->ipath_link_width_enabled;
-       dd->ipath_link_speed_active = dd->ipath_link_speed_enabled;
-}
-
-/**
- * ipath_ht_init_hwerrors - enable hardware errors
- * @dd: the infinipath device
- *
- * now that we have finished initializing everything that might reasonably
- * cause a hardware error, and cleared those errors bits as they occur,
- * we can enable hardware errors in the mask (potentially enabling
- * freeze mode), and enable hardware errors as errors (along with
- * everything else) in errormask
- */
-static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
-{
-       ipath_err_t val;
-       u64 extsval;
-
-       extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
-
-       if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
-               ipath_dev_err(dd, "MemBIST did not complete!\n");
-       if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT)
-               ipath_dbg("MemBIST corrected\n");
-
-       ipath_check_htlink(dd);
-
-       /* barring bugs, all hwerrors become interrupts, which can */
-       val = -1LL;
-       /* don't look at crc lane1 if 8 bit */
-       if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
-               val &= ~infinipath_hwe_htclnkabyte1crcerr;
-       /* don't look at crc lane1 if 8 bit */
-       if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
-               val &= ~infinipath_hwe_htclnkbbyte1crcerr;
-
-       /*
-        * disable RXDSYNCMEMPARITY because external serdes is unused,
-        * and therefore the logic will never be used or initialized,
-        * and uninitialized state will normally result in this error
-        * being asserted.  Similarly for the external serdess pll
-        * lock signal.
-        */
-       val &= ~(INFINIPATH_HWE_SERDESPLLFAILED |
-                INFINIPATH_HWE_RXDSYNCMEMPARITYERR);
-
-       /*
-        * Disable MISCERR4 because of an inversion in the HT core
-        * logic checking for errors that cause this bit to be set.
-        * The errata can also cause the protocol error bit to be set
-        * in the HT config space linkerror register(s).
-        */
-       val &= ~INFINIPATH_HWE_HTCMISCERR4;
-
-       /*
-        * PLL ignored because unused MDIO interface has a logic problem
-        */
-       if (dd->ipath_boardrev == 4 || dd->ipath_boardrev == 9)
-               val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
-       dd->ipath_hwerrmask = val;
-}
-
-
-
-
-/**
- * ipath_ht_bringup_serdes - bring up the serdes
- * @dd: the infinipath device
- */
-static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
-{
-       u64 val, config1;
-       int ret = 0, change = 0;
-
-       ipath_dbg("Trying to bringup serdes\n");
-
-       if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) &
-           INFINIPATH_HWE_SERDESPLLFAILED)
-       {
-               ipath_dbg("At start, serdes PLL failed bit set in "
-                         "hwerrstatus, clearing and continuing\n");
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-                                INFINIPATH_HWE_SERDESPLLFAILED);
-       }
-
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-       config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1);
-
-       ipath_cdbg(VERBOSE, "Initial serdes status is config0=%llx "
-                  "config1=%llx, sstatus=%llx xgxs %llx\n",
-                  (unsigned long long) val, (unsigned long long) config1,
-                  (unsigned long long)
-                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
-                  (unsigned long long)
-                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
-       /* force reset on */
-       val |= INFINIPATH_SERDC0_RESET_PLL
-               /* | INFINIPATH_SERDC0_RESET_MASK */
-               ;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
-       udelay(15);             /* need pll reset set at least for a bit */
-
-       if (val & INFINIPATH_SERDC0_RESET_PLL) {
-               u64 val2 = val &= ~INFINIPATH_SERDC0_RESET_PLL;
-               /* set lane resets, and tx idle, during pll reset */
-               val2 |= INFINIPATH_SERDC0_RESET_MASK |
-                       INFINIPATH_SERDC0_TXIDLE;
-               ipath_cdbg(VERBOSE, "Clearing serdes PLL reset (writing "
-                          "%llx)\n", (unsigned long long) val2);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
-                                val2);
-               /*
-                * be sure chip saw it
-                */
-               val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               /*
-                * need pll reset clear at least 11 usec before lane
-                * resets cleared; give it a few more
-                */
-               udelay(15);
-               val = val2;     /* for check below */
-       }
-
-       if (val & (INFINIPATH_SERDC0_RESET_PLL |
-                  INFINIPATH_SERDC0_RESET_MASK |
-                  INFINIPATH_SERDC0_TXIDLE)) {
-               val &= ~(INFINIPATH_SERDC0_RESET_PLL |
-                        INFINIPATH_SERDC0_RESET_MASK |
-                        INFINIPATH_SERDC0_TXIDLE);
-               /* clear them */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
-                                val);
-       }
-
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
-       if (val & INFINIPATH_XGXS_RESET) {
-               /* normally true after boot */
-               val &= ~INFINIPATH_XGXS_RESET;
-               change = 1;
-       }
-       if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
-            INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
-               /* need to compensate for Tx inversion in partner */
-               val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
-                        INFINIPATH_XGXS_RX_POL_SHIFT);
-               val |= dd->ipath_rx_pol_inv <<
-                       INFINIPATH_XGXS_RX_POL_SHIFT;
-               change = 1;
-       }
-       if (change)
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-
-       /* clear current and de-emphasis bits */
-       config1 &= ~0x0ffffffff00ULL;
-       /* set current to 20ma */
-       config1 |= 0x00000000000ULL;
-       /* set de-emphasis to -5.68dB */
-       config1 |= 0x0cccc000000ULL;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1);
-
-       ipath_cdbg(VERBOSE, "After setup: serdes status is config0=%llx "
-                  "config1=%llx, sstatus=%llx xgxs %llx\n",
-                  (unsigned long long) val, (unsigned long long) config1,
-                  (unsigned long long)
-                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
-                  (unsigned long long)
-                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
-       return ret;             /* for now, say we always succeeded */
-}
-
-/**
- * ipath_ht_quiet_serdes - set serdes to txidle
- * @dd: the infinipath device
- * driver is being unloaded
- */
-static void ipath_ht_quiet_serdes(struct ipath_devdata *dd)
-{
-       u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-
-       val |= INFINIPATH_SERDC0_TXIDLE;
-       ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
-                 (unsigned long long) val);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
-}
-
-/**
- * ipath_pe_put_tid - write a TID in chip
- * @dd: the infinipath device
- * @tidptr: pointer to the expected TID (in chip) to update
- * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
- * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
- *
- * This exists as a separate routine to allow for special locking etc.
- * It's used for both the full cleanup on exit, as well as the normal
- * setup and teardown.
- */
-static void ipath_ht_put_tid(struct ipath_devdata *dd,
-                            u64 __iomem *tidptr, u32 type,
-                            unsigned long pa)
-{
-       if (!dd->ipath_kregbase)
-               return;
-
-       if (pa != dd->ipath_tidinvalid) {
-               if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
-                       dev_info(&dd->pcidev->dev,
-                                "physaddr %lx has more than "
-                                "40 bits, using only 40!!!\n", pa);
-                       pa &= INFINIPATH_RT_ADDR_MASK;
-               }
-               if (type == RCVHQ_RCV_TYPE_EAGER)
-                       pa |= dd->ipath_tidtemplate;
-               else {
-                       /* in words (fixed, full page).  */
-                       u64 lenvalid = PAGE_SIZE >> 2;
-                       lenvalid <<= INFINIPATH_RT_BUFSIZE_SHIFT;
-                       pa |= lenvalid | INFINIPATH_RT_VALID;
-               }
-       }
-
-       writeq(pa, tidptr);
-}
-
-
-/**
- * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager
- * @dd: the infinipath device
- * @port: the port
- *
- * Used from ipath_close(), and at chip initialization.
- */
-static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
-{
-       u64 __iomem *tidbase;
-       int i;
-
-       if (!dd->ipath_kregbase)
-               return;
-
-       ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
-
-       /*
-        * need to invalidate all of the expected TID entries for this
-        * port, so we don't have valid entries that might somehow get
-        * used (early in next use of this port, or through some bug)
-        */
-       tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
-                                  dd->ipath_rcvtidbase +
-                                  port * dd->ipath_rcvtidcnt *
-                                  sizeof(*tidbase));
-       for (i = 0; i < dd->ipath_rcvtidcnt; i++)
-               ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
-                                dd->ipath_tidinvalid);
-
-       tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
-                                  dd->ipath_rcvegrbase +
-                                  port * dd->ipath_rcvegrcnt *
-                                  sizeof(*tidbase));
-
-       for (i = 0; i < dd->ipath_rcvegrcnt; i++)
-               ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
-                                dd->ipath_tidinvalid);
-}
-
-/**
- * ipath_ht_tidtemplate - setup constants for TID updates
- * @dd: the infinipath device
- *
- * We setup stuff that we use a lot, to avoid calculating each time
- */
-static void ipath_ht_tidtemplate(struct ipath_devdata *dd)
-{
-       dd->ipath_tidtemplate = dd->ipath_ibmaxlen >> 2;
-       dd->ipath_tidtemplate <<= INFINIPATH_RT_BUFSIZE_SHIFT;
-       dd->ipath_tidtemplate |= INFINIPATH_RT_VALID;
-
-       /*
-        * work around chip errata bug 7358, by marking invalid tids
-        * as having max length
-        */
-       dd->ipath_tidinvalid = (-1LL & INFINIPATH_RT_BUFSIZE_MASK) <<
-               INFINIPATH_RT_BUFSIZE_SHIFT;
-}
-
-static int ipath_ht_early_init(struct ipath_devdata *dd)
-{
-       u32 __iomem *piobuf;
-       u32 pioincr, val32;
-       int i;
-
-       /*
-        * one cache line; long IB headers will spill over into received
-        * buffer
-        */
-       dd->ipath_rcvhdrentsize = 16;
-       dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
-
-       /*
-        * For HT, we allocate a somewhat overly large eager buffer,
-        * such that we can guarantee that we can receive the largest
-        * packet that we can send out.  To truly support a 4KB MTU,
-        * we need to bump this to a large value.  To date, other than
-        * testing, we have never encountered an HCA that can really
-        * send 4KB MTU packets, so we do not handle that (we'll get
-        * errors interrupts if we ever see one).
-        */
-       dd->ipath_rcvegrbufsize = dd->ipath_piosize2k;
-
-       /*
-        * the min() check here is currently a nop, but it may not
-        * always be, depending on just how we do ipath_rcvegrbufsize
-        */
-       dd->ipath_ibmaxlen = min(dd->ipath_piosize2k,
-                                dd->ipath_rcvegrbufsize);
-       dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
-       ipath_ht_tidtemplate(dd);
-
-       /*
-        * zero all the TID entries at startup.  We do this for sanity,
-        * in case of a previous driver crash of some kind, and also
-        * because the chip powers up with these memories in an unknown
-        * state.  Use portcnt, not cfgports, since this is for the
-        * full chip, not for current (possibly different) configuration
-        * value.
-        * Chip Errata bug 6447
-        */
-       for (val32 = 0; val32 < dd->ipath_portcnt; val32++)
-               ipath_ht_clear_tids(dd, val32);
-
-       /*
-        * write the pbc of each buffer, to be sure it's initialized, then
-        * cancel all the buffers, and also abort any packets that might
-        * have been in flight for some reason (the latter is for driver
-        * unload/reload, but isn't a bad idea at first init).  PIO send
-        * isn't enabled at this point, so there is no danger of sending
-        * these out on the wire.
-        * Chip Errata bug 6610
-        */
-       piobuf = (u32 __iomem *) (((char __iomem *)(dd->ipath_kregbase)) +
-                                 dd->ipath_piobufbase);
-       pioincr = dd->ipath_palign / sizeof(*piobuf);
-       for (i = 0; i < dd->ipath_piobcnt2k; i++) {
-               /*
-                * reasonable word count, just to init pbc
-                */
-               writel(16, piobuf);
-               piobuf += pioincr;
-       }
-
-       ipath_get_eeprom_info(dd);
-       if (dd->ipath_boardrev == 5) {
-               /*
-                * Later production QHT7040 has same changes as QHT7140, so
-                * can use GPIO interrupts.  They have serial #'s starting
-                * with 128, rather than 112.
-                */
-               if (dd->ipath_serial[0] == '1' &&
-                   dd->ipath_serial[1] == '2' &&
-                   dd->ipath_serial[2] == '8')
-                       dd->ipath_flags |= IPATH_GPIO_INTR;
-               else {
-                       ipath_dev_err(dd, "Unsupported InfiniPath board "
-                               "(serial number %.16s)!\n",
-                               dd->ipath_serial);
-                       return 1;
-               }
-       }
-
-       if (dd->ipath_minrev >= 4) {
-               /* Rev4+ reports extra errors via internal GPIO pins */
-               dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
-               dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-                                dd->ipath_gpio_mask);
-       }
-
-       return 0;
-}
-
-
-/**
- * ipath_init_ht_get_base_info - set chip-specific flags for user code
- * @dd: the infinipath device
- * @kbase: ipath_base_info pointer
- *
- * We set the PCIE flag because the lower bandwidth on PCIe vs
- * HyperTransport can affect some user packet algorithms.
- */
-static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
-{
-       struct ipath_base_info *kinfo = kbase;
-
-       kinfo->spi_runtime_flags |= IPATH_RUNTIME_HT |
-               IPATH_RUNTIME_PIO_REGSWAPPED;
-
-       if (pd->port_dd->ipath_minrev < 4)
-               kinfo->spi_runtime_flags |= IPATH_RUNTIME_RCVHDR_COPY;
-
-       return 0;
-}
-
-static void ipath_ht_free_irq(struct ipath_devdata *dd)
-{
-       free_irq(dd->ipath_irq, dd);
-       ht_destroy_irq(dd->ipath_irq);
-       dd->ipath_irq = 0;
-       dd->ipath_intconfig = 0;
-}
-
-static struct ipath_message_header *
-ipath_ht_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr)
-{
-       return (struct ipath_message_header *)
-               &rhf_addr[sizeof(u64) / sizeof(u32)];
-}
-
-static void ipath_ht_config_ports(struct ipath_devdata *dd, ushort cfgports)
-{
-       dd->ipath_portcnt =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
-       dd->ipath_p0_rcvegrcnt =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
-}
-
-static void ipath_ht_read_counters(struct ipath_devdata *dd,
-                                  struct infinipath_counters *cntrs)
-{
-       cntrs->LBIntCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt));
-       cntrs->LBFlowStallCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBFlowStallCnt));
-       cntrs->TxSDmaDescCnt = 0;
-       cntrs->TxUnsupVLErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt));
-       cntrs->TxDataPktCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt));
-       cntrs->TxFlowPktCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt));
-       cntrs->TxDwordCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt));
-       cntrs->TxLenErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt));
-       cntrs->TxMaxMinLenErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt));
-       cntrs->TxUnderrunCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt));
-       cntrs->TxFlowStallCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt));
-       cntrs->TxDroppedPktCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt));
-       cntrs->RxDroppedPktCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt));
-       cntrs->RxDataPktCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt));
-       cntrs->RxFlowPktCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt));
-       cntrs->RxDwordCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt));
-       cntrs->RxLenErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt));
-       cntrs->RxMaxMinLenErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt));
-       cntrs->RxICRCErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt));
-       cntrs->RxVCRCErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt));
-       cntrs->RxFlowCtrlErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt));
-       cntrs->RxBadFormatCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt));
-       cntrs->RxLinkProblemCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt));
-       cntrs->RxEBPCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt));
-       cntrs->RxLPCRCErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt));
-       cntrs->RxBufOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt));
-       cntrs->RxTIDFullErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDFullErrCnt));
-       cntrs->RxTIDValidErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt));
-       cntrs->RxPKeyMismatchCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxPKeyMismatchCnt));
-       cntrs->RxP0HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt));
-       cntrs->RxP1HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt));
-       cntrs->RxP2HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt));
-       cntrs->RxP3HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt));
-       cntrs->RxP4HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt));
-       cntrs->RxP5HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP5HdrEgrOvflCnt));
-       cntrs->RxP6HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP6HdrEgrOvflCnt));
-       cntrs->RxP7HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP7HdrEgrOvflCnt));
-       cntrs->RxP8HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP8HdrEgrOvflCnt));
-       cntrs->RxP9HdrEgrOvflCnt = 0;
-       cntrs->RxP10HdrEgrOvflCnt = 0;
-       cntrs->RxP11HdrEgrOvflCnt = 0;
-       cntrs->RxP12HdrEgrOvflCnt = 0;
-       cntrs->RxP13HdrEgrOvflCnt = 0;
-       cntrs->RxP14HdrEgrOvflCnt = 0;
-       cntrs->RxP15HdrEgrOvflCnt = 0;
-       cntrs->RxP16HdrEgrOvflCnt = 0;
-       cntrs->IBStatusChangeCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt));
-       cntrs->IBLinkErrRecoveryCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt));
-       cntrs->IBLinkDownedCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt));
-       cntrs->IBSymbolErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt));
-       cntrs->RxVL15DroppedPktCnt = 0;
-       cntrs->RxOtherLocalPhyErrCnt = 0;
-       cntrs->PcieRetryBufDiagQwordCnt = 0;
-       cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs;
-       cntrs->LocalLinkIntegrityErrCnt =
-               (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
-               dd->ipath_lli_errs : dd->ipath_lli_errors;
-       cntrs->RxVlErrCnt = 0;
-       cntrs->RxDlidFltrCnt = 0;
-}
-
-
-/* no interrupt fallback for these chips */
-static int ipath_ht_nointr_fallback(struct ipath_devdata *dd)
-{
-       return 0;
-}
-
-
-/*
- * reset the XGXS (between serdes and IBC).  Slightly less intrusive
- * than resetting the IBC or external link state, and useful in some
- * cases to cause some retraining.  To do this right, we reset IBC
- * as well.
- */
-static void ipath_ht_xgxs_reset(struct ipath_devdata *dd)
-{
-       u64 val, prev_val;
-
-       prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
-       val = prev_val | INFINIPATH_XGXS_RESET;
-       prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                        dd->ipath_control & ~INFINIPATH_C_LINKENABLE);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-       ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                        dd->ipath_control);
-}
-
-
-static int ipath_ht_get_ib_cfg(struct ipath_devdata *dd, int which)
-{
-       int ret;
-
-       switch (which) {
-       case IPATH_IB_CFG_LWID:
-               ret = dd->ipath_link_width_active;
-               break;
-       case IPATH_IB_CFG_SPD:
-               ret = dd->ipath_link_speed_active;
-               break;
-       case IPATH_IB_CFG_LWID_ENB:
-               ret = dd->ipath_link_width_enabled;
-               break;
-       case IPATH_IB_CFG_SPD_ENB:
-               ret = dd->ipath_link_speed_enabled;
-               break;
-       default:
-               ret =  -ENOTSUPP;
-               break;
-       }
-       return ret;
-}
-
-
-/* we assume range checking is already done, if needed */
-static int ipath_ht_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val)
-{
-       int ret = 0;
-
-       if (which == IPATH_IB_CFG_LWID_ENB)
-               dd->ipath_link_width_enabled = val;
-       else if (which == IPATH_IB_CFG_SPD_ENB)
-               dd->ipath_link_speed_enabled = val;
-       else
-               ret = -ENOTSUPP;
-       return ret;
-}
-
-
-static void ipath_ht_config_jint(struct ipath_devdata *dd, u16 a, u16 b)
-{
-}
-
-
-static int ipath_ht_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
-{
-       ipath_setup_ht_setextled(dd, ipath_ib_linkstate(dd, ibcs),
-               ipath_ib_linktrstate(dd, ibcs));
-       return 0;
-}
-
-
-/**
- * ipath_init_iba6110_funcs - set up the chip-specific function pointers
- * @dd: the infinipath device
- *
- * This is global, and is called directly at init to set up the
- * chip-specific function pointers for later use.
- */
-void ipath_init_iba6110_funcs(struct ipath_devdata *dd)
-{
-       dd->ipath_f_intrsetup = ipath_ht_intconfig;
-       dd->ipath_f_bus = ipath_setup_ht_config;
-       dd->ipath_f_reset = ipath_setup_ht_reset;
-       dd->ipath_f_get_boardname = ipath_ht_boardname;
-       dd->ipath_f_init_hwerrors = ipath_ht_init_hwerrors;
-       dd->ipath_f_early_init = ipath_ht_early_init;
-       dd->ipath_f_handle_hwerrors = ipath_ht_handle_hwerrors;
-       dd->ipath_f_quiet_serdes = ipath_ht_quiet_serdes;
-       dd->ipath_f_bringup_serdes = ipath_ht_bringup_serdes;
-       dd->ipath_f_clear_tids = ipath_ht_clear_tids;
-       dd->ipath_f_put_tid = ipath_ht_put_tid;
-       dd->ipath_f_cleanup = ipath_setup_ht_cleanup;
-       dd->ipath_f_setextled = ipath_setup_ht_setextled;
-       dd->ipath_f_get_base_info = ipath_ht_get_base_info;
-       dd->ipath_f_free_irq = ipath_ht_free_irq;
-       dd->ipath_f_tidtemplate = ipath_ht_tidtemplate;
-       dd->ipath_f_intr_fallback = ipath_ht_nointr_fallback;
-       dd->ipath_f_get_msgheader = ipath_ht_get_msgheader;
-       dd->ipath_f_config_ports = ipath_ht_config_ports;
-       dd->ipath_f_read_counters = ipath_ht_read_counters;
-       dd->ipath_f_xgxs_reset = ipath_ht_xgxs_reset;
-       dd->ipath_f_get_ib_cfg = ipath_ht_get_ib_cfg;
-       dd->ipath_f_set_ib_cfg = ipath_ht_set_ib_cfg;
-       dd->ipath_f_config_jint = ipath_ht_config_jint;
-       dd->ipath_f_ib_updown = ipath_ht_ib_updown;
-
-       /*
-        * initialize chip-specific variables
-        */
-       ipath_init_ht_variables(dd);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_init_chip.c b/drivers/staging/rdma/ipath/ipath_init_chip.c
deleted file mode 100644 (file)
index a5eea19..0000000
+++ /dev/null
@@ -1,1062 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/moduleparam.h>
-#include <linux/slab.h>
-#include <linux/stat.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_kernel.h"
-#include "ipath_common.h"
-
-/*
- * min buffers we want to have per port, after driver
- */
-#define IPATH_MIN_USER_PORT_BUFCNT 7
-
-/*
- * Number of ports we are configured to use (to allow for more pio
- * buffers per port, etc.)  Zero means use chip value.
- */
-static ushort ipath_cfgports;
-
-module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO);
-MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
-
-/*
- * Number of buffers reserved for driver (verbs and layered drivers.)
- * Initialized based on number of PIO buffers if not set via module interface.
- * The problem with this is that it's global, but we'll use different
- * numbers for different chip types.
- */
-static ushort ipath_kpiobufs;
-
-static int ipath_set_kpiobufs(const char *val, struct kernel_param *kp);
-
-module_param_call(kpiobufs, ipath_set_kpiobufs, param_get_ushort,
-                 &ipath_kpiobufs, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
-
-/**
- * create_port0_egr - allocate the eager TID buffers
- * @dd: the infinipath device
- *
- * This code is now quite different for user and kernel, because
- * the kernel uses skb's, for the accelerated network performance.
- * This is the kernel (port0) version.
- *
- * Allocate the eager TID buffers and program them into infinipath.
- * We use the network layer alloc_skb() allocator to allocate the
- * memory, and either use the buffers as is for things like verbs
- * packets, or pass the buffers up to the ipath layered driver and
- * thence the network layer, replacing them as we do so (see
- * ipath_rcv_layer()).
- */
-static int create_port0_egr(struct ipath_devdata *dd)
-{
-       unsigned e, egrcnt;
-       struct ipath_skbinfo *skbinfo;
-       int ret;
-
-       egrcnt = dd->ipath_p0_rcvegrcnt;
-
-       skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt);
-       if (skbinfo == NULL) {
-               ipath_dev_err(dd, "allocation error for eager TID "
-                             "skb array\n");
-               ret = -ENOMEM;
-               goto bail;
-       }
-       for (e = 0; e < egrcnt; e++) {
-               /*
-                * This is a bit tricky in that we allocate extra
-                * space for 2 bytes of the 14 byte ethernet header.
-                * These two bytes are passed in the ipath header so
-                * the rest of the data is word aligned.  We allocate
-                * 4 bytes so that the data buffer stays word aligned.
-                * See ipath_kreceive() for more details.
-                */
-               skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL);
-               if (!skbinfo[e].skb) {
-                       ipath_dev_err(dd, "SKB allocation error for "
-                                     "eager TID %u\n", e);
-                       while (e != 0)
-                               dev_kfree_skb(skbinfo[--e].skb);
-                       vfree(skbinfo);
-                       ret = -ENOMEM;
-                       goto bail;
-               }
-       }
-       /*
-        * After loop above, so we can test non-NULL to see if ready
-        * to use at receive, etc.
-        */
-       dd->ipath_port0_skbinfo = skbinfo;
-
-       for (e = 0; e < egrcnt; e++) {
-               dd->ipath_port0_skbinfo[e].phys =
-                 ipath_map_single(dd->pcidev,
-                                  dd->ipath_port0_skbinfo[e].skb->data,
-                                  dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE);
-               dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
-                                   ((char __iomem *) dd->ipath_kregbase +
-                                    dd->ipath_rcvegrbase),
-                                   RCVHQ_RCV_TYPE_EAGER,
-                                   dd->ipath_port0_skbinfo[e].phys);
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-static int bringup_link(struct ipath_devdata *dd)
-{
-       u64 val, ibc;
-       int ret = 0;
-
-       /* hold IBC in reset */
-       dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                        dd->ipath_control);
-
-       /*
-        * set initial max size pkt IBC will send, including ICRC; it's the
-        * PIO buffer size in dwords, less 1; also see ipath_set_mtu()
-        */
-       val = (dd->ipath_ibmaxlen >> 2) + 1;
-       ibc = val << dd->ibcc_mpl_shift;
-
-       /* flowcontrolwatermark is in units of KBytes */
-       ibc |= 0x5ULL << INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT;
-       /*
-        * How often flowctrl sent.  More or less in usecs; balance against
-        * watermark value, so that in theory senders always get a flow
-        * control update in time to not let the IB link go idle.
-        */
-       ibc |= 0x3ULL << INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT;
-       /* max error tolerance */
-       ibc |= 0xfULL << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
-       /* use "real" buffer space for */
-       ibc |= 4ULL << INFINIPATH_IBCC_CREDITSCALE_SHIFT;
-       /* IB credit flow control. */
-       ibc |= 0xfULL << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
-       /* initially come up waiting for TS1, without sending anything. */
-       dd->ipath_ibcctrl = ibc;
-       /*
-        * Want to start out with both LINKCMD and LINKINITCMD in NOP
-        * (0 and 0).  Don't put linkinitcmd in ipath_ibcctrl, want that
-        * to stay a NOP. Flag that we are disabled, for the (unlikely)
-        * case that some recovery path is trying to bring the link up
-        * before we are ready.
-        */
-       ibc |= INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
-               INFINIPATH_IBCC_LINKINITCMD_SHIFT;
-       dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
-       ipath_cdbg(VERBOSE, "Writing 0x%llx to ibcctrl\n",
-                  (unsigned long long) ibc);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, ibc);
-
-       // be sure chip saw it
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-
-       ret = dd->ipath_f_bringup_serdes(dd);
-
-       if (ret)
-               dev_info(&dd->pcidev->dev, "Could not initialize SerDes, "
-                        "not usable\n");
-       else {
-               /* enable IBC */
-               dd->ipath_control |= INFINIPATH_C_LINKENABLE;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                                dd->ipath_control);
-       }
-
-       return ret;
-}
-
-static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd)
-{
-       struct ipath_portdata *pd;
-
-       pd = kzalloc(sizeof(*pd), GFP_KERNEL);
-       if (pd) {
-               pd->port_dd = dd;
-               pd->port_cnt = 1;
-               /* The port 0 pkey table is used by the layer interface. */
-               pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
-               pd->port_seq_cnt = 1;
-       }
-       return pd;
-}
-
-static int init_chip_first(struct ipath_devdata *dd)
-{
-       struct ipath_portdata *pd;
-       int ret = 0;
-       u64 val;
-
-       spin_lock_init(&dd->ipath_kernel_tid_lock);
-       spin_lock_init(&dd->ipath_user_tid_lock);
-       spin_lock_init(&dd->ipath_sendctrl_lock);
-       spin_lock_init(&dd->ipath_uctxt_lock);
-       spin_lock_init(&dd->ipath_sdma_lock);
-       spin_lock_init(&dd->ipath_gpio_lock);
-       spin_lock_init(&dd->ipath_eep_st_lock);
-       spin_lock_init(&dd->ipath_sdepb_lock);
-       mutex_init(&dd->ipath_eep_lock);
-
-       /*
-        * skip cfgports stuff because we are not allocating memory,
-        * and we don't want problems if the portcnt changed due to
-        * cfgports.  We do still check and report a difference, if
-        * not same (should be impossible).
-        */
-       dd->ipath_f_config_ports(dd, ipath_cfgports);
-       if (!ipath_cfgports)
-               dd->ipath_cfgports = dd->ipath_portcnt;
-       else if (ipath_cfgports <= dd->ipath_portcnt) {
-               dd->ipath_cfgports = ipath_cfgports;
-               ipath_dbg("Configured to use %u ports out of %u in chip\n",
-                         dd->ipath_cfgports, ipath_read_kreg32(dd,
-                         dd->ipath_kregs->kr_portcnt));
-       } else {
-               dd->ipath_cfgports = dd->ipath_portcnt;
-               ipath_dbg("Tried to configured to use %u ports; chip "
-                         "only supports %u\n", ipath_cfgports,
-                         ipath_read_kreg32(dd,
-                                 dd->ipath_kregs->kr_portcnt));
-       }
-       /*
-        * Allocate full portcnt array, rather than just cfgports, because
-        * cleanup iterates across all possible ports.
-        */
-       dd->ipath_pd = kcalloc(dd->ipath_portcnt, sizeof(*dd->ipath_pd),
-                              GFP_KERNEL);
-
-       if (!dd->ipath_pd) {
-               ipath_dev_err(dd, "Unable to allocate portdata array, "
-                             "failing\n");
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       pd = create_portdata0(dd);
-       if (!pd) {
-               ipath_dev_err(dd, "Unable to allocate portdata for port "
-                             "0, failing\n");
-               ret = -ENOMEM;
-               goto done;
-       }
-       dd->ipath_pd[0] = pd;
-
-       dd->ipath_rcvtidcnt =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
-       dd->ipath_rcvtidbase =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase);
-       dd->ipath_rcvegrcnt =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
-       dd->ipath_rcvegrbase =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase);
-       dd->ipath_palign =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
-       dd->ipath_piobufbase =
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufbase);
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize);
-       dd->ipath_piosize2k = val & ~0U;
-       dd->ipath_piosize4k = val >> 32;
-       if (dd->ipath_piosize4k == 0 && ipath_mtu4096)
-               ipath_mtu4096 = 0; /* 4KB not supported by this chip */
-       dd->ipath_ibmtu = ipath_mtu4096 ? 4096 : 2048;
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt);
-       dd->ipath_piobcnt2k = val & ~0U;
-       dd->ipath_piobcnt4k = val >> 32;
-       dd->ipath_pio2kbase =
-               (u32 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
-                                (dd->ipath_piobufbase & 0xffffffff));
-       if (dd->ipath_piobcnt4k) {
-               dd->ipath_pio4kbase = (u32 __iomem *)
-                       (((char __iomem *) dd->ipath_kregbase) +
-                        (dd->ipath_piobufbase >> 32));
-               /*
-                * 4K buffers take 2 pages; we use roundup just to be
-                * paranoid; we calculate it once here, rather than on
-                * ever buf allocate
-                */
-               dd->ipath_4kalign = ALIGN(dd->ipath_piosize4k,
-                                         dd->ipath_palign);
-               ipath_dbg("%u 2k(%x) piobufs @ %p, %u 4k(%x) @ %p "
-                         "(%x aligned)\n",
-                         dd->ipath_piobcnt2k, dd->ipath_piosize2k,
-                         dd->ipath_pio2kbase, dd->ipath_piobcnt4k,
-                         dd->ipath_piosize4k, dd->ipath_pio4kbase,
-                         dd->ipath_4kalign);
-       } else {
-               ipath_dbg("%u 2k piobufs @ %p\n",
-                         dd->ipath_piobcnt2k, dd->ipath_pio2kbase);
-       }
-done:
-       return ret;
-}
-
-/**
- * init_chip_reset - re-initialize after a reset, or enable
- * @dd: the infinipath device
- *
- * sanity check at least some of the values after reset, and
- * ensure no receive or transmit (explicitly, in case reset
- * failed
- */
-static int init_chip_reset(struct ipath_devdata *dd)
-{
-       u32 rtmp;
-       int i;
-       unsigned long flags;
-
-       /*
-        * ensure chip does no sends or receives, tail updates, or
-        * pioavail updates while we re-initialize
-        */
-       dd->ipath_rcvctrl &= ~(1ULL << dd->ipath_r_tailupd_shift);
-       for (i = 0; i < dd->ipath_portcnt; i++) {
-               clear_bit(dd->ipath_r_portenable_shift + i,
-                         &dd->ipath_rcvctrl);
-               clear_bit(dd->ipath_r_intravail_shift + i,
-                         &dd->ipath_rcvctrl);
-       }
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-               dd->ipath_rcvctrl);
-
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       dd->ipath_sendctrl = 0U; /* no sdma, etc */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
-
-       rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
-       if (rtmp != dd->ipath_rcvtidcnt)
-               dev_info(&dd->pcidev->dev, "tidcnt was %u before "
-                        "reset, now %u, using original\n",
-                        dd->ipath_rcvtidcnt, rtmp);
-       rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase);
-       if (rtmp != dd->ipath_rcvtidbase)
-               dev_info(&dd->pcidev->dev, "tidbase was %u before "
-                        "reset, now %u, using original\n",
-                        dd->ipath_rcvtidbase, rtmp);
-       rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
-       if (rtmp != dd->ipath_rcvegrcnt)
-               dev_info(&dd->pcidev->dev, "egrcnt was %u before "
-                        "reset, now %u, using original\n",
-                        dd->ipath_rcvegrcnt, rtmp);
-       rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase);
-       if (rtmp != dd->ipath_rcvegrbase)
-               dev_info(&dd->pcidev->dev, "egrbase was %u before "
-                        "reset, now %u, using original\n",
-                        dd->ipath_rcvegrbase, rtmp);
-
-       return 0;
-}
-
-static int init_pioavailregs(struct ipath_devdata *dd)
-{
-       int ret;
-
-       dd->ipath_pioavailregs_dma = dma_alloc_coherent(
-               &dd->pcidev->dev, PAGE_SIZE, &dd->ipath_pioavailregs_phys,
-               GFP_KERNEL);
-       if (!dd->ipath_pioavailregs_dma) {
-               ipath_dev_err(dd, "failed to allocate PIOavail reg area "
-                             "in memory\n");
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       /*
-        * we really want L2 cache aligned, but for current CPUs of
-        * interest, they are the same.
-        */
-       dd->ipath_statusp = (u64 *)
-               ((char *)dd->ipath_pioavailregs_dma +
-                ((2 * L1_CACHE_BYTES +
-                  dd->ipath_pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES));
-       /* copy the current value now that it's really allocated */
-       *dd->ipath_statusp = dd->_ipath_status;
-       /*
-        * setup buffer to hold freeze msg, accessible to apps,
-        * following statusp
-        */
-       dd->ipath_freezemsg = (char *)&dd->ipath_statusp[1];
-       /* and its length */
-       dd->ipath_freezelen = L1_CACHE_BYTES - sizeof(dd->ipath_statusp[0]);
-
-       ret = 0;
-
-done:
-       return ret;
-}
-
-/**
- * init_shadow_tids - allocate the shadow TID array
- * @dd: the infinipath device
- *
- * allocate the shadow TID array, so we can ipath_munlock previous
- * entries.  It may make more sense to move the pageshadow to the
- * port data structure, so we only allocate memory for ports actually
- * in use, since we at 8k per port, now.
- */
-static void init_shadow_tids(struct ipath_devdata *dd)
-{
-       struct page **pages;
-       dma_addr_t *addrs;
-
-       pages = vzalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
-                       sizeof(struct page *));
-       if (!pages) {
-               ipath_dev_err(dd, "failed to allocate shadow page * "
-                             "array, no expected sends!\n");
-               dd->ipath_pageshadow = NULL;
-               return;
-       }
-
-       addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
-                       sizeof(dma_addr_t));
-       if (!addrs) {
-               ipath_dev_err(dd, "failed to allocate shadow dma handle "
-                             "array, no expected sends!\n");
-               vfree(pages);
-               dd->ipath_pageshadow = NULL;
-               return;
-       }
-
-       dd->ipath_pageshadow = pages;
-       dd->ipath_physshadow = addrs;
-}
-
-static void enable_chip(struct ipath_devdata *dd, int reinit)
-{
-       u32 val;
-       u64 rcvmask;
-       unsigned long flags;
-       int i;
-
-       if (!reinit)
-               init_waitqueue_head(&ipath_state_wait);
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                        dd->ipath_rcvctrl);
-
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       /* Enable PIO send, and update of PIOavail regs to memory. */
-       dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE |
-               INFINIPATH_S_PIOBUFAVAILUPD;
-
-       /*
-        * Set the PIO avail update threshold to host memory
-        * on chips that support it.
-        */
-       if (dd->ipath_pioupd_thresh)
-               dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
-                       << INFINIPATH_S_UPDTHRESH_SHIFT;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-       /*
-        * Enable kernel ports' receive and receive interrupt.
-        * Other ports done as user opens and inits them.
-        */
-       rcvmask = 1ULL;
-       dd->ipath_rcvctrl |= (rcvmask << dd->ipath_r_portenable_shift) |
-               (rcvmask << dd->ipath_r_intravail_shift);
-       if (!(dd->ipath_flags & IPATH_NODMA_RTAIL))
-               dd->ipath_rcvctrl |= (1ULL << dd->ipath_r_tailupd_shift);
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                        dd->ipath_rcvctrl);
-
-       /*
-        * now ready for use.  this should be cleared whenever we
-        * detect a reset, or initiate one.
-        */
-       dd->ipath_flags |= IPATH_INITTED;
-
-       /*
-        * Init our shadow copies of head from tail values,
-        * and write head values to match.
-        */
-       val = ipath_read_ureg32(dd, ur_rcvegrindextail, 0);
-       ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0);
-
-       /* Initialize so we interrupt on next packet received */
-       ipath_write_ureg(dd, ur_rcvhdrhead,
-                        dd->ipath_rhdrhead_intr_off |
-                        dd->ipath_pd[0]->port_head, 0);
-
-       /*
-        * by now pioavail updates to memory should have occurred, so
-        * copy them into our working/shadow registers; this is in
-        * case something went wrong with abort, but mostly to get the
-        * initial values of the generation bit correct.
-        */
-       for (i = 0; i < dd->ipath_pioavregs; i++) {
-               __le64 pioavail;
-
-               /*
-                * Chip Errata bug 6641; even and odd qwords>3 are swapped.
-                */
-               if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
-                       pioavail = dd->ipath_pioavailregs_dma[i ^ 1];
-               else
-                       pioavail = dd->ipath_pioavailregs_dma[i];
-               /*
-                * don't need to worry about ipath_pioavailkernel here
-                * because we will call ipath_chg_pioavailkernel() later
-                * in initialization, to busy out buffers as needed
-                */
-               dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail);
-       }
-       /* can get counters, stats, etc. */
-       dd->ipath_flags |= IPATH_PRESENT;
-}
-
-static int init_housekeeping(struct ipath_devdata *dd, int reinit)
-{
-       char boardn[40];
-       int ret = 0;
-
-       /*
-        * have to clear shadow copies of registers at init that are
-        * not otherwise set here, or all kinds of bizarre things
-        * happen with driver on chip reset
-        */
-       dd->ipath_rcvhdrsize = 0;
-
-       /*
-        * Don't clear ipath_flags as 8bit mode was set before
-        * entering this func. However, we do set the linkstate to
-        * unknown, so we can watch for a transition.
-        * PRESENT is set because we want register reads to work,
-        * and the kernel infrastructure saw it in config space;
-        * We clear it if we have failures.
-        */
-       dd->ipath_flags |= IPATH_LINKUNK | IPATH_PRESENT;
-       dd->ipath_flags &= ~(IPATH_LINKACTIVE | IPATH_LINKARMED |
-                            IPATH_LINKDOWN | IPATH_LINKINIT);
-
-       ipath_cdbg(VERBOSE, "Try to read spc chip revision\n");
-       dd->ipath_revision =
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
-
-       /*
-        * set up fundamental info we need to use the chip; we assume
-        * if the revision reg and these regs are OK, we don't need to
-        * special case the rest
-        */
-       dd->ipath_sregbase =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_sendregbase);
-       dd->ipath_cregbase =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_counterregbase);
-       dd->ipath_uregbase =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_userregbase);
-       ipath_cdbg(VERBOSE, "ipath_kregbase %p, sendbase %x usrbase %x, "
-                  "cntrbase %x\n", dd->ipath_kregbase, dd->ipath_sregbase,
-                  dd->ipath_uregbase, dd->ipath_cregbase);
-       if ((dd->ipath_revision & 0xffffffff) == 0xffffffff
-           || (dd->ipath_sregbase & 0xffffffff) == 0xffffffff
-           || (dd->ipath_cregbase & 0xffffffff) == 0xffffffff
-           || (dd->ipath_uregbase & 0xffffffff) == 0xffffffff) {
-               ipath_dev_err(dd, "Register read failures from chip, "
-                             "giving up initialization\n");
-               dd->ipath_flags &= ~IPATH_PRESENT;
-               ret = -ENODEV;
-               goto done;
-       }
-
-
-       /* clear diagctrl register, in case diags were running and crashed */
-       ipath_write_kreg (dd, dd->ipath_kregs->kr_hwdiagctrl, 0);
-
-       /* clear the initial reset flag, in case first driver load */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
-                        INFINIPATH_E_RESET);
-
-       ipath_cdbg(VERBOSE, "Revision %llx (PCI %x)\n",
-                  (unsigned long long) dd->ipath_revision,
-                  dd->ipath_pcirev);
-
-       if (((dd->ipath_revision >> INFINIPATH_R_SOFTWARE_SHIFT) &
-            INFINIPATH_R_SOFTWARE_MASK) != IPATH_CHIP_SWVERSION) {
-               ipath_dev_err(dd, "Driver only handles version %d, "
-                             "chip swversion is %d (%llx), failng\n",
-                             IPATH_CHIP_SWVERSION,
-                             (int)(dd->ipath_revision >>
-                                   INFINIPATH_R_SOFTWARE_SHIFT) &
-                             INFINIPATH_R_SOFTWARE_MASK,
-                             (unsigned long long) dd->ipath_revision);
-               ret = -ENOSYS;
-               goto done;
-       }
-       dd->ipath_majrev = (u8) ((dd->ipath_revision >>
-                                 INFINIPATH_R_CHIPREVMAJOR_SHIFT) &
-                                INFINIPATH_R_CHIPREVMAJOR_MASK);
-       dd->ipath_minrev = (u8) ((dd->ipath_revision >>
-                                 INFINIPATH_R_CHIPREVMINOR_SHIFT) &
-                                INFINIPATH_R_CHIPREVMINOR_MASK);
-       dd->ipath_boardrev = (u8) ((dd->ipath_revision >>
-                                   INFINIPATH_R_BOARDID_SHIFT) &
-                                  INFINIPATH_R_BOARDID_MASK);
-
-       ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn);
-
-       snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion),
-                "ChipABI %u.%u, %s, InfiniPath%u %u.%u, PCI %u, "
-                "SW Compat %u\n",
-                IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn,
-                (unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) &
-                INFINIPATH_R_ARCH_MASK,
-                dd->ipath_majrev, dd->ipath_minrev, dd->ipath_pcirev,
-                (unsigned)(dd->ipath_revision >>
-                           INFINIPATH_R_SOFTWARE_SHIFT) &
-                INFINIPATH_R_SOFTWARE_MASK);
-
-       ipath_dbg("%s", dd->ipath_boardversion);
-
-       if (ret)
-               goto done;
-
-       if (reinit)
-               ret = init_chip_reset(dd);
-       else
-               ret = init_chip_first(dd);
-
-done:
-       return ret;
-}
-
-static void verify_interrupt(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
-
-       if (!dd)
-               return; /* being torn down */
-
-       /*
-        * If we don't have any interrupts, let the user know and
-        * don't bother checking again.
-        */
-       if (dd->ipath_int_counter == 0) {
-               if (!dd->ipath_f_intr_fallback(dd))
-                       dev_err(&dd->pcidev->dev, "No interrupts detected, "
-                               "not usable.\n");
-               else /* re-arm the timer to see if fallback works */
-                       mod_timer(&dd->ipath_intrchk_timer, jiffies + HZ/2);
-       } else
-               ipath_cdbg(VERBOSE, "%u interrupts at timer check\n",
-                       dd->ipath_int_counter);
-}
-
-/**
- * ipath_init_chip - do the actual initialization sequence on the chip
- * @dd: the infinipath device
- * @reinit: reinitializing, so don't allocate new memory
- *
- * Do the actual initialization sequence on the chip.  This is done
- * both from the init routine called from the PCI infrastructure, and
- * when we reset the chip, or detect that it was reset internally,
- * or it's administratively re-enabled.
- *
- * Memory allocation here and in called routines is only done in
- * the first case (reinit == 0).  We have to be careful, because even
- * without memory allocation, we need to re-write all the chip registers
- * TIDs, etc. after the reset or enable has completed.
- */
-int ipath_init_chip(struct ipath_devdata *dd, int reinit)
-{
-       int ret = 0;
-       u32 kpiobufs, defkbufs;
-       u32 piobufs, uports;
-       u64 val;
-       struct ipath_portdata *pd;
-       gfp_t gfp_flags = GFP_USER | __GFP_COMP;
-
-       ret = init_housekeeping(dd, reinit);
-       if (ret)
-               goto done;
-
-       /*
-        * We could bump this to allow for full rcvegrcnt + rcvtidcnt,
-        * but then it no longer nicely fits power of two, and since
-        * we now use routines that backend onto __get_free_pages, the
-        * rest would be wasted.
-        */
-       dd->ipath_rcvhdrcnt = max(dd->ipath_p0_rcvegrcnt, dd->ipath_rcvegrcnt);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrcnt,
-                        dd->ipath_rcvhdrcnt);
-
-       /*
-        * Set up the shadow copies of the piobufavail registers,
-        * which we compare against the chip registers for now, and
-        * the in memory DMA'ed copies of the registers.  This has to
-        * be done early, before we calculate lastport, etc.
-        */
-       piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-       /*
-        * calc number of pioavail registers, and save it; we have 2
-        * bits per buffer.
-        */
-       dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
-               / (sizeof(u64) * BITS_PER_BYTE / 2);
-       uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
-       if (piobufs > 144)
-               defkbufs = 32 + dd->ipath_pioreserved;
-       else
-               defkbufs = 16 + dd->ipath_pioreserved;
-
-       if (ipath_kpiobufs && (ipath_kpiobufs +
-               (uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) {
-               int i = (int) piobufs -
-                       (int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
-               if (i < 1)
-                       i = 1;
-               dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
-                        "%d for kernel leaves too few for %d user ports "
-                        "(%d each); using %u\n", ipath_kpiobufs,
-                        piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
-               /*
-                * shouldn't change ipath_kpiobufs, because could be
-                * different for different devices...
-                */
-               kpiobufs = i;
-       } else if (ipath_kpiobufs)
-               kpiobufs = ipath_kpiobufs;
-       else
-               kpiobufs = defkbufs;
-       dd->ipath_lastport_piobuf = piobufs - kpiobufs;
-       dd->ipath_pbufsport =
-               uports ? dd->ipath_lastport_piobuf / uports : 0;
-       /* if not an even divisor, some user ports get extra buffers */
-       dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf -
-               (dd->ipath_pbufsport * uports);
-       if (dd->ipath_ports_extrabuf)
-               ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to "
-                       "ports <= %u\n", dd->ipath_pbufsport,
-                       dd->ipath_ports_extrabuf);
-       dd->ipath_lastpioindex = 0;
-       dd->ipath_lastpioindexl = dd->ipath_piobcnt2k;
-       /* ipath_pioavailshadow initialized earlier */
-       ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
-                  "each for %u user ports\n", kpiobufs,
-                  piobufs, dd->ipath_pbufsport, uports);
-       ret = dd->ipath_f_early_init(dd);
-       if (ret) {
-               ipath_dev_err(dd, "Early initialization failure\n");
-               goto done;
-       }
-
-       /*
-        * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be
-        * done after early_init.
-        */
-       dd->ipath_hdrqlast =
-               dd->ipath_rcvhdrentsize * (dd->ipath_rcvhdrcnt - 1);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrentsize,
-                        dd->ipath_rcvhdrentsize);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
-                        dd->ipath_rcvhdrsize);
-
-       if (!reinit) {
-               ret = init_pioavailregs(dd);
-               init_shadow_tids(dd);
-               if (ret)
-                       goto done;
-       }
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
-                        dd->ipath_pioavailregs_phys);
-
-       /*
-        * this is to detect s/w errors, which the h/w works around by
-        * ignoring the low 6 bits of address, if it wasn't aligned.
-        */
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpioavailaddr);
-       if (val != dd->ipath_pioavailregs_phys) {
-               ipath_dev_err(dd, "Catastrophic software error, "
-                             "SendPIOAvailAddr written as %lx, "
-                             "read back as %llx\n",
-                             (unsigned long) dd->ipath_pioavailregs_phys,
-                             (unsigned long long) val);
-               ret = -EINVAL;
-               goto done;
-       }
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvbthqp, IPATH_KD_QP);
-
-       /*
-        * make sure we are not in freeze, and PIO send enabled, so
-        * writes to pbc happen
-        */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, 0ULL);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-                        ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
-
-       /*
-        * before error clears, since we expect serdes pll errors during
-        * this, the first time after reset
-        */
-       if (bringup_link(dd)) {
-               dev_info(&dd->pcidev->dev, "Failed to bringup IB link\n");
-               ret = -ENETDOWN;
-               goto done;
-       }
-
-       /*
-        * clear any "expected" hwerrs from reset and/or initialization
-        * clear any that aren't enabled (at least this once), and then
-        * set the enable mask
-        */
-       dd->ipath_f_init_hwerrors(dd);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-                        ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                        dd->ipath_hwerrmask);
-
-       /* clear all */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
-       /* enable errors that are masked, at least this first time. */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-                        ~dd->ipath_maskederrs);
-       dd->ipath_maskederrs = 0; /* don't re-enable ignored in timer */
-       dd->ipath_errormask =
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
-       /* clear any interrupts up to this point (ints still not enabled) */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
-
-       dd->ipath_f_tidtemplate(dd);
-
-       /*
-        * Set up the port 0 (kernel) rcvhdr q and egr TIDs.  If doing
-        * re-init, the simplest way to handle this is to free
-        * existing, and re-allocate.
-        * Need to re-create rest of port 0 portdata as well.
-        */
-       pd = dd->ipath_pd[0];
-       if (reinit) {
-               struct ipath_portdata *npd;
-
-               /*
-                * Alloc and init new ipath_portdata for port0,
-                * Then free old pd. Could lead to fragmentation, but also
-                * makes later support for hot-swap easier.
-                */
-               npd = create_portdata0(dd);
-               if (npd) {
-                       ipath_free_pddata(dd, pd);
-                       dd->ipath_pd[0] = npd;
-                       pd = npd;
-               } else {
-                       ipath_dev_err(dd, "Unable to allocate portdata"
-                                     " for port 0, failing\n");
-                       ret = -ENOMEM;
-                       goto done;
-               }
-       }
-       ret = ipath_create_rcvhdrq(dd, pd);
-       if (!ret)
-               ret = create_port0_egr(dd);
-       if (ret) {
-               ipath_dev_err(dd, "failed to allocate kernel port's "
-                             "rcvhdrq and/or egr bufs\n");
-               goto done;
-       } else {
-               enable_chip(dd, reinit);
-       }
-
-       /* after enable_chip, so pioavailshadow setup */
-       ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
-
-       /*
-        * Cancel any possible active sends from early driver load.
-        * Follows early_init because some chips have to initialize
-        * PIO buffers in early_init to avoid false parity errors.
-        * After enable and ipath_chg_pioavailkernel so we can safely
-        * enable pioavail updates and PIOENABLE; packets are now
-        * ready to go out.
-        */
-       ipath_cancel_sends(dd, 1);
-
-       if (!reinit) {
-               /*
-                * Used when we close a port, for DMA already in flight
-                * at close.
-                */
-               dd->ipath_dummy_hdrq = dma_alloc_coherent(
-                       &dd->pcidev->dev, dd->ipath_pd[0]->port_rcvhdrq_size,
-                       &dd->ipath_dummy_hdrq_phys,
-                       gfp_flags);
-               if (!dd->ipath_dummy_hdrq) {
-                       dev_info(&dd->pcidev->dev,
-                               "Couldn't allocate 0x%lx bytes for dummy hdrq\n",
-                               dd->ipath_pd[0]->port_rcvhdrq_size);
-                       /* fallback to just 0'ing */
-                       dd->ipath_dummy_hdrq_phys = 0UL;
-               }
-       }
-
-       /*
-        * cause retrigger of pending interrupts ignored during init,
-        * even if we had errors
-        */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
-
-       if (!dd->ipath_stats_timer_active) {
-               /*
-                * first init, or after an admin disable/enable
-                * set up stats retrieval timer, even if we had errors
-                * in last portion of setup
-                */
-               setup_timer(&dd->ipath_stats_timer, ipath_get_faststats,
-                               (unsigned long)dd);
-               /* every 5 seconds; */
-               dd->ipath_stats_timer.expires = jiffies + 5 * HZ;
-               /* takes ~16 seconds to overflow at full IB 4x bandwdith */
-               add_timer(&dd->ipath_stats_timer);
-               dd->ipath_stats_timer_active = 1;
-       }
-
-       /* Set up SendDMA if chip supports it */
-       if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-               ret = setup_sdma(dd);
-
-       /* Set up HoL state */
-       setup_timer(&dd->ipath_hol_timer, ipath_hol_event, (unsigned long)dd);
-
-       dd->ipath_hol_state = IPATH_HOL_UP;
-
-done:
-       if (!ret) {
-               *dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT;
-               if (!dd->ipath_f_intrsetup(dd)) {
-                       /* now we can enable all interrupts from the chip */
-                       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
-                                        -1LL);
-                       /* force re-interrupt of any pending interrupts. */
-                       ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear,
-                                        0ULL);
-                       /* chip is usable; mark it as initialized */
-                       *dd->ipath_statusp |= IPATH_STATUS_INITTED;
-
-                       /*
-                        * setup to verify we get an interrupt, and fallback
-                        * to an alternate if necessary and possible
-                        */
-                       if (!reinit) {
-                               setup_timer(&dd->ipath_intrchk_timer,
-                                               verify_interrupt,
-                                               (unsigned long)dd);
-                       }
-                       dd->ipath_intrchk_timer.expires = jiffies + HZ/2;
-                       add_timer(&dd->ipath_intrchk_timer);
-               } else
-                       ipath_dev_err(dd, "No interrupts enabled, couldn't "
-                                     "setup interrupt address\n");
-
-               if (dd->ipath_cfgports > ipath_stats.sps_nports)
-                       /*
-                        * sps_nports is a global, so, we set it to
-                        * the highest number of ports of any of the
-                        * chips we find; we never decrement it, at
-                        * least for now.  Since this might have changed
-                        * over disable/enable or prior to reset, always
-                        * do the check and potentially adjust.
-                        */
-                       ipath_stats.sps_nports = dd->ipath_cfgports;
-       } else
-               ipath_dbg("Failed (%d) to initialize chip\n", ret);
-
-       /* if ret is non-zero, we probably should do some cleanup
-          here... */
-       return ret;
-}
-
-static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp)
-{
-       struct ipath_devdata *dd;
-       unsigned long flags;
-       unsigned short val;
-       int ret;
-
-       ret = ipath_parse_ushort(str, &val);
-
-       spin_lock_irqsave(&ipath_devs_lock, flags);
-
-       if (ret < 0)
-               goto bail;
-
-       if (val == 0) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
-               if (dd->ipath_kregbase)
-                       continue;
-               if (val > (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
-                          (dd->ipath_cfgports *
-                           IPATH_MIN_USER_PORT_BUFCNT)))
-               {
-                       ipath_dev_err(
-                               dd,
-                               "Allocating %d PIO bufs for kernel leaves "
-                               "too few for %d user ports (%d each)\n",
-                               val, dd->ipath_cfgports - 1,
-                               IPATH_MIN_USER_PORT_BUFCNT);
-                       ret = -EINVAL;
-                       goto bail;
-               }
-               dd->ipath_lastport_piobuf =
-                       dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val;
-       }
-
-       ipath_kpiobufs = val;
-       ret = 0;
-bail:
-       spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_intr.c b/drivers/staging/rdma/ipath/ipath_intr.c
deleted file mode 100644 (file)
index 0403fa2..0000000
+++ /dev/null
@@ -1,1271 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/pci.h>
-#include <linux/delay.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-
-/*
- * Called when we might have an error that is specific to a particular
- * PIO buffer, and may need to cancel that buffer, so it can be re-used.
- */
-void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
-{
-       u32 piobcnt;
-       unsigned long sbuf[4];
-       /*
-        * it's possible that sendbuffererror could have bits set; might
-        * have already done this as a result of hardware error handling
-        */
-       piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-       /* read these before writing errorclear */
-       sbuf[0] = ipath_read_kreg64(
-               dd, dd->ipath_kregs->kr_sendbuffererror);
-       sbuf[1] = ipath_read_kreg64(
-               dd, dd->ipath_kregs->kr_sendbuffererror + 1);
-       if (piobcnt > 128)
-               sbuf[2] = ipath_read_kreg64(
-                       dd, dd->ipath_kregs->kr_sendbuffererror + 2);
-       if (piobcnt > 192)
-               sbuf[3] = ipath_read_kreg64(
-                       dd, dd->ipath_kregs->kr_sendbuffererror + 3);
-       else
-               sbuf[3] = 0;
-
-       if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
-               int i;
-               if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
-                       time_after(dd->ipath_lastcancel, jiffies)) {
-                       __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
-                                         "SendbufErrs %lx %lx", sbuf[0],
-                                         sbuf[1]);
-                       if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
-                               printk(" %lx %lx ", sbuf[2], sbuf[3]);
-                       printk("\n");
-               }
-
-               for (i = 0; i < piobcnt; i++)
-                       if (test_bit(i, sbuf))
-                               ipath_disarm_piobufs(dd, i, 1);
-               /* ignore armlaunch errs for a bit */
-               dd->ipath_lastcancel = jiffies+3;
-       }
-}
-
-
-/* These are all rcv-related errors which we want to count for stats */
-#define E_SUM_PKTERRS \
-       (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
-        INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \
-        INFINIPATH_E_RLONGPKTLEN | INFINIPATH_E_RSHORTPKTLEN | \
-        INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RMINPKTLEN | \
-        INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \
-        INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP)
-
-/* These are all send-related errors which we want to count for stats */
-#define E_SUM_ERRS \
-       (INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \
-        INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
-        INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNSUPVL | \
-        INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
-        INFINIPATH_E_INVALIDADDR)
-
-/*
- * this is similar to E_SUM_ERRS, but can't ignore armlaunch, don't ignore
- * errors not related to freeze and cancelling buffers.  Can't ignore
- * armlaunch because could get more while still cleaning up, and need
- * to cancel those as they happen.
- */
-#define E_SPKT_ERRS_IGNORE \
-        (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
-        INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SMINPKTLEN | \
-        INFINIPATH_E_SPKTLEN)
-
-/*
- * these are errors that can occur when the link changes state while
- * a packet is being sent or received.  This doesn't cover things
- * like EBP or VCRC that can be the result of a sending having the
- * link change state, so we receive a "known bad" packet.
- */
-#define E_SUM_LINK_PKTERRS \
-       (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
-        INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
-        INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RMINPKTLEN | \
-        INFINIPATH_E_RUNEXPCHAR)
-
-static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
-{
-       u64 ignore_this_time = 0;
-
-       ipath_disarm_senderrbufs(dd);
-       if ((errs & E_SUM_LINK_PKTERRS) &&
-           !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-               /*
-                * This can happen when SMA is trying to bring the link
-                * up, but the IB link changes state at the "wrong" time.
-                * The IB logic then complains that the packet isn't
-                * valid.  We don't want to confuse people, so we just
-                * don't print them, except at debug
-                */
-               ipath_dbg("Ignoring packet errors %llx, because link not "
-                         "ACTIVE\n", (unsigned long long) errs);
-               ignore_this_time = errs & E_SUM_LINK_PKTERRS;
-       }
-
-       return ignore_this_time;
-}
-
-/* generic hw error messages... */
-#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \
-       { \
-               .mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a <<    \
-                         INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ),   \
-               .msg = "TXE " #a " Memory Parity"            \
-       }
-#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \
-       { \
-               .mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a <<    \
-                         INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ),   \
-               .msg = "RXE " #a " Memory Parity"            \
-       }
-
-static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = {
-       INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"),
-       INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"),
-
-       INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF),
-       INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC),
-       INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO),
-
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF),
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ),
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID),
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID),
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF),
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO),
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO),
-};
-
-/**
- * ipath_format_hwmsg - format a single hwerror message
- * @msg message buffer
- * @msgl length of message buffer
- * @hwmsg message to add to message buffer
- */
-static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
-{
-       strlcat(msg, "[", msgl);
-       strlcat(msg, hwmsg, msgl);
-       strlcat(msg, "]", msgl);
-}
-
-/**
- * ipath_format_hwerrors - format hardware error messages for display
- * @hwerrs hardware errors bit vector
- * @hwerrmsgs hardware error descriptions
- * @nhwerrmsgs number of hwerrmsgs
- * @msg message buffer
- * @msgl message buffer length
- */
-void ipath_format_hwerrors(u64 hwerrs,
-                          const struct ipath_hwerror_msgs *hwerrmsgs,
-                          size_t nhwerrmsgs,
-                          char *msg, size_t msgl)
-{
-       int i;
-       const int glen =
-           ARRAY_SIZE(ipath_generic_hwerror_msgs);
-
-       for (i=0; i<glen; i++) {
-               if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
-                       ipath_format_hwmsg(msg, msgl,
-                                          ipath_generic_hwerror_msgs[i].msg);
-               }
-       }
-
-       for (i=0; i<nhwerrmsgs; i++) {
-               if (hwerrs & hwerrmsgs[i].mask) {
-                       ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
-               }
-       }
-}
-
-/* return the strings for the most common link states */
-static char *ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
-{
-       char *ret;
-       u32 state;
-
-       state = ipath_ib_state(dd, ibcs);
-       if (state == dd->ib_init)
-               ret = "Init";
-       else if (state == dd->ib_arm)
-               ret = "Arm";
-       else if (state == dd->ib_active)
-               ret = "Active";
-       else
-               ret = "Down";
-       return ret;
-}
-
-void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev)
-{
-       struct ib_event event;
-
-       event.device = &dd->verbs_dev->ibdev;
-       event.element.port_num = 1;
-       event.event = ev;
-       ib_dispatch_event(&event);
-}
-
-static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
-                                    ipath_err_t errs)
-{
-       u32 ltstate, lstate, ibstate, lastlstate;
-       u32 init = dd->ib_init;
-       u32 arm = dd->ib_arm;
-       u32 active = dd->ib_active;
-       const u64 ibcs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-
-       lstate = ipath_ib_linkstate(dd, ibcs); /* linkstate */
-       ibstate = ipath_ib_state(dd, ibcs);
-       /* linkstate at last interrupt */
-       lastlstate = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
-       ltstate = ipath_ib_linktrstate(dd, ibcs); /* linktrainingtate */
-
-       /*
-        * Since going into a recovery state causes the link state to go
-        * down and since recovery is transitory, it is better if we "miss"
-        * ever seeing the link training state go into recovery (i.e.,
-        * ignore this transition for link state special handling purposes)
-        * without even updating ipath_lastibcstat.
-        */
-       if ((ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN) ||
-           (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT) ||
-           (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERIDLE))
-               goto done;
-
-       /*
-        * if linkstate transitions into INIT from any of the various down
-        * states, or if it transitions from any of the up (INIT or better)
-        * states into any of the down states (except link recovery), then
-        * call the chip-specific code to take appropriate actions.
-        */
-       if (lstate >= INFINIPATH_IBCS_L_STATE_INIT &&
-               lastlstate == INFINIPATH_IBCS_L_STATE_DOWN) {
-               /* transitioned to UP */
-               if (dd->ipath_f_ib_updown(dd, 1, ibcs)) {
-                       /* link came up, so we must no longer be disabled */
-                       dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
-                       ipath_cdbg(LINKVERB, "LinkUp handled, skipped\n");
-                       goto skip_ibchange; /* chip-code handled */
-               }
-       } else if ((lastlstate >= INFINIPATH_IBCS_L_STATE_INIT ||
-               (dd->ipath_flags & IPATH_IB_FORCE_NOTIFY)) &&
-               ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT &&
-               ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
-               int handled;
-               handled = dd->ipath_f_ib_updown(dd, 0, ibcs);
-               dd->ipath_flags &= ~IPATH_IB_FORCE_NOTIFY;
-               if (handled) {
-                       ipath_cdbg(LINKVERB, "LinkDown handled, skipped\n");
-                       goto skip_ibchange; /* chip-code handled */
-               }
-       }
-
-       /*
-        * Significant enough to always print and get into logs, if it was
-        * unexpected.  If it was a requested state change, we'll have
-        * already cleared the flags, so we won't print this warning
-        */
-       if ((ibstate != arm && ibstate != active) &&
-           (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
-               dev_info(&dd->pcidev->dev, "Link state changed from %s "
-                        "to %s\n", (dd->ipath_flags & IPATH_LINKARMED) ?
-                        "ARM" : "ACTIVE", ib_linkstate(dd, ibcs));
-       }
-
-       if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
-           ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
-               u32 lastlts;
-               lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
-               /*
-                * Ignore cycling back and forth from Polling.Active to
-                * Polling.Quiet while waiting for the other end of the link
-                * to come up, except to try and decide if we are connected
-                * to a live IB device or not.  We will cycle back and
-                * forth between them if no cable is plugged in, the other
-                * device is powered off or disabled, etc.
-                */
-               if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
-                   lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
-                       if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) &&
-                            (++dd->ipath_ibpollcnt == 40)) {
-                               dd->ipath_flags |= IPATH_NOCABLE;
-                               *dd->ipath_statusp |=
-                                       IPATH_STATUS_IB_NOCABLE;
-                               ipath_cdbg(LINKVERB, "Set NOCABLE\n");
-                       }
-                       ipath_cdbg(LINKVERB, "POLL change to %s (%x)\n",
-                               ipath_ibcstatus_str[ltstate], ibstate);
-                       goto skip_ibchange;
-               }
-       }
-
-       dd->ipath_ibpollcnt = 0; /* not poll*, now */
-       ipath_stats.sps_iblink++;
-
-       if (ibstate != init && dd->ipath_lastlinkrecov && ipath_linkrecovery) {
-               u64 linkrecov;
-               linkrecov = ipath_snap_cntr(dd,
-                       dd->ipath_cregs->cr_iblinkerrrecovcnt);
-               if (linkrecov != dd->ipath_lastlinkrecov) {
-                       ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n",
-                               (unsigned long long) ibcs,
-                               ib_linkstate(dd, ibcs),
-                               ipath_ibcstatus_str[ltstate],
-                               (unsigned long long) linkrecov);
-                       /* and no more until active again */
-                       dd->ipath_lastlinkrecov = 0;
-                       ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
-                       goto skip_ibchange;
-               }
-       }
-
-       if (ibstate == init || ibstate == arm || ibstate == active) {
-               *dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
-               if (ibstate == init || ibstate == arm) {
-                       *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-                       if (dd->ipath_flags & IPATH_LINKACTIVE)
-                               signal_ib_event(dd, IB_EVENT_PORT_ERR);
-               }
-               if (ibstate == arm) {
-                       dd->ipath_flags |= IPATH_LINKARMED;
-                       dd->ipath_flags &= ~(IPATH_LINKUNK |
-                               IPATH_LINKINIT | IPATH_LINKDOWN |
-                               IPATH_LINKACTIVE | IPATH_NOCABLE);
-                       ipath_hol_down(dd);
-               } else  if (ibstate == init) {
-                       /*
-                        * set INIT and DOWN.  Down is checked by
-                        * most of the other code, but INIT is
-                        * useful to know in a few places.
-                        */
-                       dd->ipath_flags |= IPATH_LINKINIT |
-                               IPATH_LINKDOWN;
-                       dd->ipath_flags &= ~(IPATH_LINKUNK |
-                               IPATH_LINKARMED | IPATH_LINKACTIVE |
-                               IPATH_NOCABLE);
-                       ipath_hol_down(dd);
-               } else {  /* active */
-                       dd->ipath_lastlinkrecov = ipath_snap_cntr(dd,
-                               dd->ipath_cregs->cr_iblinkerrrecovcnt);
-                       *dd->ipath_statusp |=
-                               IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
-                       dd->ipath_flags |= IPATH_LINKACTIVE;
-                       dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
-                               | IPATH_LINKDOWN | IPATH_LINKARMED |
-                               IPATH_NOCABLE);
-                       if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-                               ipath_restart_sdma(dd);
-                       signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
-                       /* LED active not handled in chip _f_updown */
-                       dd->ipath_f_setextled(dd, lstate, ltstate);
-                       ipath_hol_up(dd);
-               }
-
-               /*
-                * print after we've already done the work, so as not to
-                * delay the state changes and notifications, for debugging
-                */
-               if (lstate == lastlstate)
-                       ipath_cdbg(LINKVERB, "Unchanged from last: %s "
-                               "(%x)\n", ib_linkstate(dd, ibcs), ibstate);
-               else
-                       ipath_cdbg(VERBOSE, "Unit %u: link up to %s %s (%x)\n",
-                                 dd->ipath_unit, ib_linkstate(dd, ibcs),
-                                 ipath_ibcstatus_str[ltstate],  ibstate);
-       } else { /* down */
-               if (dd->ipath_flags & IPATH_LINKACTIVE)
-                       signal_ib_event(dd, IB_EVENT_PORT_ERR);
-               dd->ipath_flags |= IPATH_LINKDOWN;
-               dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
-                                    | IPATH_LINKACTIVE |
-                                    IPATH_LINKARMED);
-               *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-               dd->ipath_lli_counter = 0;
-
-               if (lastlstate != INFINIPATH_IBCS_L_STATE_DOWN)
-                       ipath_cdbg(VERBOSE, "Unit %u link state down "
-                                  "(state 0x%x), from %s\n",
-                                  dd->ipath_unit, lstate,
-                                  ib_linkstate(dd, dd->ipath_lastibcstat));
-               else
-                       ipath_cdbg(LINKVERB, "Unit %u link state changed "
-                                  "to %s (0x%x) from down (%x)\n",
-                                  dd->ipath_unit,
-                                  ipath_ibcstatus_str[ltstate],
-                                  ibstate, lastlstate);
-       }
-
-skip_ibchange:
-       dd->ipath_lastibcstat = ibcs;
-done:
-       return;
-}
-
-static void handle_supp_msgs(struct ipath_devdata *dd,
-                            unsigned supp_msgs, char *msg, u32 msgsz)
-{
-       /*
-        * Print the message unless it's ibc status change only, which
-        * happens so often we never want to count it.
-        */
-       if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
-               int iserr;
-               ipath_err_t mask;
-               iserr = ipath_decode_err(dd, msg, msgsz,
-                                        dd->ipath_lasterror &
-                                        ~INFINIPATH_E_IBSTATUSCHANGED);
-
-               mask = INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-                       INFINIPATH_E_PKTERRS | INFINIPATH_E_SDMADISABLED;
-
-               /* if we're in debug, then don't mask SDMADISABLED msgs */
-               if (ipath_debug & __IPATH_DBG)
-                       mask &= ~INFINIPATH_E_SDMADISABLED;
-
-               if (dd->ipath_lasterror & ~mask)
-                       ipath_dev_err(dd, "Suppressed %u messages for "
-                                     "fast-repeating errors (%s) (%llx)\n",
-                                     supp_msgs, msg,
-                                     (unsigned long long)
-                                     dd->ipath_lasterror);
-               else {
-                       /*
-                        * rcvegrfull and rcvhdrqfull are "normal", for some
-                        * types of processes (mostly benchmarks) that send
-                        * huge numbers of messages, while not processing
-                        * them. So only complain about these at debug
-                        * level.
-                        */
-                       if (iserr)
-                               ipath_dbg("Suppressed %u messages for %s\n",
-                                         supp_msgs, msg);
-                       else
-                               ipath_cdbg(ERRPKT,
-                                       "Suppressed %u messages for %s\n",
-                                         supp_msgs, msg);
-               }
-       }
-}
-
-static unsigned handle_frequent_errors(struct ipath_devdata *dd,
-                                      ipath_err_t errs, char *msg,
-                                      u32 msgsz, int *noprint)
-{
-       unsigned long nc;
-       static unsigned long nextmsg_time;
-       static unsigned nmsgs, supp_msgs;
-
-       /*
-        * Throttle back "fast" messages to no more than 10 per 5 seconds.
-        * This isn't perfect, but it's a reasonable heuristic. If we get
-        * more than 10, give a 6x longer delay.
-        */
-       nc = jiffies;
-       if (nmsgs > 10) {
-               if (time_before(nc, nextmsg_time)) {
-                       *noprint = 1;
-                       if (!supp_msgs++)
-                               nextmsg_time = nc + HZ * 3;
-               } else if (supp_msgs) {
-                       handle_supp_msgs(dd, supp_msgs, msg, msgsz);
-                       supp_msgs = 0;
-                       nmsgs = 0;
-               }
-       } else if (!nmsgs++ || time_after(nc, nextmsg_time)) {
-               nextmsg_time = nc + HZ / 2;
-       }
-
-       return supp_msgs;
-}
-
-static void handle_sdma_errors(struct ipath_devdata *dd, ipath_err_t errs)
-{
-       unsigned long flags;
-       int expected;
-
-       if (ipath_debug & __IPATH_DBG) {
-               char msg[128];
-               ipath_decode_err(dd, msg, sizeof msg, errs &
-                       INFINIPATH_E_SDMAERRS);
-               ipath_dbg("errors %lx (%s)\n", (unsigned long)errs, msg);
-       }
-       if (ipath_debug & __IPATH_VERBDBG) {
-               unsigned long tl, hd, status, lengen;
-               tl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
-               hd = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
-               status = ipath_read_kreg64(dd
-                       , dd->ipath_kregs->kr_senddmastatus);
-               lengen = ipath_read_kreg64(dd,
-                       dd->ipath_kregs->kr_senddmalengen);
-               ipath_cdbg(VERBOSE, "sdma tl 0x%lx hd 0x%lx status 0x%lx "
-                       "lengen 0x%lx\n", tl, hd, status, lengen);
-       }
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-       __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-       expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-       if (!expected)
-               ipath_cancel_sends(dd, 1);
-}
-
-static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat)
-{
-       unsigned long flags;
-       int expected;
-
-       if ((istat & INFINIPATH_I_SDMAINT) &&
-           !test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-               ipath_sdma_intr(dd);
-
-       if (istat & INFINIPATH_I_SDMADISABLED) {
-               expected = test_bit(IPATH_SDMA_ABORTING,
-                       &dd->ipath_sdma_status);
-               ipath_dbg("%s SDmaDisabled intr\n",
-                       expected ? "expected" : "unexpected");
-               spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-               __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-               spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-               if (!expected)
-                       ipath_cancel_sends(dd, 1);
-               if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-                       tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
-       }
-}
-
-static int handle_hdrq_full(struct ipath_devdata *dd)
-{
-       int chkerrpkts = 0;
-       u32 hd, tl;
-       u32 i;
-
-       ipath_stats.sps_hdrqfull++;
-       for (i = 0; i < dd->ipath_cfgports; i++) {
-               struct ipath_portdata *pd = dd->ipath_pd[i];
-
-               if (i == 0) {
-                       /*
-                        * For kernel receive queues, we just want to know
-                        * if there are packets in the queue that we can
-                        * process.
-                        */
-                       if (pd->port_head != ipath_get_hdrqtail(pd))
-                               chkerrpkts |= 1 << i;
-                       continue;
-               }
-
-               /* Skip if user context is not open */
-               if (!pd || !pd->port_cnt)
-                       continue;
-
-               /* Don't report the same point multiple times. */
-               if (dd->ipath_flags & IPATH_NODMA_RTAIL)
-                       tl = ipath_read_ureg32(dd, ur_rcvhdrtail, i);
-               else
-                       tl = ipath_get_rcvhdrtail(pd);
-               if (tl == pd->port_lastrcvhdrqtail)
-                       continue;
-
-               hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i);
-               if (hd == (tl + 1) || (!hd && tl == dd->ipath_hdrqlast)) {
-                       pd->port_lastrcvhdrqtail = tl;
-                       pd->port_hdrqfull++;
-                       /* flush hdrqfull so that poll() sees it */
-                       wmb();
-                       wake_up_interruptible(&pd->port_wait);
-               }
-       }
-
-       return chkerrpkts;
-}
-
-static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
-{
-       char msg[128];
-       u64 ignore_this_time = 0;
-       u64 iserr = 0;
-       int chkerrpkts = 0, noprint = 0;
-       unsigned supp_msgs;
-       int log_idx;
-
-       /*
-        * don't report errors that are masked, either at init
-        * (not set in ipath_errormask), or temporarily (set in
-        * ipath_maskederrs)
-        */
-       errs &= dd->ipath_errormask & ~dd->ipath_maskederrs;
-
-       supp_msgs = handle_frequent_errors(dd, errs, msg, (u32)sizeof msg,
-               &noprint);
-
-       /* do these first, they are most important */
-       if (errs & INFINIPATH_E_HARDWARE) {
-               /* reuse same msg buf */
-               dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
-       } else {
-               u64 mask;
-               for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) {
-                       mask = dd->ipath_eep_st_masks[log_idx].errs_to_log;
-                       if (errs & mask)
-                               ipath_inc_eeprom_err(dd, log_idx, 1);
-               }
-       }
-
-       if (errs & INFINIPATH_E_SDMAERRS)
-               handle_sdma_errors(dd, errs);
-
-       if (!noprint && (errs & ~dd->ipath_e_bitsextant))
-               ipath_dev_err(dd, "error interrupt with unknown errors "
-                             "%llx set\n", (unsigned long long)
-                             (errs & ~dd->ipath_e_bitsextant));
-
-       if (errs & E_SUM_ERRS)
-               ignore_this_time = handle_e_sum_errs(dd, errs);
-       else if ((errs & E_SUM_LINK_PKTERRS) &&
-           !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-               /*
-                * This can happen when SMA is trying to bring the link
-                * up, but the IB link changes state at the "wrong" time.
-                * The IB logic then complains that the packet isn't
-                * valid.  We don't want to confuse people, so we just
-                * don't print them, except at debug
-                */
-               ipath_dbg("Ignoring packet errors %llx, because link not "
-                         "ACTIVE\n", (unsigned long long) errs);
-               ignore_this_time = errs & E_SUM_LINK_PKTERRS;
-       }
-
-       if (supp_msgs == 250000) {
-               int s_iserr;
-               /*
-                * It's not entirely reasonable assuming that the errors set
-                * in the last clear period are all responsible for the
-                * problem, but the alternative is to assume it's the only
-                * ones on this particular interrupt, which also isn't great
-                */
-               dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
-
-               dd->ipath_errormask &= ~dd->ipath_maskederrs;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-                                dd->ipath_errormask);
-               s_iserr = ipath_decode_err(dd, msg, sizeof msg,
-                                          dd->ipath_maskederrs);
-
-               if (dd->ipath_maskederrs &
-                   ~(INFINIPATH_E_RRCVEGRFULL |
-                     INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
-                       ipath_dev_err(dd, "Temporarily disabling "
-                           "error(s) %llx reporting; too frequent (%s)\n",
-                               (unsigned long long) dd->ipath_maskederrs,
-                               msg);
-               else {
-                       /*
-                        * rcvegrfull and rcvhdrqfull are "normal",
-                        * for some types of processes (mostly benchmarks)
-                        * that send huge numbers of messages, while not
-                        * processing them.  So only complain about
-                        * these at debug level.
-                        */
-                       if (s_iserr)
-                               ipath_dbg("Temporarily disabling reporting "
-                                   "too frequent queue full errors (%s)\n",
-                                   msg);
-                       else
-                               ipath_cdbg(ERRPKT,
-                                   "Temporarily disabling reporting too"
-                                   " frequent packet errors (%s)\n",
-                                   msg);
-               }
-
-               /*
-                * Re-enable the masked errors after around 3 minutes.  in
-                * ipath_get_faststats().  If we have a series of fast
-                * repeating but different errors, the interval will keep
-                * stretching out, but that's OK, as that's pretty
-                * catastrophic.
-                */
-               dd->ipath_unmasktime = jiffies + HZ * 180;
-       }
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, errs);
-       if (ignore_this_time)
-               errs &= ~ignore_this_time;
-       if (errs & ~dd->ipath_lasterror) {
-               errs &= ~dd->ipath_lasterror;
-               /* never suppress duplicate hwerrors or ibstatuschange */
-               dd->ipath_lasterror |= errs &
-                       ~(INFINIPATH_E_HARDWARE |
-                         INFINIPATH_E_IBSTATUSCHANGED);
-       }
-
-       if (errs & INFINIPATH_E_SENDSPECIALTRIGGER) {
-               dd->ipath_spectriggerhit++;
-               ipath_dbg("%lu special trigger hits\n",
-                       dd->ipath_spectriggerhit);
-       }
-
-       /* likely due to cancel; so suppress message unless verbose */
-       if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
-               time_after(dd->ipath_lastcancel, jiffies)) {
-               /* armlaunch takes precedence; it often causes both. */
-               ipath_cdbg(VERBOSE,
-                       "Suppressed %s error (%llx) after sendbuf cancel\n",
-                       (errs &  INFINIPATH_E_SPIOARMLAUNCH) ?
-                       "armlaunch" : "sendpktlen", (unsigned long long)errs);
-               errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
-       }
-
-       if (!errs)
-               return 0;
-
-       if (!noprint) {
-               ipath_err_t mask;
-               /*
-                * The ones we mask off are handled specially below
-                * or above.  Also mask SDMADISABLED by default as it
-                * is too chatty.
-                */
-               mask = INFINIPATH_E_IBSTATUSCHANGED |
-                       INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-                       INFINIPATH_E_HARDWARE | INFINIPATH_E_SDMADISABLED;
-
-               /* if we're in debug, then don't mask SDMADISABLED msgs */
-               if (ipath_debug & __IPATH_DBG)
-                       mask &= ~INFINIPATH_E_SDMADISABLED;
-
-               ipath_decode_err(dd, msg, sizeof msg, errs & ~mask);
-       } else
-               /* so we don't need if (!noprint) at strlcat's below */
-               *msg = 0;
-
-       if (errs & E_SUM_PKTERRS) {
-               ipath_stats.sps_pkterrs++;
-               chkerrpkts = 1;
-       }
-       if (errs & E_SUM_ERRS)
-               ipath_stats.sps_errs++;
-
-       if (errs & (INFINIPATH_E_RICRC | INFINIPATH_E_RVCRC)) {
-               ipath_stats.sps_crcerrs++;
-               chkerrpkts = 1;
-       }
-       iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS);
-
-
-       /*
-        * We don't want to print these two as they happen, or we can make
-        * the situation even worse, because it takes so long to print
-        * messages to serial consoles.  Kernel ports get printed from
-        * fast_stats, no more than every 5 seconds, user ports get printed
-        * on close
-        */
-       if (errs & INFINIPATH_E_RRCVHDRFULL)
-               chkerrpkts |= handle_hdrq_full(dd);
-       if (errs & INFINIPATH_E_RRCVEGRFULL) {
-               struct ipath_portdata *pd = dd->ipath_pd[0];
-
-               /*
-                * since this is of less importance and not likely to
-                * happen without also getting hdrfull, only count
-                * occurrences; don't check each port (or even the kernel
-                * vs user)
-                */
-               ipath_stats.sps_etidfull++;
-               if (pd->port_head != ipath_get_hdrqtail(pd))
-                       chkerrpkts |= 1;
-       }
-
-       /*
-        * do this before IBSTATUSCHANGED, in case both bits set in a single
-        * interrupt; we want the STATUSCHANGE to "win", so we do our
-        * internal copy of state machine correctly
-        */
-       if (errs & INFINIPATH_E_RIBLOSTLINK) {
-               /*
-                * force through block below
-                */
-               errs |= INFINIPATH_E_IBSTATUSCHANGED;
-               ipath_stats.sps_iblink++;
-               dd->ipath_flags |= IPATH_LINKDOWN;
-               dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
-                                    | IPATH_LINKARMED | IPATH_LINKACTIVE);
-               *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-
-               ipath_dbg("Lost link, link now down (%s)\n",
-                       ipath_ibcstatus_str[ipath_read_kreg64(dd,
-                       dd->ipath_kregs->kr_ibcstatus) & 0xf]);
-       }
-       if (errs & INFINIPATH_E_IBSTATUSCHANGED)
-               handle_e_ibstatuschanged(dd, errs);
-
-       if (errs & INFINIPATH_E_RESET) {
-               if (!noprint)
-                       ipath_dev_err(dd, "Got reset, requires re-init "
-                                     "(unload and reload driver)\n");
-               dd->ipath_flags &= ~IPATH_INITTED;      /* needs re-init */
-               /* mark as having had error */
-               *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
-               *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
-       }
-
-       if (!noprint && *msg) {
-               if (iserr)
-                       ipath_dev_err(dd, "%s error\n", msg);
-       }
-       if (dd->ipath_state_wanted & dd->ipath_flags) {
-               ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
-                          "waking\n", dd->ipath_state_wanted,
-                          dd->ipath_flags);
-               wake_up_interruptible(&ipath_state_wait);
-       }
-
-       return chkerrpkts;
-}
-
-/*
- * try to cleanup as much as possible for anything that might have gone
- * wrong while in freeze mode, such as pio buffers being written by user
- * processes (causing armlaunch), send errors due to going into freeze mode,
- * etc., and try to avoid causing extra interrupts while doing so.
- * Forcibly update the in-memory pioavail register copies after cleanup
- * because the chip won't do it while in freeze mode (the register values
- * themselves are kept correct).
- * Make sure that we don't lose any important interrupts by using the chip
- * feature that says that writing 0 to a bit in *clear that is set in
- * *status will cause an interrupt to be generated again (if allowed by
- * the *mask value).
- */
-void ipath_clear_freeze(struct ipath_devdata *dd)
-{
-       /* disable error interrupts, to avoid confusion */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
-
-       /* also disable interrupts; errormask is sometimes overwriten */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-
-       ipath_cancel_sends(dd, 1);
-
-       /* clear the freeze, and be sure chip saw it */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                        dd->ipath_control);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-
-       /* force in-memory update now we are out of freeze */
-       ipath_force_pio_avail_update(dd);
-
-       /*
-        * force new interrupt if any hwerr, error or interrupt bits are
-        * still set, and clear "safe" send packet errors related to freeze
-        * and cancelling sends.  Re-enable error interrupts before possible
-        * force of re-interrupt on pending interrupts.
-        */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
-               E_SPKT_ERRS_IGNORE);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-               dd->ipath_errormask);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
-}
-
-
-/* this is separate to allow for better optimization of ipath_intr() */
-
-static noinline void ipath_bad_intr(struct ipath_devdata *dd, u32 *unexpectp)
-{
-       /*
-        * sometimes happen during driver init and unload, don't want
-        * to process any interrupts at that point
-        */
-
-       /* this is just a bandaid, not a fix, if something goes badly
-        * wrong */
-       if (++*unexpectp > 100) {
-               if (++*unexpectp > 105) {
-                       /*
-                        * ok, we must be taking somebody else's interrupts,
-                        * due to a messed up mptable and/or PIRQ table, so
-                        * unregister the interrupt.  We've seen this during
-                        * linuxbios development work, and it may happen in
-                        * the future again.
-                        */
-                       if (dd->pcidev && dd->ipath_irq) {
-                               ipath_dev_err(dd, "Now %u unexpected "
-                                             "interrupts, unregistering "
-                                             "interrupt handler\n",
-                                             *unexpectp);
-                               ipath_dbg("free_irq of irq %d\n",
-                                         dd->ipath_irq);
-                               dd->ipath_f_free_irq(dd);
-                       }
-               }
-               if (ipath_read_ireg(dd, dd->ipath_kregs->kr_intmask)) {
-                       ipath_dev_err(dd, "%u unexpected interrupts, "
-                                     "disabling interrupts completely\n",
-                                     *unexpectp);
-                       /*
-                        * disable all interrupts, something is very wrong
-                        */
-                       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
-                                        0ULL);
-               }
-       } else if (*unexpectp > 1)
-               ipath_dbg("Interrupt when not ready, should not happen, "
-                         "ignoring\n");
-}
-
-static noinline void ipath_bad_regread(struct ipath_devdata *dd)
-{
-       static int allbits;
-
-       /* separate routine, for better optimization of ipath_intr() */
-
-       /*
-        * We print the message and disable interrupts, in hope of
-        * having a better chance of debugging the problem.
-        */
-       ipath_dev_err(dd,
-                     "Read of interrupt status failed (all bits set)\n");
-       if (allbits++) {
-               /* disable all interrupts, something is very wrong */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-               if (allbits == 2) {
-                       ipath_dev_err(dd, "Still bad interrupt status, "
-                                     "unregistering interrupt\n");
-                       dd->ipath_f_free_irq(dd);
-               } else if (allbits > 2) {
-                       if ((allbits % 10000) == 0)
-                               printk(".");
-               } else
-                       ipath_dev_err(dd, "Disabling interrupts, "
-                                     "multiple errors\n");
-       }
-}
-
-/*
- * Tell the verbs layer that PIO buffers are available and, if it asks for
- * it, turn the PIO-buffer-available interrupt back on.
- *
- * ipath_ib_piobufavail() is called with dd->verbs_dev.  Only when it
- * returns a positive value is INFINIPATH_S_PIOINTBUFAVAIL set again in the
- * sendctrl shadow and written to the chip; otherwise nothing else is done.
- */
-static void handle_layer_pioavail(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-       int ret;
-
-       ret = ipath_ib_piobufavail(dd->verbs_dev);
-       if (ret > 0)
-               goto set;
-
-       return;
-set:
-       /* shadow update and register write are done under ipath_sendctrl_lock */
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                        dd->ipath_sendctrl);
-       /* presumably a read-back to flush the write above -- confirm */
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-}
-
-/*
- * Handle receive interrupts for user ports; this means a user
- * process was waiting for a packet to arrive, and didn't want
- * to poll.
- *
- * @dd:    device the interrupt came from
- * @istat: interrupt status value as seen by ipath_intr()
- *
- * The receive-available and receive-urgent fields of @istat are merged
- * into one per-port bitmap.  Ports 1 .. ipath_cfgports-1 are scanned
- * (port 0 is not looked at here); each flagged port that has a port
- * structure and a non-zero port_cnt gets its waiter woken if one of the
- * IPATH_PORT_WAITING_* flags was set.
- */
-static void handle_urcv(struct ipath_devdata *dd, u64 istat)
-{
-       u64 portr;
-       int i;
-       /* set once any IPATH_PORT_WAITING_RCV waiter has been woken */
-       int rcvdint = 0;
-
-       /*
-        * test_and_clear_bit(IPATH_PORT_WAITING_RCV) and
-        * test_and_clear_bit(IPATH_PORT_WAITING_URG) below
-        * would both like timely updates of the bits so that
-        * we don't pass them by unnecessarily.  the rmb()
-        * here ensures that we see them promptly -- the
-        * corresponding wmb()'s are in ipath_poll_urgent()
-        * and ipath_poll_next()...
-        */
-       rmb();
-       /* bit i of portr is set if port i has either kind of receive pending */
-       portr = ((istat >> dd->ipath_i_rcvavail_shift) &
-                dd->ipath_i_rcvavail_mask) |
-               ((istat >> dd->ipath_i_rcvurg_shift) &
-                dd->ipath_i_rcvurg_mask);
-       for (i = 1; i < dd->ipath_cfgports; i++) {
-               struct ipath_portdata *pd = dd->ipath_pd[i];
-
-               if (portr & (1 << i) && pd && pd->port_cnt) {
-                       if (test_and_clear_bit(IPATH_PORT_WAITING_RCV,
-                                              &pd->port_flag)) {
-                               /*
-                                * drop this port's bit from the rcvctrl
-                                * shadow; the register itself is written
-                                * once, after the loop
-                                */
-                               clear_bit(i + dd->ipath_r_intravail_shift,
-                                         &dd->ipath_rcvctrl);
-                               wake_up_interruptible(&pd->port_wait);
-                               rcvdint = 1;
-                       } else if (test_and_clear_bit(IPATH_PORT_WAITING_URG,
-                                                     &pd->port_flag)) {
-                               /* count the urgent event, then wake the waiter */
-                               pd->port_urgent++;
-                               wake_up_interruptible(&pd->port_wait);
-                       }
-               }
-       }
-       if (rcvdint) {
-               /* only want to take one interrupt, so turn off the rcv
-                * interrupt for all the ports that we set the rcv_waiting
-                * (but never for kernel port)
-                */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                                dd->ipath_rcvctrl);
-       }
-}
-
-/*
- * Top-level interrupt handler for an infinipath device.
- *
- * @irq:  interrupt number (not referenced in the body)
- * @data: the struct ipath_devdata of the device
- *
- * Returns IRQ_HANDLED after normal processing, and also when the device
- * is not flagged IPATH_PRESENT.  Returns IRQ_NONE when the device is not
- * flagged IPATH_INITTED, when the interrupt status reads as zero, or when
- * it reads as all ones.
- *
- * Order of work on the normal path: error status, GPIO status, write of
- * the interrupt-clear register, port 0 receive, user-port receive, send
- * DMA, PIO buffer available.
- */
-irqreturn_t ipath_intr(int irq, void *data)
-{
-       struct ipath_devdata *dd = data;
-       u64 istat, chk0rcv = 0;
-       ipath_err_t estat = 0;
-       irqreturn_t ret;
-       /*
-        * handed to ipath_bad_intr() while not initted and reset once a
-        * usable status has been read; function-static, so not per device
-        */
-       static unsigned unexpected = 0;
-       u64 kportrbits;
-
-       ipath_stats.sps_ints++;
-
-       /* per-device count that stops incrementing at the u32 maximum */
-       if (dd->ipath_int_counter != (u32) -1)
-               dd->ipath_int_counter++;
-
-       if (!(dd->ipath_flags & IPATH_PRESENT)) {
-               /*
-                * This return value is not great, but we do not want the
-                * interrupt core code to remove our interrupt handler
-                * because we don't appear to be handling an interrupt
-                * during a chip reset.
-                */
-               return IRQ_HANDLED;
-       }
-
-       /*
-        * this needs to be flags&initted, not statusp, so we keep
-        * taking interrupts even after link goes down, etc.
-        * Also, we *must* clear the interrupt at some point, or we won't
-        * take it again, which can be real bad for errors, etc...
-        */
-
-       if (!(dd->ipath_flags & IPATH_INITTED)) {
-               ipath_bad_intr(dd, &unexpected);
-               ret = IRQ_NONE;
-               goto bail;
-       }
-
-       istat = ipath_read_ireg(dd, dd->ipath_kregs->kr_intstatus);
-
-       if (unlikely(!istat)) {
-               ipath_stats.sps_nullintr++;
-               ret = IRQ_NONE; /* not our interrupt, or already handled */
-               goto bail;
-       }
-       /* -1 converts to an all-ones u64 here */
-       if (unlikely(istat == -1)) {
-               ipath_bad_regread(dd);
-               /* don't know if it was our interrupt or not */
-               ret = IRQ_NONE;
-               goto bail;
-       }
-
-       if (unexpected)
-               unexpected = 0;
-
-       if (unlikely(istat & ~dd->ipath_i_bitsextant))
-               ipath_dev_err(dd,
-                             "interrupt with unknown interrupts %Lx set\n",
-                             (unsigned long long)
-                             istat & ~dd->ipath_i_bitsextant);
-       else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */
-               ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n",
-                       (unsigned long long) istat);
-
-       if (istat & INFINIPATH_I_ERROR) {
-               ipath_stats.sps_errints++;
-               estat = ipath_read_kreg64(dd,
-                                         dd->ipath_kregs->kr_errorstatus);
-               if (!estat)
-                       dev_info(&dd->pcidev->dev, "error interrupt (%Lx), "
-                                "but no error bits set!\n",
-                                (unsigned long long) istat);
-               else if (estat == -1LL)
-                       /*
-                        * should we try clearing all, or hope next read
-                        * works?
-                        */
-                       ipath_dev_err(dd, "Read of error status failed "
-                                     "(all bits set); ignoring\n");
-               else
-                       /* a non-zero result forces the port 0 receive check below */
-                       chk0rcv |= handle_errors(dd, estat);
-       }
-
-       if (istat & INFINIPATH_I_GPIO) {
-               /*
-                * GPIO interrupts fall in two broad classes:
-                * GPIO_2 indicates (on some HT4xx boards) that a packet
-                *        has arrived for Port 0. Checking for this
-                *        is controlled by flag IPATH_GPIO_INTR.
-                * GPIO_3..5 on IBA6120 Rev2 and IBA6110 Rev4 chips indicate
-                *        errors that we need to count. Checking for this
-                *        is controlled by flag IPATH_GPIO_ERRINTRS.
-                */
-               u32 gpiostatus;
-               /* GPIO bits to acknowledge via kr_gpio_clear at the end */
-               u32 to_clear = 0;
-
-               gpiostatus = ipath_read_kreg32(
-                       dd, dd->ipath_kregs->kr_gpio_status);
-               /* First the error-counter case. */
-               if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) &&
-                   (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) {
-                       /* want to clear the bits we see asserted. */
-                       to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK);
-
-                       /*
-                        * Count appropriately, clear bits out of our copy,
-                        * as they have been "handled".
-                        */
-                       if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) {
-                               ipath_dbg("FlowCtl on UnsupVL\n");
-                               dd->ipath_rxfc_unsupvl_errs++;
-                       }
-                       if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) {
-                               ipath_dbg("Overrun Threshold exceeded\n");
-                               dd->ipath_overrun_thresh_errs++;
-                       }
-                       if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) {
-                               ipath_dbg("Local Link Integrity error\n");
-                               dd->ipath_lli_errs++;
-                       }
-                       gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK;
-               }
-               /* Now the Port0 Receive case */
-               if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) &&
-                   (dd->ipath_flags & IPATH_GPIO_INTR)) {
-                       /*
-                        * The Port0 GPIO status bit is set, and we expected
-                        * it.  Clear it and record that in chk0rcv.
-                        * This probably only happens if a Port0 pkt
-                        * arrives at _just_ the wrong time, and we
-                        * handle that by setting chk0rcv.
-                        */
-                       to_clear |= (1 << IPATH_GPIO_PORT0_BIT);
-                       gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT);
-                       chk0rcv = 1;
-               }
-               if (gpiostatus) {
-                       /*
-                        * Some unexpected bits remain. If they could have
-                        * caused the interrupt, complain and clear.
-                        * To avoid repetition of this condition, also clear
-                        * the mask. It is almost certainly due to error.
-                        */
-                       const u32 mask = (u32) dd->ipath_gpio_mask;
-
-                       if (mask & gpiostatus) {
-                               ipath_dbg("Unexpected GPIO IRQ bits %x\n",
-                                 gpiostatus & mask);
-                               to_clear |= (gpiostatus & mask);
-                               dd->ipath_gpio_mask &= ~(gpiostatus & mask);
-                               ipath_write_kreg(dd,
-                                       dd->ipath_kregs->kr_gpio_mask,
-                                       dd->ipath_gpio_mask);
-                       }
-               }
-               if (to_clear) {
-                       ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
-                                       (u64) to_clear);
-               }
-       }
-
-       /*
-        * Clear the interrupt bits we found set, unless they are receive
-        * related, in which case we already cleared them above, and don't
-        * want to clear them again, because we might lose an interrupt.
-        * Clear it early, so we "know" the chip will have seen this by
-        * the time we process the queue, and will re-interrupt if necessary.
-        * The processor itself won't take the interrupt again until we return.
-        *
-        * NOTE(review): the write below passes the whole of istat, receive
-        * bits included -- confirm whether the "unless" above still holds.
-        */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);
-
-       /*
-        * Handle kernel receive queues before checking for pio buffers
-        * available since receives can overflow; piobuf waiters can afford
-        * a few extra cycles, since they were waiting anyway, and user's
-        * waiting for receive are at the bottom.
-        */
-       /* lowest bit of the rcvavail and rcvurg fields: the two port 0 bits */
-       kportrbits = (1ULL << dd->ipath_i_rcvavail_shift) |
-               (1ULL << dd->ipath_i_rcvurg_shift);
-       if (chk0rcv || (istat & kportrbits)) {
-               /* strip the port 0 bits so the user-port test below ignores them */
-               istat &= ~kportrbits;
-               ipath_kreceive(dd->ipath_pd[0]);
-       }
-
-       if (istat & ((dd->ipath_i_rcvavail_mask << dd->ipath_i_rcvavail_shift) |
-                    (dd->ipath_i_rcvurg_mask << dd->ipath_i_rcvurg_shift)))
-               handle_urcv(dd, istat);
-
-       if (istat & (INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED))
-               handle_sdma_intr(dd, istat);
-
-       if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
-               unsigned long flags;
-
-               /*
-                * turn the PIO-buffer-available interrupt off first;
-                * handle_layer_pioavail() may turn it back on
-                */
-               spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-               dd->ipath_sendctrl &= ~INFINIPATH_S_PIOINTBUFAVAIL;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                                dd->ipath_sendctrl);
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-               /* always process; sdma verbs uses PIO for acks and VL15  */
-               handle_layer_pioavail(dd);
-       }
-
-       ret = IRQ_HANDLED;
-
-bail:
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_kernel.h b/drivers/staging/rdma/ipath/ipath_kernel.h
deleted file mode 100644 (file)
index 66c934a..0000000
+++ /dev/null
@@ -1,1374 +0,0 @@
-#ifndef _IPATH_KERNEL_H
-#define _IPATH_KERNEL_H
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This header file is the base header file for infinipath kernel code
- * ipath_user.h serves a similar purpose for user code.
- */
-
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/mutex.h>
-#include <linux/list.h>
-#include <linux/scatterlist.h>
-#include <linux/sched.h>
-#include <asm/io.h>
-#include <rdma/ib_verbs.h>
-
-#include "ipath_common.h"
-#include "ipath_debug.h"
-#include "ipath_registers.h"
-
-/* only s/w major version of InfiniPath we can handle */
-#define IPATH_CHIP_VERS_MAJ 2U
-
-/* don't care about this except printing */
-#define IPATH_CHIP_VERS_MIN 0U
-
-/* temporary, maybe always */
-/* driver-wide statistics; e.g. ipath_intr() bumps sps_ints in it */
-extern struct infinipath_stats ipath_stats;
-
-/* same value as the major version above */
-#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ
-/*
- * First-cut criterion for "device is active" is
- * two thousand dwords combined Tx, Rx traffic per
- * 5-second interval. SMA packets are 64 dwords,
- * and occur "a few per second", presumably each way.
- */
-#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000)
-/*
- * Struct used to indicate which errors are logged in each of the
- * error-counters that are logged to EEPROM. A counter is incremented
- * _once_ (saturating at 255) for each event with any bits set in
- * the error or hwerror register masks below.
- */
-/* presumably the number of such EEPROM error counters -- confirm */
-#define IPATH_EEP_LOG_CNT (4)
-/* Pair of bit masks selecting the events that bump one EEPROM error counter. */
-struct ipath_eep_log_mask {
-       /* bits of the error register that count for this counter */
-       u64 errs_to_log;
-       /* bits of the hwerror register that count for this counter */
-       u64 hwerrs_to_log;
-};
-
-/*
- * Per-port state.  The device structure keeps an array of pointers to
- * these (ipath_pd); ipath_intr() hands entry 0 to ipath_kreceive() and
- * wakes sleepers on the other entries through port_wait.
- */
-struct ipath_portdata {
-       /*
-        * presumably the eager receive buffer chunks and their DMA
-        * addresses (see port_rcvegrbuf_chunks below) -- confirm
-        */
-       void **port_rcvegrbuf;
-       dma_addr_t *port_rcvegrbuf_phys;
-       /* rcvhdrq base, needs mmap before useful */
-       void *port_rcvhdrq;
-       /* kernel virtual address where hdrqtail is updated */
-       void *port_rcvhdrtail_kvaddr;
-       /*
-        * temp buffer for expected send setup, allocated at open, instead
-        * of each setup call
-        */
-       void *port_tid_pg_list;
-       /* when waiting for rcv or pioavail */
-       wait_queue_head_t port_wait;
-       /*
-        * rcvegr bufs base, physical, must fit
-        * in 44 bits (so 32 bit programs mmap64 44 bit works)
-        */
-       dma_addr_t port_rcvegr_phys;
-       /* mmap of hdrq, must fit in 44 bits */
-       dma_addr_t port_rcvhdrq_phys;
-       dma_addr_t port_rcvhdrqtailaddr_phys;
-       /*
-        * number of opens (including slave subports) on this instance
-        * (ignoring forks, dup, etc. for now)
-        */
-       int port_cnt;
-       /*
-        * NOTE(review): the comments that used to sit here ("how much space
-        * to leave at start of eager TID entries for protocol use, on each
-        * TID" and "instead of calculating it") do not describe the field
-        * below and look like leftovers.  port_port is presumably this
-        * port's number -- confirm.
-        */
-       unsigned port_port;
-       /* non-zero if port is being shared. */
-       u16 port_subport_cnt;
-       /*
-        * NOTE(review): original comment repeated "non-zero if port is
-        * being shared."; presumably the subport id -- confirm.
-        */
-       u16 port_subport_id;
-       /* number of pio bufs for this port (all procs, if shared) */
-       u32 port_piocnt;
-       /* first pio buffer for this port */
-       u32 port_pio_base;
-       /* chip offset of PIO buffers for this port */
-       u32 port_piobufs;
-       /* how many alloc_pages() chunks in port_rcvegrbuf_pages */
-       u32 port_rcvegrbuf_chunks;
-       /* how many egrbufs per chunk */
-       u32 port_rcvegrbufs_perchunk;
-       /*
-        * NOTE(review): original comment said "order for
-        * port_rcvegrbuf_pages", but this is a size_t named *_size;
-        * presumably a byte size -- confirm.
-        */
-       size_t port_rcvegrbuf_size;
-       /* rcvhdrq size (for freeing) */
-       size_t port_rcvhdrq_size;
-       /* next expected TID to check when looking for free */
-       u32 port_tidcursor;
-       /*
-        * bit flags changed with bit operations, e.g.
-        * IPATH_PORT_WAITING_RCV and IPATH_PORT_WAITING_URG
-        * (the original comment here, "next expected TID to check",
-        * did not describe this field)
-        */
-       unsigned long port_flag;
-       /* what happened */
-       unsigned long int_flag;
-       /* WAIT_RCV that timed out, no interrupt */
-       u32 port_rcvwait_to;
-       /* WAIT_PIO that timed out, no interrupt */
-       u32 port_piowait_to;
-       /* WAIT_RCV already happened, no wait */
-       u32 port_rcvnowait;
-       /* WAIT_PIO already happened, no wait */
-       u32 port_pionowait;
-       /* total number of rcvhdrqfull errors */
-       u32 port_hdrqfull;
-       /*
-        * Used to suppress multiple instances of same
-        * port staying stuck at same point.
-        */
-       u32 port_lastrcvhdrqtail;
-       /* saved total number of rcvhdrqfull errors for poll edge trigger */
-       u32 port_hdrqfull_poll;
-       /* total number of polled urgent packets */
-       u32 port_urgent;
-       /* saved total number of polled urgent packets for poll edge trigger */
-       u32 port_urgent_poll;
-       /* pid of process using this port */
-       struct pid *port_pid;
-       /* presumably one pid per subport when the port is shared -- confirm */
-       struct pid *port_subpid[INFINIPATH_MAX_SUBPORT];
-       /* same size as task_struct .comm[] */
-       char port_comm[TASK_COMM_LEN];
-       /* pkeys set by this use of this port */
-       u16 port_pkeys[4];
-       /* so file ops can get at unit */
-       struct ipath_devdata *port_dd;
-       /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
-       void *subport_uregbase;
-       /* An array of pages for the eager receive buffers * N */
-       void *subport_rcvegrbuf;
-       /* An array of pages for the eager header queue entries * N */
-       void *subport_rcvhdr_base;
-       /* The version of the library which opened this port */
-       u32 userversion;
-       /* Bitmask of active slaves */
-       u32 active_slaves;
-       /* Type of packets or conditions we want to poll for */
-       u16 poll_type;
-       /* port rcvhdrq head offset */
-       u32 port_head;
-       /* receive packet sequence counter */
-       u32 port_seq_cnt;
-};
-
-struct sk_buff;
-struct ipath_sge_state;
-struct ipath_verbs_txreq;
-
-/*
- * control information for layered drivers
- *
- * Embedded in the device structure as ipath_layer.
- */
-struct _ipath_layer {
-       /* untyped pointer; its owner and meaning are not visible here */
-       void *l_arg;
-};
-
-/*
- * An sk_buff together with a DMA address.  The device structure keeps
- * these in ipath_port0_skbinfo for the port 0 eager receive queue.
- */
-struct ipath_skbinfo {
-       struct sk_buff *skb;
-       /* presumably the DMA address skb's data is mapped at -- confirm */
-       dma_addr_t phys;
-};
-
-/*
- * One send-DMA transmit request.  Fields marked "sdma private" are, going
- * by that marking, for the send-DMA code only.
- */
-struct ipath_sdma_txreq {
-       /* presumably IPATH_SDMA_TXREQ_F_* bits -- confirm */
-       int                 flags;
-       int                 sg_count;
-       /* anonymous union: the request carries one of these two pointers */
-       union {
-               struct scatterlist *sg;
-               void *map_addr;
-       };
-       /* completion hook; presumably called as callback(cookie, status) -- confirm */
-       void              (*callback)(void *, int);
-       void               *callback_cookie;
-       /* presumably one of IPATH_SDMA_TXREQ_S_* -- confirm */
-       int                 callback_status;
-       u16                 start_idx;  /* sdma private */
-       u16                 next_descq_idx;  /* sdma private */
-       struct list_head    list;       /* sdma private */
-};
-
-/*
- * One send-DMA descriptor: two little-endian 64-bit words.  The device
- * structure points at an array of these through ipath_sdma_descq.
- */
-struct ipath_sdma_desc {
-       __le64 qw[2];
-};
-
-/* single-bit flags; presumably for ipath_sdma_txreq.flags -- confirm */
-#define IPATH_SDMA_TXREQ_F_USELARGEBUF  0x1
-#define IPATH_SDMA_TXREQ_F_HEADTOHOST   0x2
-#define IPATH_SDMA_TXREQ_F_INTREQ       0x4
-#define IPATH_SDMA_TXREQ_F_FREEBUF      0x8
-#define IPATH_SDMA_TXREQ_F_FREEDESC     0x10
-#define IPATH_SDMA_TXREQ_F_VL15         0x20
-
-/* status codes; presumably for ipath_sdma_txreq.callback_status -- confirm */
-#define IPATH_SDMA_TXREQ_S_OK        0
-#define IPATH_SDMA_TXREQ_S_SENDERROR 1
-#define IPATH_SDMA_TXREQ_S_ABORTED   2
-#define IPATH_SDMA_TXREQ_S_SHUTDOWN  3
-
-/* bit masks within a 64-bit send-DMA status value */
-#define IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG    (1ull << 63)
-#define IPATH_SDMA_STATUS_ABORT_IN_PROG                        (1ull << 62)
-#define IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE         (1ull << 61)
-#define IPATH_SDMA_STATUS_SCB_EMPTY                    (1ull << 30)
-
-/*
- * max dwords in small buffer packet
- *
- * Expands to an expression on "dd", so it can only be used where a
- * variable of that name with an ipath_piosize2k member is in scope.
- */
-#define IPATH_SMALLBUF_DWORDS (dd->ipath_piosize2k >> 2)
-
-/*
- * Possible IB config parameters for ipath_f_get/set_ib_cfg()
- *
- * IPATH_IB_HRTBT_ON and IPATH_IB_HRTBT_OFF are described as heartbeat
- * settings rather than parameter selectors, which is why their numbers
- * repeat those of other entries.
- */
-#define IPATH_IB_CFG_LIDLMC 0 /* Get/set LID (LS16b) and Mask (MS16b) */
-#define IPATH_IB_CFG_HRTBT 1 /* Get/set Heartbeat off/enable/auto */
-#define IPATH_IB_HRTBT_ON 3 /* Heartbeat enabled, sent every 100msec */
-#define IPATH_IB_HRTBT_OFF 0 /* Heartbeat off */
-#define IPATH_IB_CFG_LWID_ENB 2 /* Get/set allowed Link-width */
-#define IPATH_IB_CFG_LWID 3 /* Get currently active Link-width */
-#define IPATH_IB_CFG_SPD_ENB 4 /* Get/set allowed Link speeds */
-#define IPATH_IB_CFG_SPD 5 /* Get current Link spd */
-#define IPATH_IB_CFG_RXPOL_ENB 6 /* Get/set Auto-RX-polarity enable */
-#define IPATH_IB_CFG_LREV_ENB 7 /* Get/set Auto-Lane-reversal enable */
-/*
- * NOTE(review): the original comment here was "Get Auto-Lane-reversal
- * enable", apparently copied from the line above; the name suggests
- * "Get link latency" -- confirm.
- */
-#define IPATH_IB_CFG_LINKLATENCY 8
-
-
-struct ipath_devdata {
-       struct list_head ipath_list;
-
-       struct ipath_kregs const *ipath_kregs;
-       struct ipath_cregs const *ipath_cregs;
-
-       /* mem-mapped pointer to base of chip regs */
-       u64 __iomem *ipath_kregbase;
-       /* end of mem-mapped chip space; range checking */
-       u64 __iomem *ipath_kregend;
-       /* physical address of chip for io_remap, etc. */
-       unsigned long ipath_physaddr;
-       /* base of memory alloced for ipath_kregbase, for free */
-       u64 *ipath_kregalloc;
-       /* ipath_cfgports pointers */
-       struct ipath_portdata **ipath_pd;
-       /* sk_buffs used by port 0 eager receive queue */
-       struct ipath_skbinfo *ipath_port0_skbinfo;
-       /* kvirt address of 1st 2k pio buffer */
-       void __iomem *ipath_pio2kbase;
-       /* kvirt address of 1st 4k pio buffer */
-       void __iomem *ipath_pio4kbase;
-       /*
-        * points to area where PIOavail registers will be DMA'ed.
-        * Has to be on a page of it's own, because the page will be
-        * mapped into user program space.  This copy is *ONLY* ever
-        * written by DMA, not by the driver!  Need a copy per device
-        * when we get to multiple devices
-        */
-       volatile __le64 *ipath_pioavailregs_dma;
-       /* physical address where updates occur */
-       dma_addr_t ipath_pioavailregs_phys;
-       struct _ipath_layer ipath_layer;
-       /* setup intr */
-       int (*ipath_f_intrsetup)(struct ipath_devdata *);
-       /* fallback to alternate interrupt type if possible */
-       int (*ipath_f_intr_fallback)(struct ipath_devdata *);
-       /* setup on-chip bus config */
-       int (*ipath_f_bus)(struct ipath_devdata *, struct pci_dev *);
-       /* hard reset chip */
-       int (*ipath_f_reset)(struct ipath_devdata *);
-       int (*ipath_f_get_boardname)(struct ipath_devdata *, char *,
-                                    size_t);
-       void (*ipath_f_init_hwerrors)(struct ipath_devdata *);
-       void (*ipath_f_handle_hwerrors)(struct ipath_devdata *, char *,
-                                       size_t);
-       void (*ipath_f_quiet_serdes)(struct ipath_devdata *);
-       int (*ipath_f_bringup_serdes)(struct ipath_devdata *);
-       int (*ipath_f_early_init)(struct ipath_devdata *);
-       void (*ipath_f_clear_tids)(struct ipath_devdata *, unsigned);
-       void (*ipath_f_put_tid)(struct ipath_devdata *, u64 __iomem*,
-                               u32, unsigned long);
-       void (*ipath_f_tidtemplate)(struct ipath_devdata *);
-       void (*ipath_f_cleanup)(struct ipath_devdata *);
-       void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64);
-       /* fill out chip-specific fields */
-       int (*ipath_f_get_base_info)(struct ipath_portdata *, void *);
-       /* free irq */
-       void (*ipath_f_free_irq)(struct ipath_devdata *);
-       struct ipath_message_header *(*ipath_f_get_msgheader)
-                                       (struct ipath_devdata *, __le32 *);
-       void (*ipath_f_config_ports)(struct ipath_devdata *, ushort);
-       int (*ipath_f_get_ib_cfg)(struct ipath_devdata *, int);
-       int (*ipath_f_set_ib_cfg)(struct ipath_devdata *, int, u32);
-       void (*ipath_f_config_jint)(struct ipath_devdata *, u16 , u16);
-       void (*ipath_f_read_counters)(struct ipath_devdata *,
-                                       struct infinipath_counters *);
-       void (*ipath_f_xgxs_reset)(struct ipath_devdata *);
-       /* per chip actions needed for IB Link up/down changes */
-       int (*ipath_f_ib_updown)(struct ipath_devdata *, int, u64);
-
-       unsigned ipath_lastegr_idx;
-       struct ipath_ibdev *verbs_dev;
-       struct timer_list verbs_timer;
-       /* total dwords sent (summed from counter) */
-       u64 ipath_sword;
-       /* total dwords rcvd (summed from counter) */
-       u64 ipath_rword;
-       /* total packets sent (summed from counter) */
-       u64 ipath_spkts;
-       /* total packets rcvd (summed from counter) */
-       u64 ipath_rpkts;
-       /* ipath_statusp initially points to this. */
-       u64 _ipath_status;
-       /* GUID for this interface, in network order */
-       __be64 ipath_guid;
-       /*
-        * aggregrate of error bits reported since last cleared, for
-        * limiting of error reporting
-        */
-       ipath_err_t ipath_lasterror;
-       /*
-        * aggregrate of error bits reported since last cleared, for
-        * limiting of hwerror reporting
-        */
-       ipath_err_t ipath_lasthwerror;
-       /* errors masked because they occur too fast */
-       ipath_err_t ipath_maskederrs;
-       u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */
-       /* these 5 fields are used to establish deltas for IB Symbol
-        * errors and linkrecovery errors. They can be reported on
-        * some chips during link negotiation prior to INIT, and with
-        * DDR when faking DDR negotiations with non-IBTA switches.
-        * The chip counters are adjusted at driver unload if there is
-        * a non-zero delta.
-        */
-       u64 ibdeltainprog;
-       u64 ibsymdelta;
-       u64 ibsymsnap;
-       u64 iblnkerrdelta;
-       u64 iblnkerrsnap;
-
-       /* time in jiffies at which to re-enable maskederrs */
-       unsigned long ipath_unmasktime;
-       /* count of egrfull errors, combined for all ports */
-       u64 ipath_last_tidfull;
-       /* for ipath_qcheck() */
-       u64 ipath_lastport0rcv_cnt;
-       /* template for writing TIDs  */
-       u64 ipath_tidtemplate;
-       /* value to write to free TIDs */
-       u64 ipath_tidinvalid;
-       /* IBA6120 rcv interrupt setup */
-       u64 ipath_rhdrhead_intr_off;
-
-       /* size of memory at ipath_kregbase */
-       u32 ipath_kregsize;
-       /* number of registers used for pioavail */
-       u32 ipath_pioavregs;
-       /* IPATH_POLL, etc. */
-       u32 ipath_flags;
-       /* ipath_flags driver is waiting for */
-       u32 ipath_state_wanted;
-       /* last buffer for user use, first buf for kernel use is this
-        * index. */
-       u32 ipath_lastport_piobuf;
-       /* is a stats timer active */
-       u32 ipath_stats_timer_active;
-       /* number of interrupts for this device -- saturates... */
-       u32 ipath_int_counter;
-       /* dwords sent read from counter */
-       u32 ipath_lastsword;
-       /* dwords received read from counter */
-       u32 ipath_lastrword;
-       /* sent packets read from counter */
-       u32 ipath_lastspkts;
-       /* received packets read from counter */
-       u32 ipath_lastrpkts;
-       /* pio bufs allocated per port */
-       u32 ipath_pbufsport;
-       /* if remainder on bufs/port, ports < extrabuf get 1 extra */
-       u32 ipath_ports_extrabuf;
-       u32 ipath_pioupd_thresh; /* update threshold, some chips */
-       /*
-        * number of ports configured as max; zero is set to number chip
-        * supports, less gives more pio bufs/port, etc.
-        */
-       u32 ipath_cfgports;
-       /* count of port 0 hdrqfull errors */
-       u32 ipath_p0_hdrqfull;
-       /* port 0 number of receive eager buffers */
-       u32 ipath_p0_rcvegrcnt;
-
-       /*
-        * index of last piobuffer we used.  Speeds up searching, by
-        * starting at this point.  Doesn't matter if multiple cpu's use and
-        * update, last updater is only write that matters.  Whenever it
-        * wraps, we update shadow copies.  Need a copy per device when we
-        * get to multiple devices
-        */
-       u32 ipath_lastpioindex;
-       u32 ipath_lastpioindexl;
-       /* max length of freezemsg */
-       u32 ipath_freezelen;
-       /*
-        * consecutive times we wanted a PIO buffer but were unable to
-        * get one
-        */
-       u32 ipath_consec_nopiobuf;
-       /*
-        * hint that we should update ipath_pioavailshadow before
-        * looking for a PIO buffer
-        */
-       u32 ipath_upd_pio_shadow;
-       /* so we can rewrite it after a chip reset */
-       u32 ipath_pcibar0;
-       /* so we can rewrite it after a chip reset */
-       u32 ipath_pcibar1;
-       u32 ipath_x1_fix_tries;
-       u32 ipath_autoneg_tries;
-       u32 serdes_first_init_done;
-
-       struct ipath_relock {
-               atomic_t ipath_relock_timer_active;
-               struct timer_list ipath_relock_timer;
-               unsigned int ipath_relock_interval; /* in jiffies */
-       } ipath_relock_singleton;
-
-       /* interrupt number */
-       int ipath_irq;
-       /* HT/PCI Vendor ID (here for NodeInfo) */
-       u16 ipath_vendorid;
-       /* HT/PCI Device ID (here for NodeInfo) */
-       u16 ipath_deviceid;
-       /* offset in HT config space of slave/primary interface block */
-       u8 ipath_ht_slave_off;
-       /* for write combining settings */
-       int wc_cookie;
-       /* ref count for each pkey */
-       atomic_t ipath_pkeyrefs[4];
-       /* shadow copy of struct page *'s for exp tid pages */
-       struct page **ipath_pageshadow;
-       /* shadow copy of dma handles for exp tid pages */
-       dma_addr_t *ipath_physshadow;
-       u64 __iomem *ipath_egrtidbase;
-       /* lock to workaround chip bug 9437 and others */
-       spinlock_t ipath_kernel_tid_lock;
-       spinlock_t ipath_user_tid_lock;
-       spinlock_t ipath_sendctrl_lock;
-       /* around ipath_pd and (user ports) port_cnt use (intr vs free) */
-       spinlock_t ipath_uctxt_lock;
-
-       /*
-        * IPATH_STATUS_*,
-        * this address is mapped readonly into user processes so they can
-        * get status cheaply, whenever they want.
-        */
-       u64 *ipath_statusp;
-       /* freeze msg if hw error put chip in freeze */
-       char *ipath_freezemsg;
-       /* pci access data structure */
-       struct pci_dev *pcidev;
-       struct cdev *user_cdev;
-       struct cdev *diag_cdev;
-       struct device *user_dev;
-       struct device *diag_dev;
-       /* timer used to prevent stats overflow, error throttling, etc. */
-       struct timer_list ipath_stats_timer;
-       /* timer to verify interrupts work, and fallback if possible */
-       struct timer_list ipath_intrchk_timer;
-       void *ipath_dummy_hdrq; /* used after port close */
-       dma_addr_t ipath_dummy_hdrq_phys;
-
-       /* SendDMA related entries */
-       spinlock_t            ipath_sdma_lock;
-       unsigned long         ipath_sdma_status;
-       unsigned long         ipath_sdma_abort_jiffies;
-       unsigned long         ipath_sdma_abort_intr_timeout;
-       unsigned long         ipath_sdma_buf_jiffies;
-       struct ipath_sdma_desc *ipath_sdma_descq;
-       u64                   ipath_sdma_descq_added;
-       u64                   ipath_sdma_descq_removed;
-       int                   ipath_sdma_desc_nreserved;
-       u16                   ipath_sdma_descq_cnt;
-       u16                   ipath_sdma_descq_tail;
-       u16                   ipath_sdma_descq_head;
-       u16                   ipath_sdma_next_intr;
-       u16                   ipath_sdma_reset_wait;
-       u8                    ipath_sdma_generation;
-       struct tasklet_struct ipath_sdma_abort_task;
-       struct tasklet_struct ipath_sdma_notify_task;
-       struct list_head      ipath_sdma_activelist;
-       struct list_head      ipath_sdma_notifylist;
-       atomic_t              ipath_sdma_vl15_count;
-       struct timer_list     ipath_sdma_vl15_timer;
-
-       dma_addr_t       ipath_sdma_descq_phys;
-       volatile __le64 *ipath_sdma_head_dma;
-       dma_addr_t       ipath_sdma_head_phys;
-
-       unsigned long ipath_ureg_align; /* user register alignment */
-
-       struct delayed_work ipath_autoneg_work;
-       wait_queue_head_t ipath_autoneg_wait;
-
-       /* HoL blocking / user app forward-progress state */
-       unsigned          ipath_hol_state;
-       unsigned          ipath_hol_next;
-       struct timer_list ipath_hol_timer;
-
-       /*
-        * Shadow copies of registers; size indicates read access size.
-        * Most of them are readonly, but some are write-only register,
-        * where we manipulate the bits in the shadow copy, and then write
-        * the shadow copy to infinipath.
-        *
-        * We deliberately make most of these 32 bits, since they have
-        * restricted range.  For any that we read, we won't to generate 32
-        * bit accesses, since Opteron will generate 2 separate 32 bit HT
-        * transactions for a 64 bit read, and we want to avoid unnecessary
-        * HT transactions.
-        */
-
-       /* This is the 64 bit group */
-
-       /*
-        * shadow of pioavail, check to be sure it's large enough at
-        * init time.
-        */
-       unsigned long ipath_pioavailshadow[8];
-       /* bitmap of send buffers available for the kernel to use with PIO. */
-       unsigned long ipath_pioavailkernel[8];
-       /* shadow of kr_gpio_out, for rmw ops */
-       u64 ipath_gpio_out;
-       /* shadow the gpio mask register */
-       u64 ipath_gpio_mask;
-       /* shadow the gpio output enable, etc... */
-       u64 ipath_extctrl;
-       /* kr_revision shadow */
-       u64 ipath_revision;
-       /*
-        * shadow of ibcctrl, for interrupt handling of link changes,
-        * etc.
-        */
-       u64 ipath_ibcctrl;
-       /*
-        * last ibcstatus, to suppress "duplicate" status change messages,
-        * mostly from 2 to 3
-        */
-       u64 ipath_lastibcstat;
-       /* hwerrmask shadow */
-       ipath_err_t ipath_hwerrmask;
-       ipath_err_t ipath_errormask; /* errormask shadow */
-       /* interrupt config reg shadow */
-       u64 ipath_intconfig;
-       /* kr_sendpiobufbase value */
-       u64 ipath_piobufbase;
-       /* kr_ibcddrctrl shadow */
-       u64 ipath_ibcddrctrl;
-
-       /* these are the "32 bit" regs */
-
-       /*
-        * number of GUIDs in the flash for this interface; may need some
-        * rethinking for setting on other ifaces
-        */
-       u32 ipath_nguid;
-       /*
-        * the following two are 32-bit bitmasks, but {test,clear,set}_bit
-        * all expect bit fields to be "unsigned long"
-        */
-       /* shadow kr_rcvctrl */
-       unsigned long ipath_rcvctrl;
-       /* shadow kr_sendctrl */
-       unsigned long ipath_sendctrl;
-       /* to not count armlaunch after cancel */
-       unsigned long ipath_lastcancel;
-       /* count cases where special trigger was needed (double write) */
-       unsigned long ipath_spectriggerhit;
-
-       /* value we put in kr_rcvhdrcnt */
-       u32 ipath_rcvhdrcnt;
-       /* value we put in kr_rcvhdrsize */
-       u32 ipath_rcvhdrsize;
-       /* value we put in kr_rcvhdrentsize */
-       u32 ipath_rcvhdrentsize;
-       /* offset of last entry in rcvhdrq */
-       u32 ipath_hdrqlast;
-       /* kr_portcnt value */
-       u32 ipath_portcnt;
-       /* kr_pagealign value */
-       u32 ipath_palign;
-       /* number of "2KB" PIO buffers */
-       u32 ipath_piobcnt2k;
-       /* size in bytes of "2KB" PIO buffers */
-       u32 ipath_piosize2k;
-       /* number of "4KB" PIO buffers */
-       u32 ipath_piobcnt4k;
-       /* size in bytes of "4KB" PIO buffers */
-       u32 ipath_piosize4k;
-       u32 ipath_pioreserved; /* reserved special-inkernel; */
-       /* kr_rcvegrbase value */
-       u32 ipath_rcvegrbase;
-       /* kr_rcvegrcnt value */
-       u32 ipath_rcvegrcnt;
-       /* kr_rcvtidbase value */
-       u32 ipath_rcvtidbase;
-       /* kr_rcvtidcnt value */
-       u32 ipath_rcvtidcnt;
-       /* kr_sendregbase */
-       u32 ipath_sregbase;
-       /* kr_userregbase */
-       u32 ipath_uregbase;
-       /* kr_counterregbase */
-       u32 ipath_cregbase;
-       /* shadow the control register contents */
-       u32 ipath_control;
-       /* PCI revision register (HTC rev on FPGA) */
-       u32 ipath_pcirev;
-
-       /* chip address space used by 4k pio buffers */
-       u32 ipath_4kalign;
-       /* The MTU programmed for this unit */
-       u32 ipath_ibmtu;
-       /*
-        * The max size IB packet, included IB headers that we can send.
-        * Starts same as ipath_piosize, but is affected when ibmtu is
-        * changed, or by size of eager buffers
-        */
-       u32 ipath_ibmaxlen;
-       /*
-        * ibmaxlen at init time, limited by chip and by receive buffer
-        * size.  Not changed after init.
-        */
-       u32 ipath_init_ibmaxlen;
-       /* size of each rcvegrbuffer */
-       u32 ipath_rcvegrbufsize;
-       /* localbus width (1, 2,4,8,16,32) from config space  */
-       u32 ipath_lbus_width;
-       /* localbus speed (HT: 200,400,800,1000; PCIe 2500) */
-       u32 ipath_lbus_speed;
-       /*
-        * number of sequential ibcstatus change for polling active/quiet
-        * (i.e., link not coming up).
-        */
-       u32 ipath_ibpollcnt;
-       /* low and high portions of MSI capability/vector */
-       u32 ipath_msi_lo;
-       /* saved after PCIe init for restore after reset */
-       u32 ipath_msi_hi;
-       /* MSI data (vector) saved for restore */
-       u16 ipath_msi_data;
-       /* MLID programmed for this instance */
-       u16 ipath_mlid;
-       /* LID programmed for this instance */
-       u16 ipath_lid;
-       /* list of pkeys programmed; 0 if not set */
-       u16 ipath_pkeys[4];
-       /*
-        * ASCII serial number, from flash, large enough for original
-        * all digit strings, and longer QLogic serial number format
-        */
-       u8 ipath_serial[16];
-       /* human readable board version */
-       u8 ipath_boardversion[96];
-       u8 ipath_lbus_info[32]; /* human readable localbus info */
-       /* chip major rev, from ipath_revision */
-       u8 ipath_majrev;
-       /* chip minor rev, from ipath_revision */
-       u8 ipath_minrev;
-       /* board rev, from ipath_revision */
-       u8 ipath_boardrev;
-       /* saved for restore after reset */
-       u8 ipath_pci_cacheline;
-       /* LID mask control */
-       u8 ipath_lmc;
-       /* link width supported */
-       u8 ipath_link_width_supported;
-       /* link speed supported */
-       u8 ipath_link_speed_supported;
-       u8 ipath_link_width_enabled;
-       u8 ipath_link_speed_enabled;
-       u8 ipath_link_width_active;
-       u8 ipath_link_speed_active;
-       /* Rx Polarity inversion (compensate for ~tx on partner) */
-       u8 ipath_rx_pol_inv;
-
-       u8 ipath_r_portenable_shift;
-       u8 ipath_r_intravail_shift;
-       u8 ipath_r_tailupd_shift;
-       u8 ipath_r_portcfg_shift;
-
-       /* unit # of this chip, if present */
-       int ipath_unit;
-
-       /* local link integrity counter */
-       u32 ipath_lli_counter;
-       /* local link integrity errors */
-       u32 ipath_lli_errors;
-       /*
-        * Above counts only cases where _successive_ LocalLinkIntegrity
-        * errors were seen in the receive headers of kern-packets.
-        * Below are the three (monotonically increasing) counters
-        * maintained via GPIO interrupts on iba6120-rev2.
-        */
-       u32 ipath_rxfc_unsupvl_errs;
-       u32 ipath_overrun_thresh_errs;
-       u32 ipath_lli_errs;
-
-       /*
-        * Not all devices managed by a driver instance are the same
-        * type, so these fields must be per-device.
-        */
-       u64 ipath_i_bitsextant;
-       ipath_err_t ipath_e_bitsextant;
-       ipath_err_t ipath_hwe_bitsextant;
-
-       /*
-        * Below should be computable from number of ports,
-        * since they are never modified.
-        */
-       u64 ipath_i_rcvavail_mask;
-       u64 ipath_i_rcvurg_mask;
-       u16 ipath_i_rcvurg_shift;
-       u16 ipath_i_rcvavail_shift;
-
-       /*
-        * Register bits for selecting i2c direction and values, used for
-        * I2C serial flash.
-        */
-       u8 ipath_gpio_sda_num;
-       u8 ipath_gpio_scl_num;
-       u8 ipath_i2c_chain_type;
-       u64 ipath_gpio_sda;
-       u64 ipath_gpio_scl;
-
-       /* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */
-       spinlock_t ipath_gpio_lock;
-
-       /*
-        * IB link and linktraining states and masks that vary per chip in
-        * some way.  Set at init, to avoid each IB status change interrupt
-        */
-       u8 ibcs_ls_shift;
-       u8 ibcs_lts_mask;
-       u32 ibcs_mask;
-       u32 ib_init;
-       u32 ib_arm;
-       u32 ib_active;
-
-       u16 ipath_rhf_offset; /* offset of RHF within receive header entry */
-
-       /*
-        * shift/mask for linkcmd, linkinitcmd, maxpktlen in ibccontol
-        * reg. Changes for IBA7220
-        */
-       u8 ibcc_lic_mask; /* LinkInitCmd */
-       u8 ibcc_lc_shift; /* LinkCmd */
-       u8 ibcc_mpl_shift; /* Maxpktlen */
-
-       u8 delay_mult;
-
-       /* used to override LED behavior */
-       u8 ipath_led_override;  /* Substituted for normal value, if non-zero */
-       u16 ipath_led_override_timeoff; /* delta to next timer event */
-       u8 ipath_led_override_vals[2]; /* Alternates per blink-frame */
-       u8 ipath_led_override_phase; /* Just counts, LSB picks from vals[] */
-       atomic_t ipath_led_override_timer_active;
-       /* Used to flash LEDs in override mode */
-       struct timer_list ipath_led_override_timer;
-
-       /* Support (including locks) for EEPROM logging of errors and time */
-       /* control access to actual counters, timer */
-       spinlock_t ipath_eep_st_lock;
-       /* control high-level access to EEPROM */
-       struct mutex ipath_eep_lock;
-       /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */
-       uint64_t ipath_traffic_wds;
-       /* active time is kept in seconds, but logged in hours */
-       atomic_t ipath_active_time;
-       /* Below are nominal shadow of EEPROM, new since last EEPROM update */
-       uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT];
-       uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT];
-       uint16_t ipath_eep_hrs;
-       /*
-        * masks for which bits of errs, hwerrs that cause
-        * each of the counters to increment.
-        */
-       struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT];
-
-       /* interrupt mitigation reload register info */
-       u16 ipath_jint_idle_ticks;      /* idle clock ticks */
-       u16 ipath_jint_max_packets;     /* max packets across all ports */
-
-       /*
-        * lock for access to SerDes, and flags to sequence preset
-        * versus steady-state. 7220-only at the moment.
-        */
-       spinlock_t ipath_sdepb_lock;
-       u8 ipath_presets_needed; /* Set if presets to be restored next DOWN */
-};
-
-/* ipath_hol_state values (stopping/starting user proc, send flushing) */
-#define IPATH_HOL_UP       0
-#define IPATH_HOL_DOWN     1
-/* ipath_hol_next toggle values, used when hol_state IPATH_HOL_DOWN */
-#define IPATH_HOL_DOWNSTOP 0
-#define IPATH_HOL_DOWNCONT 1
-
-/* bit positions for sdma_status */
-#define IPATH_SDMA_ABORTING  0
-#define IPATH_SDMA_DISARMED  1
-#define IPATH_SDMA_DISABLED  2
-#define IPATH_SDMA_LAYERBUF  3
-#define IPATH_SDMA_RUNNING  30
-#define IPATH_SDMA_SHUTDOWN 31
-
-/* bit combinations that correspond to abort states */
-#define IPATH_SDMA_ABORT_NONE 0
-#define IPATH_SDMA_ABORT_ABORTING (1UL << IPATH_SDMA_ABORTING)
-#define IPATH_SDMA_ABORT_DISARMED ((1UL << IPATH_SDMA_ABORTING) | \
-       (1UL << IPATH_SDMA_DISARMED))
-#define IPATH_SDMA_ABORT_DISABLED ((1UL << IPATH_SDMA_ABORTING) | \
-       (1UL << IPATH_SDMA_DISABLED))
-#define IPATH_SDMA_ABORT_ABORTED ((1UL << IPATH_SDMA_ABORTING) | \
-       (1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))
-#define IPATH_SDMA_ABORT_MASK ((1UL<<IPATH_SDMA_ABORTING) | \
-       (1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))
-
-#define IPATH_SDMA_BUF_NONE 0
-#define IPATH_SDMA_BUF_MASK (1UL<<IPATH_SDMA_LAYERBUF)
-
-/* Private data for file operations */
-struct ipath_filedata {
-       struct ipath_portdata *pd;
-       unsigned subport;
-       unsigned tidcursor;
-       struct ipath_user_sdma_queue *pq;
-};
-extern struct list_head ipath_dev_list;
-extern spinlock_t ipath_devs_lock;
-extern struct ipath_devdata *ipath_lookup(int unit);
-
-int ipath_init_chip(struct ipath_devdata *, int);
-int ipath_enable_wc(struct ipath_devdata *dd);
-void ipath_disable_wc(struct ipath_devdata *dd);
-int ipath_count_units(int *npresentp, int *nupp, int *maxportsp);
-void ipath_shutdown_device(struct ipath_devdata *);
-void ipath_clear_freeze(struct ipath_devdata *);
-
-struct file_operations;
-int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
-                   struct cdev **cdevp, struct device **devp);
-void ipath_cdev_cleanup(struct cdev **cdevp,
-                       struct device **devp);
-
-int ipath_diag_add(struct ipath_devdata *);
-void ipath_diag_remove(struct ipath_devdata *);
-
-extern wait_queue_head_t ipath_state_wait;
-
-int ipath_user_add(struct ipath_devdata *dd);
-void ipath_user_remove(struct ipath_devdata *dd);
-
-struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
-
-extern int ipath_diag_inuse;
-
-irqreturn_t ipath_intr(int irq, void *devid);
-int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
-                    ipath_err_t err);
-#if __IPATH_INFO || __IPATH_DBG
-extern const char *ipath_ibcstatus_str[];
-#endif
-
-/* clean up any per-chip chip-specific stuff */
-void ipath_chip_cleanup(struct ipath_devdata *);
-/* clean up any chip type-specific stuff */
-void ipath_chip_done(void);
-
-void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first,
-                         unsigned cnt);
-void ipath_cancel_sends(struct ipath_devdata *, int);
-
-int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *);
-void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
-
-int ipath_parse_ushort(const char *str, unsigned short *valp);
-
-void ipath_kreceive(struct ipath_portdata *);
-int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned);
-int ipath_reset_device(int);
-void ipath_get_faststats(unsigned long);
-int ipath_wait_linkstate(struct ipath_devdata *, u32, int);
-int ipath_set_linkstate(struct ipath_devdata *, u8);
-int ipath_set_mtu(struct ipath_devdata *, u16);
-int ipath_set_lid(struct ipath_devdata *, u32, u8);
-int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
-void ipath_enable_armlaunch(struct ipath_devdata *);
-void ipath_disable_armlaunch(struct ipath_devdata *);
-void ipath_hol_down(struct ipath_devdata *);
-void ipath_hol_up(struct ipath_devdata *);
-void ipath_hol_event(unsigned long);
-void ipath_toggle_rclkrls(struct ipath_devdata *);
-void ipath_sd7220_clr_ibpar(struct ipath_devdata *);
-void ipath_set_relock_poll(struct ipath_devdata *, int);
-void ipath_shutdown_relock_poll(struct ipath_devdata *);
-
-/* for use in system calls, where we want to know device type, etc. */
-#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd
-#define subport_fp(fp) \
-       ((struct ipath_filedata *)(fp)->private_data)->subport
-#define tidcursor_fp(fp) \
-       ((struct ipath_filedata *)(fp)->private_data)->tidcursor
-#define user_sdma_queue_fp(fp) \
-       ((struct ipath_filedata *)(fp)->private_data)->pq
-
-/*
- * values for ipath_flags
- */
-               /* chip can report link latency (IB 1.2) */
-#define IPATH_HAS_LINK_LATENCY 0x1
-               /* The chip is up and initted */
-#define IPATH_INITTED       0x2
-               /* set if any user code has set kr_rcvhdrsize */
-#define IPATH_RCVHDRSZ_SET  0x4
-               /* The chip is present and valid for accesses */
-#define IPATH_PRESENT       0x8
-               /* HT link0 is only 8 bits wide, ignore upper byte crc
-                * errors, etc. */
-#define IPATH_8BIT_IN_HT0   0x10
-               /* HT link1 is only 8 bits wide, ignore upper byte crc
-                * errors, etc. */
-#define IPATH_8BIT_IN_HT1   0x20
-               /* The link is down */
-#define IPATH_LINKDOWN      0x40
-               /* The link level is up (0x11) */
-#define IPATH_LINKINIT      0x80
-               /* The link is in the armed (0x21) state */
-#define IPATH_LINKARMED     0x100
-               /* The link is in the active (0x31) state */
-#define IPATH_LINKACTIVE    0x200
-               /* link current state is unknown */
-#define IPATH_LINKUNK       0x400
-               /* Write combining flush needed for PIO */
-#define IPATH_PIO_FLUSH_WC  0x1000
-               /* DMA Receive tail pointer */
-#define IPATH_NODMA_RTAIL   0x2000
-               /* no IB cable, or no device on IB cable */
-#define IPATH_NOCABLE       0x4000
-               /* Supports port zero per packet receive interrupts via
-                * GPIO */
-#define IPATH_GPIO_INTR     0x8000
-               /* uses the coded 4byte TID, not 8 byte */
-#define IPATH_4BYTE_TID     0x10000
-               /* packet/word counters are 32 bit, else those 4 counters
-                * are 64bit */
-#define IPATH_32BITCOUNTERS 0x20000
-               /* Interrupt register is 64 bits */
-#define IPATH_INTREG_64     0x40000
-               /* can miss port0 rx interrupts */
-#define IPATH_DISABLED      0x80000 /* administratively disabled */
-               /* Use GPIO interrupts for new counters */
-#define IPATH_GPIO_ERRINTRS 0x100000
-#define IPATH_SWAP_PIOBUFS  0x200000
-               /* Supports Send DMA */
-#define IPATH_HAS_SEND_DMA  0x400000
-               /* Supports Send Count (not just word count) in PBC */
-#define IPATH_HAS_PBC_CNT   0x800000
-               /* Suppress heartbeat, even if turning off loopback */
-#define IPATH_NO_HRTBT      0x1000000
-#define IPATH_HAS_THRESH_UPDATE 0x4000000
-#define IPATH_HAS_MULT_IB_SPEED 0x8000000
-#define IPATH_IB_AUTONEG_INPROG 0x10000000
-#define IPATH_IB_AUTONEG_FAILED 0x20000000
-               /* Linkdown-disable intentionally, Do not attempt to bring up */
-#define IPATH_IB_LINK_DISABLED 0x40000000
-#define IPATH_IB_FORCE_NOTIFY 0x80000000 /* force notify on next ib change */
-
-/* Bits in GPIO for the added interrupts */
-#define IPATH_GPIO_PORT0_BIT 2
-#define IPATH_GPIO_RXUVL_BIT 3
-#define IPATH_GPIO_OVRUN_BIT 4
-#define IPATH_GPIO_LLI_BIT 5
-#define IPATH_GPIO_ERRINTR_MASK 0x38
-
-/* portdata flag bit offsets */
-               /* waiting for a packet to arrive */
-#define IPATH_PORT_WAITING_RCV   2
-               /* master has not finished initializing */
-#define IPATH_PORT_MASTER_UNINIT 4
-               /* waiting for an urgent packet to arrive */
-#define IPATH_PORT_WAITING_URG 5
-
-/* free up any allocated data at closes */
-void ipath_free_data(struct ipath_portdata *dd);
-u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32, u32 *);
-void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
-                               unsigned len, int avail);
-void ipath_init_iba6110_funcs(struct ipath_devdata *);
-void ipath_get_eeprom_info(struct ipath_devdata *);
-int ipath_update_eeprom_log(struct ipath_devdata *dd);
-void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
-u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
-void ipath_disarm_senderrbufs(struct ipath_devdata *);
-void ipath_force_pio_avail_update(struct ipath_devdata *);
-void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
-
-/*
- * Set LED override, only the two LSBs have "public" meaning, but
- * any non-zero value substitutes them for the Link and LinkTrain
- * LED states.
- */
-#define IPATH_LED_PHYS 1 /* Physical (linktraining) GREEN LED */
-#define IPATH_LED_LOG 2  /* Logical (link) YELLOW LED */
-void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val);
-
-/* send dma routines */
-int setup_sdma(struct ipath_devdata *);
-void teardown_sdma(struct ipath_devdata *);
-void ipath_restart_sdma(struct ipath_devdata *);
-void ipath_sdma_intr(struct ipath_devdata *);
-int ipath_sdma_verbs_send(struct ipath_devdata *, struct ipath_sge_state *,
-                         u32, struct ipath_verbs_txreq *);
-/* ipath_sdma_lock should be locked before calling this. */
-int ipath_sdma_make_progress(struct ipath_devdata *dd);
-
-/* must be called under ipath_sdma_lock */
-static inline u16 ipath_sdma_descq_freecnt(const struct ipath_devdata *dd)
-{
-       return dd->ipath_sdma_descq_cnt -
-               (dd->ipath_sdma_descq_added - dd->ipath_sdma_descq_removed) -
-               1 - dd->ipath_sdma_desc_nreserved;
-}
-
-static inline void ipath_sdma_desc_reserve(struct ipath_devdata *dd, u16 cnt)
-{
-       dd->ipath_sdma_desc_nreserved += cnt;
-}
-
-static inline void ipath_sdma_desc_unreserve(struct ipath_devdata *dd, u16 cnt)
-{
-       dd->ipath_sdma_desc_nreserved -= cnt;
-}
-
-/*
- * number of words used for protocol header if not set by ipath_userinit();
- */
-#define IPATH_DFLT_RCVHDRSIZE 9
-
-int ipath_get_user_pages(unsigned long, size_t, struct page **);
-void ipath_release_user_pages(struct page **, size_t);
-void ipath_release_user_pages_on_close(struct page **, size_t);
-int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int);
-int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int);
-int ipath_tempsense_read(struct ipath_devdata *, u8 regnum);
-int ipath_tempsense_write(struct ipath_devdata *, u8 regnum, u8 data);
-
-/* these are used for the registers that vary with port */
-void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg,
-                          unsigned, u64);
-
-/*
- * We could have a single register get/put routine, that takes a group type,
- * but this is somewhat clearer and cleaner.  It also gives us some error
- * checking.  64 bit register reads should always work, but are inefficient
- * on opteron (the northbridge always generates 2 separate HT 32 bit reads),
- * so we use kreg32 wherever possible.  User register and counter register
- * reads are always 32 bit reads, so only one form of those routines.
- */
-
-/*
- * At the moment, none of the s-registers are writable, so no
- * ipath_write_sreg().
- */
-
-/**
- * ipath_read_ureg32 - read 32-bit virtualized per-port register
- * @dd: device
- * @regno: register number
- * @port: port number
- *
- * Return the contents of a register that is virtualized to be per port.
- * Returns -1 on errors (not distinguishable from valid contents at
- * runtime; we may add a separate error variable at some point).
- */
-static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd,
-                                   ipath_ureg regno, int port)
-{
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-               return 0;
-
-       return readl(regno + (u64 __iomem *)
-                    (dd->ipath_uregbase +
-                     (char __iomem *)dd->ipath_kregbase +
-                     dd->ipath_ureg_align * port));
-}
-
-/**
- * ipath_write_ureg - write 32-bit virtualized per-port register
- * @dd: device
- * @regno: register number
- * @value: value
- * @port: port
- *
- * Write the contents of a register that is virtualized to be per port.
- */
-static inline void ipath_write_ureg(const struct ipath_devdata *dd,
-                                   ipath_ureg regno, u64 value, int port)
-{
-       u64 __iomem *ubase = (u64 __iomem *)
-               (dd->ipath_uregbase + (char __iomem *) dd->ipath_kregbase +
-                dd->ipath_ureg_align * port);
-       if (dd->ipath_kregbase)
-               writeq(value, &ubase[regno]);
-}
-
-static inline u32 ipath_read_kreg32(const struct ipath_devdata *dd,
-                                   ipath_kreg regno)
-{
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-               return -1;
-       return readl((u32 __iomem *) & dd->ipath_kregbase[regno]);
-}
-
-static inline u64 ipath_read_kreg64(const struct ipath_devdata *dd,
-                                   ipath_kreg regno)
-{
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-               return -1;
-
-       return readq(&dd->ipath_kregbase[regno]);
-}
-
-static inline void ipath_write_kreg(const struct ipath_devdata *dd,
-                                   ipath_kreg regno, u64 value)
-{
-       if (dd->ipath_kregbase)
-               writeq(value, &dd->ipath_kregbase[regno]);
-}
-
-static inline u64 ipath_read_creg(const struct ipath_devdata *dd,
-                                 ipath_sreg regno)
-{
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-               return 0;
-
-       return readq(regno + (u64 __iomem *)
-                    (dd->ipath_cregbase +
-                     (char __iomem *)dd->ipath_kregbase));
-}
-
-static inline u32 ipath_read_creg32(const struct ipath_devdata *dd,
-                                        ipath_sreg regno)
-{
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-               return 0;
-       return readl(regno + (u64 __iomem *)
-                    (dd->ipath_cregbase +
-                     (char __iomem *)dd->ipath_kregbase));
-}
-
-static inline void ipath_write_creg(const struct ipath_devdata *dd,
-                                   ipath_creg regno, u64 value)
-{
-       if (dd->ipath_kregbase)
-               writeq(value, regno + (u64 __iomem *)
-                      (dd->ipath_cregbase +
-                       (char __iomem *)dd->ipath_kregbase));
-}
-
-static inline void ipath_clear_rcvhdrtail(const struct ipath_portdata *pd)
-{
-       *((u64 *) pd->port_rcvhdrtail_kvaddr) = 0ULL;
-}
-
-static inline u32 ipath_get_rcvhdrtail(const struct ipath_portdata *pd)
-{
-       return (u32) le64_to_cpu(*((volatile __le64 *)
-                               pd->port_rcvhdrtail_kvaddr));
-}
-
-static inline u32 ipath_get_hdrqtail(const struct ipath_portdata *pd)
-{
-       const struct ipath_devdata *dd = pd->port_dd;
-       u32 hdrqtail;
-
-       if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
-               __le32 *rhf_addr;
-               u32 seq;
-
-               rhf_addr = (__le32 *) pd->port_rcvhdrq +
-                       pd->port_head + dd->ipath_rhf_offset;
-               seq = ipath_hdrget_seq(rhf_addr);
-               hdrqtail = pd->port_head;
-               if (seq == pd->port_seq_cnt)
-                       hdrqtail++;
-       } else
-               hdrqtail = ipath_get_rcvhdrtail(pd);
-
-       return hdrqtail;
-}
-
-static inline u64 ipath_read_ireg(const struct ipath_devdata *dd, ipath_kreg r)
-{
-       return (dd->ipath_flags & IPATH_INTREG_64) ?
-               ipath_read_kreg64(dd, r) : ipath_read_kreg32(dd, r);
-}
-
-/*
- * from contents of IBCStatus (or a saved copy), return linkstate
- * Report ACTIVE_DEFER as ACTIVE, because we treat them the same
- * everywhere, anyway (and should be, for almost all purposes).
- */
-static inline u32 ipath_ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
-{
-       u32 state = (u32)(ibcs >> dd->ibcs_ls_shift) &
-               INFINIPATH_IBCS_LINKSTATE_MASK;
-       if (state == INFINIPATH_IBCS_L_STATE_ACT_DEFER)
-               state = INFINIPATH_IBCS_L_STATE_ACTIVE;
-       return state;
-}
-
-/* from contents of IBCStatus (or a saved copy), return linktrainingstate */
-static inline u32 ipath_ib_linktrstate(struct ipath_devdata *dd, u64 ibcs)
-{
-       return (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
-               dd->ibcs_lts_mask;
-}
-
-/*
- * from contents of IBCStatus (or a saved copy), return logical link state
- * combination of link state and linktraining state (down, active, init,
- * arm, etc.
- */
-static inline u32 ipath_ib_state(struct ipath_devdata *dd, u64 ibcs)
-{
-       u32 ibs;
-       ibs = (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
-               dd->ibcs_lts_mask;
-       ibs |= (u32)(ibcs &
-               (INFINIPATH_IBCS_LINKSTATE_MASK << dd->ibcs_ls_shift));
-       return ibs;
-}
-
-/*
- * sysfs interface.
- */
-
-struct device_driver;
-
-extern const char ib_ipath_version[];
-
-extern const struct attribute_group *ipath_driver_attr_groups[];
-
-int ipath_device_create_group(struct device *, struct ipath_devdata *);
-void ipath_device_remove_group(struct device *, struct ipath_devdata *);
-int ipath_expose_reset(struct device *);
-
-int ipath_init_ipathfs(void);
-void ipath_exit_ipathfs(void);
-int ipathfs_add_device(struct ipath_devdata *);
-int ipathfs_remove_device(struct ipath_devdata *);
-
-/*
- * dma_addr wrappers - all 0's invalid for hw
- */
-dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long,
-                         size_t, int);
-dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
-const char *ipath_get_unit_name(int unit);
-
-/*
- * Flush write combining store buffers (if present) and perform a write
- * barrier.
- */
-#if defined(CONFIG_X86_64)
-#define ipath_flush_wc() asm volatile("sfence" ::: "memory")
-#else
-#define ipath_flush_wc() wmb()
-#endif
-
-extern unsigned ipath_debug; /* debugging bit mask */
-extern unsigned ipath_linkrecovery;
-extern unsigned ipath_mtu4096;
-extern struct mutex ipath_mutex;
-
-#define IPATH_DRV_NAME         "ib_ipath"
-#define IPATH_MAJOR            233
-#define IPATH_USER_MINOR_BASE  0
-#define IPATH_DIAGPKT_MINOR    127
-#define IPATH_DIAG_MINOR_BASE  129
-#define IPATH_NMINORS          255
-
-#define ipath_dev_err(dd,fmt,...) \
-       do { \
-               const struct ipath_devdata *__dd = (dd); \
-               if (__dd->pcidev) \
-                       dev_err(&__dd->pcidev->dev, "%s: " fmt, \
-                               ipath_get_unit_name(__dd->ipath_unit), \
-                               ##__VA_ARGS__); \
-               else \
-                       printk(KERN_ERR IPATH_DRV_NAME ": %s: " fmt, \
-                              ipath_get_unit_name(__dd->ipath_unit), \
-                              ##__VA_ARGS__); \
-       } while (0)
-
-#if _IPATH_DEBUGGING
-
-# define __IPATH_DBG_WHICH(which,fmt,...) \
-       do { \
-               if (unlikely(ipath_debug & (which))) \
-                       printk(KERN_DEBUG IPATH_DRV_NAME ": %s: " fmt, \
-                              __func__,##__VA_ARGS__); \
-       } while(0)
-
-# define ipath_dbg(fmt,...) \
-       __IPATH_DBG_WHICH(__IPATH_DBG,fmt,##__VA_ARGS__)
-# define ipath_cdbg(which,fmt,...) \
-       __IPATH_DBG_WHICH(__IPATH_##which##DBG,fmt,##__VA_ARGS__)
-
-#else /* ! _IPATH_DEBUGGING */
-
-# define ipath_dbg(fmt,...)
-# define ipath_cdbg(which,fmt,...)
-
-#endif /* _IPATH_DEBUGGING */
-
-/*
- * this is used for formatting hw error messages...
- */
-struct ipath_hwerror_msgs {
-       u64 mask;
-       const char *msg;
-};
-
-#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b }
-
-/* in ipath_intr.c... */
-void ipath_format_hwerrors(u64 hwerrs,
-                          const struct ipath_hwerror_msgs *hwerrmsgs,
-                          size_t nhwerrmsgs,
-                          char *msg, size_t lmsg);
-
-#endif                         /* _IPATH_KERNEL_H */
diff --git a/drivers/staging/rdma/ipath/ipath_keys.c b/drivers/staging/rdma/ipath/ipath_keys.c
deleted file mode 100644 (file)
index c0e933f..0000000
+++ /dev/null
@@ -1,270 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <asm/io.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/**
- * ipath_alloc_lkey - allocate an lkey
- * @rkt: lkey table in which to allocate the lkey
- * @mr: memory region that this lkey protects
- *
- * Returns 1 if successful, otherwise returns 0.
- */
-
-int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
-{
-       unsigned long flags;
-       u32 r;
-       u32 n;
-       int ret;
-
-       spin_lock_irqsave(&rkt->lock, flags);
-
-       /* Find the next available LKEY */
-       r = n = rkt->next;
-       for (;;) {
-               if (rkt->table[r] == NULL)
-                       break;
-               r = (r + 1) & (rkt->max - 1);
-               if (r == n) {
-                       spin_unlock_irqrestore(&rkt->lock, flags);
-                       ipath_dbg("LKEY table full\n");
-                       ret = 0;
-                       goto bail;
-               }
-       }
-       rkt->next = (r + 1) & (rkt->max - 1);
-       /*
-        * Make sure lkey is never zero which is reserved to indicate an
-        * unrestricted LKEY.
-        */
-       rkt->gen++;
-       mr->lkey = (r << (32 - ib_ipath_lkey_table_size)) |
-               ((((1 << (24 - ib_ipath_lkey_table_size)) - 1) & rkt->gen)
-                << 8);
-       if (mr->lkey == 0) {
-               mr->lkey |= 1 << 8;
-               rkt->gen++;
-       }
-       rkt->table[r] = mr;
-       spin_unlock_irqrestore(&rkt->lock, flags);
-
-       ret = 1;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_free_lkey - free an lkey
- * @rkt: table from which to free the lkey
- * @lkey: lkey id to free
- */
-void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey)
-{
-       unsigned long flags;
-       u32 r;
-
-       if (lkey == 0)
-               return;
-       r = lkey >> (32 - ib_ipath_lkey_table_size);
-       spin_lock_irqsave(&rkt->lock, flags);
-       rkt->table[r] = NULL;
-       spin_unlock_irqrestore(&rkt->lock, flags);
-}
-
-/**
- * ipath_lkey_ok - check IB SGE for validity and initialize
- * @rkt: table containing lkey to check SGE against
- * @isge: outgoing internal SGE
- * @sge: SGE to check
- * @acc: access flags
- *
- * Return 1 if valid and successful, otherwise returns 0.
- *
- * Check the IB SGE for validity and initialize our internal version
- * of it.
- */
-int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
-                 struct ib_sge *sge, int acc)
-{
-       struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
-       struct ipath_mregion *mr;
-       unsigned n, m;
-       size_t off;
-       int ret;
-
-       /*
-        * We use LKEY == zero for kernel virtual addresses
-        * (see ipath_get_dma_mr and ipath_dma.c).
-        */
-       if (sge->lkey == 0) {
-               /* always a kernel port, no locking needed */
-               struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
-
-               if (pd->user) {
-                       ret = 0;
-                       goto bail;
-               }
-               isge->mr = NULL;
-               isge->vaddr = (void *) sge->addr;
-               isge->length = sge->length;
-               isge->sge_length = sge->length;
-               ret = 1;
-               goto bail;
-       }
-       mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))];
-       if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
-                    qp->ibqp.pd != mr->pd)) {
-               ret = 0;
-               goto bail;
-       }
-
-       off = sge->addr - mr->user_base;
-       if (unlikely(sge->addr < mr->user_base ||
-                    off + sge->length > mr->length ||
-                    (mr->access_flags & acc) != acc)) {
-               ret = 0;
-               goto bail;
-       }
-
-       off += mr->offset;
-       m = 0;
-       n = 0;
-       while (off >= mr->map[m]->segs[n].length) {
-               off -= mr->map[m]->segs[n].length;
-               n++;
-               if (n >= IPATH_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-       isge->mr = mr;
-       isge->vaddr = mr->map[m]->segs[n].vaddr + off;
-       isge->length = mr->map[m]->segs[n].length - off;
-       isge->sge_length = sge->length;
-       isge->m = m;
-       isge->n = n;
-
-       ret = 1;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_rkey_ok - check the IB virtual address, length, and RKEY
- * @dev: infiniband device
- * @ss: SGE state
- * @len: length of data
- * @vaddr: virtual address to place data
- * @rkey: rkey to check
- * @acc: access flags
- *
- * Return 1 if successful, otherwise 0.
- */
-int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
-                 u32 len, u64 vaddr, u32 rkey, int acc)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ipath_lkey_table *rkt = &dev->lk_table;
-       struct ipath_sge *sge = &ss->sge;
-       struct ipath_mregion *mr;
-       unsigned n, m;
-       size_t off;
-       int ret;
-
-       /*
-        * We use RKEY == zero for kernel virtual addresses
-        * (see ipath_get_dma_mr and ipath_dma.c).
-        */
-       if (rkey == 0) {
-               /* always a kernel port, no locking needed */
-               struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
-
-               if (pd->user) {
-                       ret = 0;
-                       goto bail;
-               }
-               sge->mr = NULL;
-               sge->vaddr = (void *) vaddr;
-               sge->length = len;
-               sge->sge_length = len;
-               ss->sg_list = NULL;
-               ss->num_sge = 1;
-               ret = 1;
-               goto bail;
-       }
-
-       mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))];
-       if (unlikely(mr == NULL || mr->lkey != rkey ||
-                    qp->ibqp.pd != mr->pd)) {
-               ret = 0;
-               goto bail;
-       }
-
-       off = vaddr - mr->iova;
-       if (unlikely(vaddr < mr->iova || off + len > mr->length ||
-                    (mr->access_flags & acc) == 0)) {
-               ret = 0;
-               goto bail;
-       }
-
-       off += mr->offset;
-       m = 0;
-       n = 0;
-       while (off >= mr->map[m]->segs[n].length) {
-               off -= mr->map[m]->segs[n].length;
-               n++;
-               if (n >= IPATH_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-       sge->mr = mr;
-       sge->vaddr = mr->map[m]->segs[n].vaddr + off;
-       sge->length = mr->map[m]->segs[n].length - off;
-       sge->sge_length = len;
-       sge->m = m;
-       sge->n = n;
-       ss->sg_list = NULL;
-       ss->num_sge = 1;
-
-       ret = 1;
-
-bail:
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_mad.c b/drivers/staging/rdma/ipath/ipath_mad.c
deleted file mode 100644 (file)
index ad3a926..0000000
+++ /dev/null
@@ -1,1521 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_smi.h>
-#include <rdma/ib_pma.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-#define IB_SMP_UNSUP_VERSION   cpu_to_be16(0x0004)
-#define IB_SMP_UNSUP_METHOD    cpu_to_be16(0x0008)
-#define IB_SMP_UNSUP_METH_ATTR cpu_to_be16(0x000C)
-#define IB_SMP_INVALID_FIELD   cpu_to_be16(0x001C)
-
-static int reply(struct ib_smp *smp)
-{
-       /*
-        * The verbs framework will handle the directed/LID route
-        * packet changes.
-        */
-       smp->method = IB_MGMT_METHOD_GET_RESP;
-       if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
-               smp->status |= IB_SMP_DIRECTION;
-       return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
-}
-
-static int recv_subn_get_nodedescription(struct ib_smp *smp,
-                                        struct ib_device *ibdev)
-{
-       if (smp->attr_mod)
-               smp->status |= IB_SMP_INVALID_FIELD;
-
-       memcpy(smp->data, ibdev->node_desc, sizeof(smp->data));
-
-       return reply(smp);
-}
-
-struct nodeinfo {
-       u8 base_version;
-       u8 class_version;
-       u8 node_type;
-       u8 num_ports;
-       __be64 sys_guid;
-       __be64 node_guid;
-       __be64 port_guid;
-       __be16 partition_cap;
-       __be16 device_id;
-       __be32 revision;
-       u8 local_port_num;
-       u8 vendor_id[3];
-} __attribute__ ((packed));
-
-static int recv_subn_get_nodeinfo(struct ib_smp *smp,
-                                 struct ib_device *ibdev, u8 port)
-{
-       struct nodeinfo *nip = (struct nodeinfo *)&smp->data;
-       struct ipath_devdata *dd = to_idev(ibdev)->dd;
-       u32 vendor, majrev, minrev;
-
-       /* GUID 0 is illegal */
-       if (smp->attr_mod || (dd->ipath_guid == 0))
-               smp->status |= IB_SMP_INVALID_FIELD;
-
-       nip->base_version = 1;
-       nip->class_version = 1;
-       nip->node_type = 1;     /* channel adapter */
-       /*
-        * XXX The num_ports value will need a layer function to get
-        * the value if we ever have more than one IB port on a chip.
-        * We will also need to get the GUID for the port.
-        */
-       nip->num_ports = ibdev->phys_port_cnt;
-       /* This is already in network order */
-       nip->sys_guid = to_idev(ibdev)->sys_image_guid;
-       nip->node_guid = dd->ipath_guid;
-       nip->port_guid = dd->ipath_guid;
-       nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
-       nip->device_id = cpu_to_be16(dd->ipath_deviceid);
-       majrev = dd->ipath_majrev;
-       minrev = dd->ipath_minrev;
-       nip->revision = cpu_to_be32((majrev << 16) | minrev);
-       nip->local_port_num = port;
-       vendor = dd->ipath_vendorid;
-       nip->vendor_id[0] = IPATH_SRC_OUI_1;
-       nip->vendor_id[1] = IPATH_SRC_OUI_2;
-       nip->vendor_id[2] = IPATH_SRC_OUI_3;
-
-       return reply(smp);
-}
-
-static int recv_subn_get_guidinfo(struct ib_smp *smp,
-                                 struct ib_device *ibdev)
-{
-       u32 startgx = 8 * be32_to_cpu(smp->attr_mod);
-       __be64 *p = (__be64 *) smp->data;
-
-       /* 32 blocks of 8 64-bit GUIDs per block */
-
-       memset(smp->data, 0, sizeof(smp->data));
-
-       /*
-        * We only support one GUID for now.  If this changes, the
-        * portinfo.guid_cap field needs to be updated too.
-        */
-       if (startgx == 0) {
-               __be64 g = to_idev(ibdev)->dd->ipath_guid;
-               if (g == 0)
-                       /* GUID 0 is illegal */
-                       smp->status |= IB_SMP_INVALID_FIELD;
-               else
-                       /* The first is a copy of the read-only HW GUID. */
-                       *p = g;
-       } else
-               smp->status |= IB_SMP_INVALID_FIELD;
-
-       return reply(smp);
-}
-
-static void set_link_width_enabled(struct ipath_devdata *dd, u32 w)
-{
-       (void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, w);
-}
-
-static void set_link_speed_enabled(struct ipath_devdata *dd, u32 s)
-{
-       (void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, s);
-}
-
-static int get_overrunthreshold(struct ipath_devdata *dd)
-{
-       return (dd->ipath_ibcctrl >>
-               INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
-               INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-}
-
-/**
- * set_overrunthreshold - set the overrun threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
-{
-       unsigned v;
-
-       v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
-               INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-       if (v != n) {
-               dd->ipath_ibcctrl &=
-                       ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
-                         INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
-               dd->ipath_ibcctrl |=
-                       (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                                dd->ipath_ibcctrl);
-       }
-       return 0;
-}
-
-static int get_phyerrthreshold(struct ipath_devdata *dd)
-{
-       return (dd->ipath_ibcctrl >>
-               INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-               INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-}
-
-/**
- * set_phyerrthreshold - set the physical error threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
-{
-       unsigned v;
-
-       v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-               INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-       if (v != n) {
-               dd->ipath_ibcctrl &=
-                       ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
-                         INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
-               dd->ipath_ibcctrl |=
-                       (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                                dd->ipath_ibcctrl);
-       }
-       return 0;
-}
-
-/**
- * get_linkdowndefaultstate - get the default linkdown state
- * @dd: the infinipath device
- *
- * Returns zero if the default is POLL, 1 if the default is SLEEP.
- */
-static int get_linkdowndefaultstate(struct ipath_devdata *dd)
-{
-       return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
-}
-
-static int recv_subn_get_portinfo(struct ib_smp *smp,
-                                 struct ib_device *ibdev, u8 port)
-{
-       struct ipath_ibdev *dev;
-       struct ipath_devdata *dd;
-       struct ib_port_info *pip = (struct ib_port_info *)smp->data;
-       u16 lid;
-       u8 ibcstat;
-       u8 mtu;
-       int ret;
-
-       if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) {
-               smp->status |= IB_SMP_INVALID_FIELD;
-               ret = reply(smp);
-               goto bail;
-       }
-
-       dev = to_idev(ibdev);
-       dd = dev->dd;
-
-       /* Clear all fields.  Only set the non-zero fields. */
-       memset(smp->data, 0, sizeof(smp->data));
-
-       /* Only return the mkey if the protection field allows it. */
-       if (smp->method == IB_MGMT_METHOD_SET || dev->mkey == smp->mkey ||
-           dev->mkeyprot == 0)
-               pip->mkey = dev->mkey;
-       pip->gid_prefix = dev->gid_prefix;
-       lid = dd->ipath_lid;
-       pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
-       pip->sm_lid = cpu_to_be16(dev->sm_lid);
-       pip->cap_mask = cpu_to_be32(dev->port_cap_flags);
-       /* pip->diag_code; */
-       pip->mkey_lease_period = cpu_to_be16(dev->mkey_lease_period);
-       pip->local_port_num = port;
-       pip->link_width_enabled = dd->ipath_link_width_enabled;
-       pip->link_width_supported = dd->ipath_link_width_supported;
-       pip->link_width_active = dd->ipath_link_width_active;
-       pip->linkspeed_portstate = dd->ipath_link_speed_supported << 4;
-       ibcstat = dd->ipath_lastibcstat;
-       /* map LinkState to IB portinfo values.  */
-       pip->linkspeed_portstate |= ipath_ib_linkstate(dd, ibcstat) + 1;
-
-       pip->portphysstate_linkdown =
-               (ipath_cvt_physportstate[ibcstat & dd->ibcs_lts_mask] << 4) |
-               (get_linkdowndefaultstate(dd) ? 1 : 2);
-       pip->mkeyprot_resv_lmc = (dev->mkeyprot << 6) | dd->ipath_lmc;
-       pip->linkspeedactive_enabled = (dd->ipath_link_speed_active << 4) |
-               dd->ipath_link_speed_enabled;
-       switch (dd->ipath_ibmtu) {
-       case 4096:
-               mtu = IB_MTU_4096;
-               break;
-       case 2048:
-               mtu = IB_MTU_2048;
-               break;
-       case 1024:
-               mtu = IB_MTU_1024;
-               break;
-       case 512:
-               mtu = IB_MTU_512;
-               break;
-       case 256:
-               mtu = IB_MTU_256;
-               break;
-       default:                /* oops, something is wrong */
-               mtu = IB_MTU_2048;
-               break;
-       }
-       pip->neighbormtu_mastersmsl = (mtu << 4) | dev->sm_sl;
-       pip->vlcap_inittype = 0x10;     /* VLCap = VL0, InitType = 0 */
-       pip->vl_high_limit = dev->vl_high_limit;
-       /* pip->vl_arb_high_cap; // only one VL */
-       /* pip->vl_arb_low_cap; // only one VL */
-       /* InitTypeReply = 0 */
-       /* our mtu cap depends on whether 4K MTU enabled or not */
-       pip->inittypereply_mtucap = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
-       /* HCAs ignore VLStallCount and HOQLife */
-       /* pip->vlstallcnt_hoqlife; */
-       pip->operationalvl_pei_peo_fpi_fpo = 0x10;      /* OVLs = 1 */
-       pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
-       /* P_KeyViolations are counted by hardware. */
-       pip->pkey_violations =
-               cpu_to_be16((ipath_get_cr_errpkey(dd) -
-                            dev->z_pkey_violations) & 0xFFFF);
-       pip->qkey_violations = cpu_to_be16(dev->qkey_violations);
-       /* Only the hardware GUID is supported for now */
-       pip->guid_cap = 1;
-       pip->clientrereg_resv_subnetto = dev->subnet_timeout;
-       /* 32.768 usec. response time (guessing) */
-       pip->resv_resptimevalue = 3;
-       pip->localphyerrors_overrunerrors =
-               (get_phyerrthreshold(dd) << 4) |
-               get_overrunthreshold(dd);
-       /* pip->max_credit_hint; */
-       if (dev->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) {
-               u32 v;
-
-               v = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LINKLATENCY);
-               pip->link_roundtrip_latency[0] = v >> 16;
-               pip->link_roundtrip_latency[1] = v >> 8;
-               pip->link_roundtrip_latency[2] = v;
-       }
-
-       ret = reply(smp);
-
-bail:
-       return ret;
-}
-
-/**
- * get_pkeys - return the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the pkey table is placed here
- */
-static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
-{
-       /* always a kernel port, no locking needed */
-       struct ipath_portdata *pd = dd->ipath_pd[0];
-
-       memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
-
-       return 0;
-}
-
-static int recv_subn_get_pkeytable(struct ib_smp *smp,
-                                  struct ib_device *ibdev)
-{
-       u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
-       u16 *p = (u16 *) smp->data;
-       __be16 *q = (__be16 *) smp->data;
-
-       /* 64 blocks of 32 16-bit P_Key entries */
-
-       memset(smp->data, 0, sizeof(smp->data));
-       if (startpx == 0) {
-               struct ipath_ibdev *dev = to_idev(ibdev);
-               unsigned i, n = ipath_get_npkeys(dev->dd);
-
-               get_pkeys(dev->dd, p);
-
-               for (i = 0; i < n; i++)
-                       q[i] = cpu_to_be16(p[i]);
-       } else
-               smp->status |= IB_SMP_INVALID_FIELD;
-
-       return reply(smp);
-}
-
-static int recv_subn_set_guidinfo(struct ib_smp *smp,
-                                 struct ib_device *ibdev)
-{
-       /* The only GUID we support is the first read-only entry. */
-       return recv_subn_get_guidinfo(smp, ibdev);
-}
-
-/**
- * set_linkdowndefaultstate - set the default linkdown state
- * @dd: the infinipath device
- * @sleep: the new state
- *
- * Note that this will only take effect when the link state changes.
- */
-static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep)
-{
-       if (sleep)
-               dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
-       else
-               dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                        dd->ipath_ibcctrl);
-       return 0;
-}
-
-/**
- * recv_subn_set_portinfo - set port information
- * @smp: the incoming SM packet
- * @ibdev: the infiniband device
- * @port: the port on the device
- *
- * Set Portinfo (see ch. 14.2.5.6).
- */
-static int recv_subn_set_portinfo(struct ib_smp *smp,
-                                 struct ib_device *ibdev, u8 port)
-{
-       struct ib_port_info *pip = (struct ib_port_info *)smp->data;
-       struct ib_event event;
-       struct ipath_ibdev *dev;
-       struct ipath_devdata *dd;
-       char clientrereg = 0;
-       u16 lid, smlid;
-       u8 lwe;
-       u8 lse;
-       u8 state;
-       u16 lstate;
-       u32 mtu;
-       int ret, ore;
-
-       if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt)
-               goto err;
-
-       dev = to_idev(ibdev);
-       dd = dev->dd;
-       event.device = ibdev;
-       event.element.port_num = port;
-
-       dev->mkey = pip->mkey;
-       dev->gid_prefix = pip->gid_prefix;
-       dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
-
-       lid = be16_to_cpu(pip->lid);
-       if (dd->ipath_lid != lid ||
-           dd->ipath_lmc != (pip->mkeyprot_resv_lmc & 7)) {
-               /* Must be a valid unicast LID address. */
-               if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE)
-                       goto err;
-               ipath_set_lid(dd, lid, pip->mkeyprot_resv_lmc & 7);
-               event.event = IB_EVENT_LID_CHANGE;
-               ib_dispatch_event(&event);
-       }
-
-       smlid = be16_to_cpu(pip->sm_lid);
-       if (smlid != dev->sm_lid) {
-               /* Must be a valid unicast LID address. */
-               if (smlid == 0 || smlid >= IPATH_MULTICAST_LID_BASE)
-                       goto err;
-               dev->sm_lid = smlid;
-               event.event = IB_EVENT_SM_CHANGE;
-               ib_dispatch_event(&event);
-       }
-
-       /* Allow 1x or 4x to be set (see 14.2.6.6). */
-       lwe = pip->link_width_enabled;
-       if (lwe) {
-               if (lwe == 0xFF)
-                       lwe = dd->ipath_link_width_supported;
-               else if (lwe >= 16 || (lwe & ~dd->ipath_link_width_supported))
-                       goto err;
-               set_link_width_enabled(dd, lwe);
-       }
-
-       /* Allow 2.5 or 5.0 Gbs. */
-       lse = pip->linkspeedactive_enabled & 0xF;
-       if (lse) {
-               if (lse == 15)
-                       lse = dd->ipath_link_speed_supported;
-               else if (lse >= 8 || (lse & ~dd->ipath_link_speed_supported))
-                       goto err;
-               set_link_speed_enabled(dd, lse);
-       }
-
-       /* Set link down default state. */
-       switch (pip->portphysstate_linkdown & 0xF) {
-       case 0: /* NOP */
-               break;
-       case 1: /* SLEEP */
-               if (set_linkdowndefaultstate(dd, 1))
-                       goto err;
-               break;
-       case 2: /* POLL */
-               if (set_linkdowndefaultstate(dd, 0))
-                       goto err;
-               break;
-       default:
-               goto err;
-       }
-
-       dev->mkeyprot = pip->mkeyprot_resv_lmc >> 6;
-       dev->vl_high_limit = pip->vl_high_limit;
-
-       switch ((pip->neighbormtu_mastersmsl >> 4) & 0xF) {
-       case IB_MTU_256:
-               mtu = 256;
-               break;
-       case IB_MTU_512:
-               mtu = 512;
-               break;
-       case IB_MTU_1024:
-               mtu = 1024;
-               break;
-       case IB_MTU_2048:
-               mtu = 2048;
-               break;
-       case IB_MTU_4096:
-               if (!ipath_mtu4096)
-                       goto err;
-               mtu = 4096;
-               break;
-       default:
-               /* XXX We have already partially updated our state! */
-               goto err;
-       }
-       ipath_set_mtu(dd, mtu);
-
-       dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;
-
-       /* We only support VL0 */
-       if (((pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF) > 1)
-               goto err;
-
-       if (pip->mkey_violations == 0)
-               dev->mkey_violations = 0;
-
-       /*
-        * Hardware counter can't be reset so snapshot and subtract
-        * later.
-        */
-       if (pip->pkey_violations == 0)
-               dev->z_pkey_violations = ipath_get_cr_errpkey(dd);
-
-       if (pip->qkey_violations == 0)
-               dev->qkey_violations = 0;
-
-       ore = pip->localphyerrors_overrunerrors;
-       if (set_phyerrthreshold(dd, (ore >> 4) & 0xF))
-               goto err;
-
-       if (set_overrunthreshold(dd, (ore & 0xF)))
-               goto err;
-
-       dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
-
-       if (pip->clientrereg_resv_subnetto & 0x80) {
-               clientrereg = 1;
-               event.event = IB_EVENT_CLIENT_REREGISTER;
-               ib_dispatch_event(&event);
-       }
-
-       /*
-        * Do the port state change now that the other link parameters
-        * have been set.
-        * Changing the port physical state only makes sense if the link
-        * is down or is being set to down.
-        */
-       state = pip->linkspeed_portstate & 0xF;
-       lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
-       if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
-               goto err;
-
-       /*
-        * Only state changes of DOWN, ARM, and ACTIVE are valid
-        * and must be in the correct state to take effect (see 7.2.6).
-        */
-       switch (state) {
-       case IB_PORT_NOP:
-               if (lstate == 0)
-                       break;
-               /* FALLTHROUGH */
-       case IB_PORT_DOWN:
-               if (lstate == 0)
-                       lstate = IPATH_IB_LINKDOWN_ONLY;
-               else if (lstate == 1)
-                       lstate = IPATH_IB_LINKDOWN_SLEEP;
-               else if (lstate == 2)
-                       lstate = IPATH_IB_LINKDOWN;
-               else if (lstate == 3)
-                       lstate = IPATH_IB_LINKDOWN_DISABLE;
-               else
-                       goto err;
-               ipath_set_linkstate(dd, lstate);
-               if (lstate == IPATH_IB_LINKDOWN_DISABLE) {
-                       ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-                       goto done;
-               }
-               ipath_wait_linkstate(dd, IPATH_LINKINIT | IPATH_LINKARMED |
-                               IPATH_LINKACTIVE, 1000);
-               break;
-       case IB_PORT_ARMED:
-               ipath_set_linkstate(dd, IPATH_IB_LINKARM);
-               break;
-       case IB_PORT_ACTIVE:
-               ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
-               break;
-       default:
-               /* XXX We have already partially updated our state! */
-               goto err;
-       }
-
-       ret = recv_subn_get_portinfo(smp, ibdev, port);
-
-       if (clientrereg)
-               pip->clientrereg_resv_subnetto |= 0x80;
-
-       goto done;
-
-err:
-       smp->status |= IB_SMP_INVALID_FIELD;
-       ret = recv_subn_get_portinfo(smp, ibdev, port);
-
-done:
-       return ret;
-}
-
-/**
- * rm_pkey - decrecment the reference count for the given PKEY
- * @dd: the infinipath device
- * @key: the PKEY index
- *
- * Return true if this was the last reference and the hardware table entry
- * needs to be changed.
- */
-static int rm_pkey(struct ipath_devdata *dd, u16 key)
-{
-       int i;
-       int ret;
-
-       for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-               if (dd->ipath_pkeys[i] != key)
-                       continue;
-               if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
-                       dd->ipath_pkeys[i] = 0;
-                       ret = 1;
-                       goto bail;
-               }
-               break;
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * add_pkey - add the given PKEY to the hardware table
- * @dd: the infinipath device
- * @key: the PKEY
- *
- * Return an error code if unable to add the entry, zero if no change,
- * or 1 if the hardware PKEY register needs to be updated.
- */
-static int add_pkey(struct ipath_devdata *dd, u16 key)
-{
-       int i;
-       u16 lkey = key & 0x7FFF;
-       int any = 0;
-       int ret;
-
-       if (lkey == 0x7FFF) {
-               ret = 0;
-               goto bail;
-       }
-
-       /* Look for an empty slot or a matching PKEY. */
-       for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-               if (!dd->ipath_pkeys[i]) {
-                       any++;
-                       continue;
-               }
-               /* If it matches exactly, try to increment the ref count */
-               if (dd->ipath_pkeys[i] == key) {
-                       if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
-                               ret = 0;
-                               goto bail;
-                       }
-                       /* Lost the race. Look for an empty slot below. */
-                       atomic_dec(&dd->ipath_pkeyrefs[i]);
-                       any++;
-               }
-               /*
-                * It makes no sense to have both the limited and unlimited
-                * PKEY set at the same time since the unlimited one will
-                * disable the limited one.
-                */
-               if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
-                       ret = -EEXIST;
-                       goto bail;
-               }
-       }
-       if (!any) {
-               ret = -EBUSY;
-               goto bail;
-       }
-       for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-               if (!dd->ipath_pkeys[i] &&
-                   atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
-                       /* for ipathstats, etc. */
-                       ipath_stats.sps_pkeys[i] = lkey;
-                       dd->ipath_pkeys[i] = key;
-                       ret = 1;
-                       goto bail;
-               }
-       }
-       ret = -EBUSY;
-
-bail:
-       return ret;
-}
-
-/**
- * set_pkeys - set the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the PKEY table
- */
-static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys, u8 port)
-{
-       struct ipath_portdata *pd;
-       int i;
-       int changed = 0;
-
-       /* always a kernel port, no locking needed */
-       pd = dd->ipath_pd[0];
-
-       for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
-               u16 key = pkeys[i];
-               u16 okey = pd->port_pkeys[i];
-
-               if (key == okey)
-                       continue;
-               /*
-                * The value of this PKEY table entry is changing.
-                * Remove the old entry in the hardware's array of PKEYs.
-                */
-               if (okey & 0x7FFF)
-                       changed |= rm_pkey(dd, okey);
-               if (key & 0x7FFF) {
-                       int ret = add_pkey(dd, key);
-
-                       if (ret < 0)
-                               key = 0;
-                       else
-                               changed |= ret;
-               }
-               pd->port_pkeys[i] = key;
-       }
-       if (changed) {
-               u64 pkey;
-               struct ib_event event;
-
-               pkey = (u64) dd->ipath_pkeys[0] |
-                       ((u64) dd->ipath_pkeys[1] << 16) |
-                       ((u64) dd->ipath_pkeys[2] << 32) |
-                       ((u64) dd->ipath_pkeys[3] << 48);
-               ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
-                          (unsigned long long) pkey);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
-                                pkey);
-
-               event.event = IB_EVENT_PKEY_CHANGE;
-               event.device = &dd->verbs_dev->ibdev;
-               event.element.port_num = port;
-               ib_dispatch_event(&event);
-       }
-       return 0;
-}
-
-static int recv_subn_set_pkeytable(struct ib_smp *smp,
-                                  struct ib_device *ibdev, u8 port)
-{
-       u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
-       __be16 *p = (__be16 *) smp->data;
-       u16 *q = (u16 *) smp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       unsigned i, n = ipath_get_npkeys(dev->dd);
-
-       for (i = 0; i < n; i++)
-               q[i] = be16_to_cpu(p[i]);
-
-       if (startpx != 0 || set_pkeys(dev->dd, q, port) != 0)
-               smp->status |= IB_SMP_INVALID_FIELD;
-
-       return recv_subn_get_pkeytable(smp, ibdev);
-}
-
-static int recv_pma_get_classportinfo(struct ib_pma_mad *pmp)
-{
-       struct ib_class_port_info *p =
-               (struct ib_class_port_info *)pmp->data;
-
-       memset(pmp->data, 0, sizeof(pmp->data));
-
-       if (pmp->mad_hdr.attr_mod != 0)
-               pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-
-       /* Indicate AllPortSelect is valid (only one port anyway) */
-       p->capability_mask = cpu_to_be16(1 << 8);
-       p->base_version = 1;
-       p->class_version = 1;
-       /*
-        * Expected response time is 4.096 usec. * 2^18 == 1.073741824
-        * sec.
-        */
-       p->resp_time_value = 18;
-
-       return reply((struct ib_smp *) pmp);
-}
-
-/*
- * The PortSamplesControl.CounterMasks field is an array of 3 bit fields
- * which specify the N'th counter's capabilities. See ch. 16.1.3.2.
- * We support 5 counters which only count the mandatory quantities.
- */
-#define COUNTER_MASK(q, n) (q << ((9 - n) * 3))
-#define COUNTER_MASK0_9 cpu_to_be32(COUNTER_MASK(1, 0) | \
-                                   COUNTER_MASK(1, 1) | \
-                                   COUNTER_MASK(1, 2) | \
-                                   COUNTER_MASK(1, 3) | \
-                                   COUNTER_MASK(1, 4))
-
-static int recv_pma_get_portsamplescontrol(struct ib_pma_mad *pmp,
-                                          struct ib_device *ibdev, u8 port)
-{
-       struct ib_pma_portsamplescontrol *p =
-               (struct ib_pma_portsamplescontrol *)pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-       unsigned long flags;
-       u8 port_select = p->port_select;
-
-       memset(pmp->data, 0, sizeof(pmp->data));
-
-       p->port_select = port_select;
-       if (pmp->mad_hdr.attr_mod != 0 ||
-           (port_select != port && port_select != 0xFF))
-               pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-       /*
-        * Ticks are 10x the link transfer period which for 2.5Gbs is 4
-        * nsec.  0 == 4 nsec., 1 == 8 nsec., ..., 255 == 1020 nsec.  Sample
-        * intervals are counted in ticks.  Since we use Linux timers, that
-        * count in jiffies, we can't sample for less than 1000 ticks if HZ
-        * == 1000 (4000 ticks if HZ is 250).  link_speed_active returns 2 for
-        * DDR, 1 for SDR, set the tick to 1 for DDR, 0 for SDR on chips that
-        * have hardware support for delaying packets.
-        */
-       if (crp->cr_psstat)
-               p->tick = dev->dd->ipath_link_speed_active - 1;
-       else
-               p->tick = 250;          /* 1 usec. */
-       p->counter_width = 4;   /* 32 bit counters */
-       p->counter_mask0_9 = COUNTER_MASK0_9;
-       spin_lock_irqsave(&dev->pending_lock, flags);
-       if (crp->cr_psstat)
-               p->sample_status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-       else
-               p->sample_status = dev->pma_sample_status;
-       p->sample_start = cpu_to_be32(dev->pma_sample_start);
-       p->sample_interval = cpu_to_be32(dev->pma_sample_interval);
-       p->tag = cpu_to_be16(dev->pma_tag);
-       p->counter_select[0] = dev->pma_counter_select[0];
-       p->counter_select[1] = dev->pma_counter_select[1];
-       p->counter_select[2] = dev->pma_counter_select[2];
-       p->counter_select[3] = dev->pma_counter_select[3];
-       p->counter_select[4] = dev->pma_counter_select[4];
-       spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-       return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_set_portsamplescontrol(struct ib_pma_mad *pmp,
-                                          struct ib_device *ibdev, u8 port)
-{
-       struct ib_pma_portsamplescontrol *p =
-               (struct ib_pma_portsamplescontrol *)pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-       unsigned long flags;
-       u8 status;
-       int ret;
-
-       if (pmp->mad_hdr.attr_mod != 0 ||
-           (p->port_select != port && p->port_select != 0xFF)) {
-               pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-               ret = reply((struct ib_smp *) pmp);
-               goto bail;
-       }
-
-       spin_lock_irqsave(&dev->pending_lock, flags);
-       if (crp->cr_psstat)
-               status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-       else
-               status = dev->pma_sample_status;
-       if (status == IB_PMA_SAMPLE_STATUS_DONE) {
-               dev->pma_sample_start = be32_to_cpu(p->sample_start);
-               dev->pma_sample_interval = be32_to_cpu(p->sample_interval);
-               dev->pma_tag = be16_to_cpu(p->tag);
-               dev->pma_counter_select[0] = p->counter_select[0];
-               dev->pma_counter_select[1] = p->counter_select[1];
-               dev->pma_counter_select[2] = p->counter_select[2];
-               dev->pma_counter_select[3] = p->counter_select[3];
-               dev->pma_counter_select[4] = p->counter_select[4];
-               if (crp->cr_psstat) {
-                       ipath_write_creg(dev->dd, crp->cr_psinterval,
-                                        dev->pma_sample_interval);
-                       ipath_write_creg(dev->dd, crp->cr_psstart,
-                                        dev->pma_sample_start);
-               } else
-                       dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_STARTED;
-       }
-       spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-       ret = recv_pma_get_portsamplescontrol(pmp, ibdev, port);
-
-bail:
-       return ret;
-}
-
-static u64 get_counter(struct ipath_ibdev *dev,
-                      struct ipath_cregs const *crp,
-                      __be16 sel)
-{
-       u64 ret;
-
-       switch (sel) {
-       case IB_PMA_PORT_XMIT_DATA:
-               ret = (crp->cr_psxmitdatacount) ?
-                       ipath_read_creg32(dev->dd, crp->cr_psxmitdatacount) :
-                       dev->ipath_sword;
-               break;
-       case IB_PMA_PORT_RCV_DATA:
-               ret = (crp->cr_psrcvdatacount) ?
-                       ipath_read_creg32(dev->dd, crp->cr_psrcvdatacount) :
-                       dev->ipath_rword;
-               break;
-       case IB_PMA_PORT_XMIT_PKTS:
-               ret = (crp->cr_psxmitpktscount) ?
-                       ipath_read_creg32(dev->dd, crp->cr_psxmitpktscount) :
-                       dev->ipath_spkts;
-               break;
-       case IB_PMA_PORT_RCV_PKTS:
-               ret = (crp->cr_psrcvpktscount) ?
-                       ipath_read_creg32(dev->dd, crp->cr_psrcvpktscount) :
-                       dev->ipath_rpkts;
-               break;
-       case IB_PMA_PORT_XMIT_WAIT:
-               ret = (crp->cr_psxmitwaitcount) ?
-                       ipath_read_creg32(dev->dd, crp->cr_psxmitwaitcount) :
-                       dev->ipath_xmit_wait;
-               break;
-       default:
-               ret = 0;
-       }
-
-       return ret;
-}
-
-static int recv_pma_get_portsamplesresult(struct ib_pma_mad *pmp,
-                                         struct ib_device *ibdev)
-{
-       struct ib_pma_portsamplesresult *p =
-               (struct ib_pma_portsamplesresult *)pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-       u8 status;
-       int i;
-
-       memset(pmp->data, 0, sizeof(pmp->data));
-       p->tag = cpu_to_be16(dev->pma_tag);
-       if (crp->cr_psstat)
-               status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-       else
-               status = dev->pma_sample_status;
-       p->sample_status = cpu_to_be16(status);
-       for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
-               p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 :
-                   cpu_to_be32(
-                       get_counter(dev, crp, dev->pma_counter_select[i]));
-
-       return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp,
-                                             struct ib_device *ibdev)
-{
-       struct ib_pma_portsamplesresult_ext *p =
-               (struct ib_pma_portsamplesresult_ext *)pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-       u8 status;
-       int i;
-
-       memset(pmp->data, 0, sizeof(pmp->data));
-       p->tag = cpu_to_be16(dev->pma_tag);
-       if (crp->cr_psstat)
-               status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-       else
-               status = dev->pma_sample_status;
-       p->sample_status = cpu_to_be16(status);
-       /* 64 bits */
-       p->extended_width = cpu_to_be32(0x80000000);
-       for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
-               p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 :
-                   cpu_to_be64(
-                       get_counter(dev, crp, dev->pma_counter_select[i]));
-
-       return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_get_portcounters(struct ib_pma_mad *pmp,
-                                    struct ib_device *ibdev, u8 port)
-{
-       struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
-               pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_verbs_counters cntrs;
-       u8 port_select = p->port_select;
-
-       ipath_get_counters(dev->dd, &cntrs);
-
-       /* Adjust counters for any resets done. */
-       cntrs.symbol_error_counter -= dev->z_symbol_error_counter;
-       cntrs.link_error_recovery_counter -=
-               dev->z_link_error_recovery_counter;
-       cntrs.link_downed_counter -= dev->z_link_downed_counter;
-       cntrs.port_rcv_errors += dev->rcv_errors;
-       cntrs.port_rcv_errors -= dev->z_port_rcv_errors;
-       cntrs.port_rcv_remphys_errors -= dev->z_port_rcv_remphys_errors;
-       cntrs.port_xmit_discards -= dev->z_port_xmit_discards;
-       cntrs.port_xmit_data -= dev->z_port_xmit_data;
-       cntrs.port_rcv_data -= dev->z_port_rcv_data;
-       cntrs.port_xmit_packets -= dev->z_port_xmit_packets;
-       cntrs.port_rcv_packets -= dev->z_port_rcv_packets;
-       cntrs.local_link_integrity_errors -=
-               dev->z_local_link_integrity_errors;
-       cntrs.excessive_buffer_overrun_errors -=
-               dev->z_excessive_buffer_overrun_errors;
-       cntrs.vl15_dropped -= dev->z_vl15_dropped;
-       cntrs.vl15_dropped += dev->n_vl15_dropped;
-
-       memset(pmp->data, 0, sizeof(pmp->data));
-
-       p->port_select = port_select;
-       if (pmp->mad_hdr.attr_mod != 0 ||
-           (port_select != port && port_select != 0xFF))
-               pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-
-       if (cntrs.symbol_error_counter > 0xFFFFUL)
-               p->symbol_error_counter = cpu_to_be16(0xFFFF);
-       else
-               p->symbol_error_counter =
-                       cpu_to_be16((u16)cntrs.symbol_error_counter);
-       if (cntrs.link_error_recovery_counter > 0xFFUL)
-               p->link_error_recovery_counter = 0xFF;
-       else
-               p->link_error_recovery_counter =
-                       (u8)cntrs.link_error_recovery_counter;
-       if (cntrs.link_downed_counter > 0xFFUL)
-               p->link_downed_counter = 0xFF;
-       else
-               p->link_downed_counter = (u8)cntrs.link_downed_counter;
-       if (cntrs.port_rcv_errors > 0xFFFFUL)
-               p->port_rcv_errors = cpu_to_be16(0xFFFF);
-       else
-               p->port_rcv_errors =
-                       cpu_to_be16((u16) cntrs.port_rcv_errors);
-       if (cntrs.port_rcv_remphys_errors > 0xFFFFUL)
-               p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
-       else
-               p->port_rcv_remphys_errors =
-                       cpu_to_be16((u16)cntrs.port_rcv_remphys_errors);
-       if (cntrs.port_xmit_discards > 0xFFFFUL)
-               p->port_xmit_discards = cpu_to_be16(0xFFFF);
-       else
-               p->port_xmit_discards =
-                       cpu_to_be16((u16)cntrs.port_xmit_discards);
-       if (cntrs.local_link_integrity_errors > 0xFUL)
-               cntrs.local_link_integrity_errors = 0xFUL;
-       if (cntrs.excessive_buffer_overrun_errors > 0xFUL)
-               cntrs.excessive_buffer_overrun_errors = 0xFUL;
-       p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) |
-               cntrs.excessive_buffer_overrun_errors;
-       if (cntrs.vl15_dropped > 0xFFFFUL)
-               p->vl15_dropped = cpu_to_be16(0xFFFF);
-       else
-               p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped);
-       if (cntrs.port_xmit_data > 0xFFFFFFFFUL)
-               p->port_xmit_data = cpu_to_be32(0xFFFFFFFF);
-       else
-               p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data);
-       if (cntrs.port_rcv_data > 0xFFFFFFFFUL)
-               p->port_rcv_data = cpu_to_be32(0xFFFFFFFF);
-       else
-               p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data);
-       if (cntrs.port_xmit_packets > 0xFFFFFFFFUL)
-               p->port_xmit_packets = cpu_to_be32(0xFFFFFFFF);
-       else
-               p->port_xmit_packets =
-                       cpu_to_be32((u32)cntrs.port_xmit_packets);
-       if (cntrs.port_rcv_packets > 0xFFFFFFFFUL)
-               p->port_rcv_packets = cpu_to_be32(0xFFFFFFFF);
-       else
-               p->port_rcv_packets =
-                       cpu_to_be32((u32) cntrs.port_rcv_packets);
-
-       return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_get_portcounters_ext(struct ib_pma_mad *pmp,
-                                        struct ib_device *ibdev, u8 port)
-{
-       struct ib_pma_portcounters_ext *p =
-               (struct ib_pma_portcounters_ext *)pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       u64 swords, rwords, spkts, rpkts, xwait;
-       u8 port_select = p->port_select;
-
-       ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-                               &rpkts, &xwait);
-
-       /* Adjust counters for any resets done. */
-       swords -= dev->z_port_xmit_data;
-       rwords -= dev->z_port_rcv_data;
-       spkts -= dev->z_port_xmit_packets;
-       rpkts -= dev->z_port_rcv_packets;
-
-       memset(pmp->data, 0, sizeof(pmp->data));
-
-       p->port_select = port_select;
-       if (pmp->mad_hdr.attr_mod != 0 ||
-           (port_select != port && port_select != 0xFF))
-               pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-
-       p->port_xmit_data = cpu_to_be64(swords);
-       p->port_rcv_data = cpu_to_be64(rwords);
-       p->port_xmit_packets = cpu_to_be64(spkts);
-       p->port_rcv_packets = cpu_to_be64(rpkts);
-       p->port_unicast_xmit_packets = cpu_to_be64(dev->n_unicast_xmit);
-       p->port_unicast_rcv_packets = cpu_to_be64(dev->n_unicast_rcv);
-       p->port_multicast_xmit_packets = cpu_to_be64(dev->n_multicast_xmit);
-       p->port_multicast_rcv_packets = cpu_to_be64(dev->n_multicast_rcv);
-
-       return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_set_portcounters(struct ib_pma_mad *pmp,
-                                    struct ib_device *ibdev, u8 port)
-{
-       struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
-               pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_verbs_counters cntrs;
-
-       /*
-        * Since the HW doesn't support clearing counters, we save the
-        * current count and subtract it from future responses.
-        */
-       ipath_get_counters(dev->dd, &cntrs);
-
-       if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR)
-               dev->z_symbol_error_counter = cntrs.symbol_error_counter;
-
-       if (p->counter_select & IB_PMA_SEL_LINK_ERROR_RECOVERY)
-               dev->z_link_error_recovery_counter =
-                       cntrs.link_error_recovery_counter;
-
-       if (p->counter_select & IB_PMA_SEL_LINK_DOWNED)
-               dev->z_link_downed_counter = cntrs.link_downed_counter;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_RCV_ERRORS)
-               dev->z_port_rcv_errors =
-                       cntrs.port_rcv_errors + dev->rcv_errors;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS)
-               dev->z_port_rcv_remphys_errors =
-                       cntrs.port_rcv_remphys_errors;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS)
-               dev->z_port_xmit_discards = cntrs.port_xmit_discards;
-
-       if (p->counter_select & IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS)
-               dev->z_local_link_integrity_errors =
-                       cntrs.local_link_integrity_errors;
-
-       if (p->counter_select & IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS)
-               dev->z_excessive_buffer_overrun_errors =
-                       cntrs.excessive_buffer_overrun_errors;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) {
-               dev->n_vl15_dropped = 0;
-               dev->z_vl15_dropped = cntrs.vl15_dropped;
-       }
-
-       if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA)
-               dev->z_port_xmit_data = cntrs.port_xmit_data;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_RCV_DATA)
-               dev->z_port_rcv_data = cntrs.port_rcv_data;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_XMIT_PACKETS)
-               dev->z_port_xmit_packets = cntrs.port_xmit_packets;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_RCV_PACKETS)
-               dev->z_port_rcv_packets = cntrs.port_rcv_packets;
-
-       return recv_pma_get_portcounters(pmp, ibdev, port);
-}
-
-static int recv_pma_set_portcounters_ext(struct ib_pma_mad *pmp,
-                                        struct ib_device *ibdev, u8 port)
-{
-       struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
-               pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       u64 swords, rwords, spkts, rpkts, xwait;
-
-       ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-                               &rpkts, &xwait);
-
-       if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA)
-               dev->z_port_xmit_data = swords;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_RCV_DATA)
-               dev->z_port_rcv_data = rwords;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_XMIT_PACKETS)
-               dev->z_port_xmit_packets = spkts;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS)
-               dev->z_port_rcv_packets = rpkts;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS)
-               dev->n_unicast_xmit = 0;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS)
-               dev->n_unicast_rcv = 0;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS)
-               dev->n_multicast_xmit = 0;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS)
-               dev->n_multicast_rcv = 0;
-
-       return recv_pma_get_portcounters_ext(pmp, ibdev, port);
-}
-
-static int process_subn(struct ib_device *ibdev, int mad_flags,
-                       u8 port_num, const struct ib_mad *in_mad,
-                       struct ib_mad *out_mad)
-{
-       struct ib_smp *smp = (struct ib_smp *)out_mad;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       int ret;
-
-       *out_mad = *in_mad;
-       if (smp->class_version != 1) {
-               smp->status |= IB_SMP_UNSUP_VERSION;
-               ret = reply(smp);
-               goto bail;
-       }
-
-       /* Is the mkey in the process of expiring? */
-       if (dev->mkey_lease_timeout &&
-           time_after_eq(jiffies, dev->mkey_lease_timeout)) {
-               /* Clear timeout and mkey protection field. */
-               dev->mkey_lease_timeout = 0;
-               dev->mkeyprot = 0;
-       }
-
-       /*
-        * M_Key checking depends on
-        * Portinfo:M_Key_protect_bits
-        */
-       if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && dev->mkey != 0 &&
-           dev->mkey != smp->mkey &&
-           (smp->method == IB_MGMT_METHOD_SET ||
-            (smp->method == IB_MGMT_METHOD_GET &&
-             dev->mkeyprot >= 2))) {
-               if (dev->mkey_violations != 0xFFFF)
-                       ++dev->mkey_violations;
-               if (dev->mkey_lease_timeout ||
-                   dev->mkey_lease_period == 0) {
-                       ret = IB_MAD_RESULT_SUCCESS |
-                               IB_MAD_RESULT_CONSUMED;
-                       goto bail;
-               }
-               dev->mkey_lease_timeout = jiffies +
-                       dev->mkey_lease_period * HZ;
-               /* Future: Generate a trap notice. */
-               ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-               goto bail;
-       } else if (dev->mkey_lease_timeout)
-               dev->mkey_lease_timeout = 0;
-
-       switch (smp->method) {
-       case IB_MGMT_METHOD_GET:
-               switch (smp->attr_id) {
-               case IB_SMP_ATTR_NODE_DESC:
-                       ret = recv_subn_get_nodedescription(smp, ibdev);
-                       goto bail;
-               case IB_SMP_ATTR_NODE_INFO:
-                       ret = recv_subn_get_nodeinfo(smp, ibdev, port_num);
-                       goto bail;
-               case IB_SMP_ATTR_GUID_INFO:
-                       ret = recv_subn_get_guidinfo(smp, ibdev);
-                       goto bail;
-               case IB_SMP_ATTR_PORT_INFO:
-                       ret = recv_subn_get_portinfo(smp, ibdev, port_num);
-                       goto bail;
-               case IB_SMP_ATTR_PKEY_TABLE:
-                       ret = recv_subn_get_pkeytable(smp, ibdev);
-                       goto bail;
-               case IB_SMP_ATTR_SM_INFO:
-                       if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
-                               ret = IB_MAD_RESULT_SUCCESS |
-                                       IB_MAD_RESULT_CONSUMED;
-                               goto bail;
-                       }
-                       if (dev->port_cap_flags & IB_PORT_SM) {
-                               ret = IB_MAD_RESULT_SUCCESS;
-                               goto bail;
-                       }
-                       /* FALLTHROUGH */
-               default:
-                       smp->status |= IB_SMP_UNSUP_METH_ATTR;
-                       ret = reply(smp);
-                       goto bail;
-               }
-
-       case IB_MGMT_METHOD_SET:
-               switch (smp->attr_id) {
-               case IB_SMP_ATTR_GUID_INFO:
-                       ret = recv_subn_set_guidinfo(smp, ibdev);
-                       goto bail;
-               case IB_SMP_ATTR_PORT_INFO:
-                       ret = recv_subn_set_portinfo(smp, ibdev, port_num);
-                       goto bail;
-               case IB_SMP_ATTR_PKEY_TABLE:
-                       ret = recv_subn_set_pkeytable(smp, ibdev, port_num);
-                       goto bail;
-               case IB_SMP_ATTR_SM_INFO:
-                       if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
-                               ret = IB_MAD_RESULT_SUCCESS |
-                                       IB_MAD_RESULT_CONSUMED;
-                               goto bail;
-                       }
-                       if (dev->port_cap_flags & IB_PORT_SM) {
-                               ret = IB_MAD_RESULT_SUCCESS;
-                               goto bail;
-                       }
-                       /* FALLTHROUGH */
-               default:
-                       smp->status |= IB_SMP_UNSUP_METH_ATTR;
-                       ret = reply(smp);
-                       goto bail;
-               }
-
-       case IB_MGMT_METHOD_TRAP:
-       case IB_MGMT_METHOD_REPORT:
-       case IB_MGMT_METHOD_REPORT_RESP:
-       case IB_MGMT_METHOD_TRAP_REPRESS:
-       case IB_MGMT_METHOD_GET_RESP:
-               /*
-                * The ib_mad module will call us to process responses
-                * before checking for other consumers.
-                * Just tell the caller to process it normally.
-                */
-               ret = IB_MAD_RESULT_SUCCESS;
-               goto bail;
-       default:
-               smp->status |= IB_SMP_UNSUP_METHOD;
-               ret = reply(smp);
-       }
-
-bail:
-       return ret;
-}
-
-static int process_perf(struct ib_device *ibdev, u8 port_num,
-                       const struct ib_mad *in_mad,
-                       struct ib_mad *out_mad)
-{
-       struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
-       int ret;
-
-       *out_mad = *in_mad;
-       if (pmp->mad_hdr.class_version != 1) {
-               pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
-               ret = reply((struct ib_smp *) pmp);
-               goto bail;
-       }
-
-       switch (pmp->mad_hdr.method) {
-       case IB_MGMT_METHOD_GET:
-               switch (pmp->mad_hdr.attr_id) {
-               case IB_PMA_CLASS_PORT_INFO:
-                       ret = recv_pma_get_classportinfo(pmp);
-                       goto bail;
-               case IB_PMA_PORT_SAMPLES_CONTROL:
-                       ret = recv_pma_get_portsamplescontrol(pmp, ibdev,
-                                                             port_num);
-                       goto bail;
-               case IB_PMA_PORT_SAMPLES_RESULT:
-                       ret = recv_pma_get_portsamplesresult(pmp, ibdev);
-                       goto bail;
-               case IB_PMA_PORT_SAMPLES_RESULT_EXT:
-                       ret = recv_pma_get_portsamplesresult_ext(pmp,
-                                                                ibdev);
-                       goto bail;
-               case IB_PMA_PORT_COUNTERS:
-                       ret = recv_pma_get_portcounters(pmp, ibdev,
-                                                       port_num);
-                       goto bail;
-               case IB_PMA_PORT_COUNTERS_EXT:
-                       ret = recv_pma_get_portcounters_ext(pmp, ibdev,
-                                                           port_num);
-                       goto bail;
-               default:
-                       pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
-                       ret = reply((struct ib_smp *) pmp);
-                       goto bail;
-               }
-
-       case IB_MGMT_METHOD_SET:
-               switch (pmp->mad_hdr.attr_id) {
-               case IB_PMA_PORT_SAMPLES_CONTROL:
-                       ret = recv_pma_set_portsamplescontrol(pmp, ibdev,
-                                                             port_num);
-                       goto bail;
-               case IB_PMA_PORT_COUNTERS:
-                       ret = recv_pma_set_portcounters(pmp, ibdev,
-                                                       port_num);
-                       goto bail;
-               case IB_PMA_PORT_COUNTERS_EXT:
-                       ret = recv_pma_set_portcounters_ext(pmp, ibdev,
-                                                           port_num);
-                       goto bail;
-               default:
-                       pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
-                       ret = reply((struct ib_smp *) pmp);
-                       goto bail;
-               }
-
-       case IB_MGMT_METHOD_GET_RESP:
-               /*
-                * The ib_mad module will call us to process responses
-                * before checking for other consumers.
-                * Just tell the caller to process it normally.
-                */
-               ret = IB_MAD_RESULT_SUCCESS;
-               goto bail;
-       default:
-               pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
-               ret = reply((struct ib_smp *) pmp);
-       }
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_process_mad - process an incoming MAD packet
- * @ibdev: the infiniband device this packet came in on
- * @mad_flags: MAD flags
- * @port_num: the port number this packet came in on
- * @in_wc: the work completion entry for this packet
- * @in_grh: the global route header for this packet
- * @in_mad: the incoming MAD
- * @out_mad: any outgoing MAD reply
- *
- * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
- * interested in processing.
- *
- * Note that the verbs framework has already done the MAD sanity checks,
- * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
- * MADs.
- *
- * This is called by the ib_mad module.
- */
-int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
-                     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-                     const struct ib_mad_hdr *in, size_t in_mad_size,
-                     struct ib_mad_hdr *out, size_t *out_mad_size,
-                     u16 *out_mad_pkey_index)
-{
-       int ret;
-       const struct ib_mad *in_mad = (const struct ib_mad *)in;
-       struct ib_mad *out_mad = (struct ib_mad *)out;
-
-       if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
-                        *out_mad_size != sizeof(*out_mad)))
-               return IB_MAD_RESULT_FAILURE;
-
-       switch (in_mad->mad_hdr.mgmt_class) {
-       case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
-       case IB_MGMT_CLASS_SUBN_LID_ROUTED:
-               ret = process_subn(ibdev, mad_flags, port_num,
-                                  in_mad, out_mad);
-               goto bail;
-       case IB_MGMT_CLASS_PERF_MGMT:
-               ret = process_perf(ibdev, port_num, in_mad, out_mad);
-               goto bail;
-       default:
-               ret = IB_MAD_RESULT_SUCCESS;
-       }
-
-bail:
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_mmap.c b/drivers/staging/rdma/ipath/ipath_mmap.c
deleted file mode 100644 (file)
index e732742..0000000
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/module.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/errno.h>
-#include <asm/pgtable.h>
-
-#include "ipath_verbs.h"
-
-/**
- * ipath_release_mmap_info - free mmap info structure
- * @ref: a pointer to the kref within struct ipath_mmap_info
- */
-void ipath_release_mmap_info(struct kref *ref)
-{
-       struct ipath_mmap_info *ip =
-               container_of(ref, struct ipath_mmap_info, ref);
-       struct ipath_ibdev *dev = to_idev(ip->context->device);
-
-       spin_lock_irq(&dev->pending_lock);
-       list_del(&ip->pending_mmaps);
-       spin_unlock_irq(&dev->pending_lock);
-
-       vfree(ip->obj);
-       kfree(ip);
-}
-
-/*
- * open and close keep track of how many times the CQ is mapped,
- * to avoid releasing it.
- */
-static void ipath_vma_open(struct vm_area_struct *vma)
-{
-       struct ipath_mmap_info *ip = vma->vm_private_data;
-
-       kref_get(&ip->ref);
-}
-
-static void ipath_vma_close(struct vm_area_struct *vma)
-{
-       struct ipath_mmap_info *ip = vma->vm_private_data;
-
-       kref_put(&ip->ref, ipath_release_mmap_info);
-}
-
-static const struct vm_operations_struct ipath_vm_ops = {
-       .open =     ipath_vma_open,
-       .close =    ipath_vma_close,
-};
-
-/**
- * ipath_mmap - create a new mmap region
- * @context: the IB user context of the process making the mmap() call
- * @vma: the VMA to be initialized
- * Return zero if the mmap is OK. Otherwise, return an errno.
- */
-int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-       struct ipath_ibdev *dev = to_idev(context->device);
-       unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-       unsigned long size = vma->vm_end - vma->vm_start;
-       struct ipath_mmap_info *ip, *pp;
-       int ret = -EINVAL;
-
-       /*
-        * Search the device's list of objects waiting for a mmap call.
-        * Normally, this list is very short since a call to create a
-        * CQ, QP, or SRQ is soon followed by a call to mmap().
-        */
-       spin_lock_irq(&dev->pending_lock);
-       list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
-                                pending_mmaps) {
-               /* Only the creator is allowed to mmap the object */
-               if (context != ip->context || (__u64) offset != ip->offset)
-                       continue;
-               /* Don't allow a mmap larger than the object. */
-               if (size > ip->size)
-                       break;
-
-               list_del_init(&ip->pending_mmaps);
-               spin_unlock_irq(&dev->pending_lock);
-
-               ret = remap_vmalloc_range(vma, ip->obj, 0);
-               if (ret)
-                       goto done;
-               vma->vm_ops = &ipath_vm_ops;
-               vma->vm_private_data = ip;
-               ipath_vma_open(vma);
-               goto done;
-       }
-       spin_unlock_irq(&dev->pending_lock);
-done:
-       return ret;
-}
-
-/*
- * Allocate information for ipath_mmap
- */
-struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
-                                              u32 size,
-                                              struct ib_ucontext *context,
-                                              void *obj) {
-       struct ipath_mmap_info *ip;
-
-       ip = kmalloc(sizeof *ip, GFP_KERNEL);
-       if (!ip)
-               goto bail;
-
-       size = PAGE_ALIGN(size);
-
-       spin_lock_irq(&dev->mmap_offset_lock);
-       if (dev->mmap_offset == 0)
-               dev->mmap_offset = PAGE_SIZE;
-       ip->offset = dev->mmap_offset;
-       dev->mmap_offset += size;
-       spin_unlock_irq(&dev->mmap_offset_lock);
-
-       INIT_LIST_HEAD(&ip->pending_mmaps);
-       ip->size = size;
-       ip->context = context;
-       ip->obj = obj;
-       kref_init(&ip->ref);
-
-bail:
-       return ip;
-}
-
-void ipath_update_mmap_info(struct ipath_ibdev *dev,
-                           struct ipath_mmap_info *ip,
-                           u32 size, void *obj) {
-       size = PAGE_ALIGN(size);
-
-       spin_lock_irq(&dev->mmap_offset_lock);
-       if (dev->mmap_offset == 0)
-               dev->mmap_offset = PAGE_SIZE;
-       ip->offset = dev->mmap_offset;
-       dev->mmap_offset += size;
-       spin_unlock_irq(&dev->mmap_offset_lock);
-
-       ip->size = size;
-       ip->obj = obj;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_mr.c b/drivers/staging/rdma/ipath/ipath_mr.c
deleted file mode 100644 (file)
index c7278f6..0000000
+++ /dev/null
@@ -1,425 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/slab.h>
-
-#include <rdma/ib_umem.h>
-#include <rdma/ib_pack.h>
-#include <rdma/ib_smi.h>
-
-#include "ipath_verbs.h"
-
-/* Fast memory region */
-struct ipath_fmr {
-       struct ib_fmr ibfmr;
-       u8 page_shift;
-       struct ipath_mregion mr;        /* must be last */
-};
-
-static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
-{
-       return container_of(ibfmr, struct ipath_fmr, ibfmr);
-}
-
-/**
- * ipath_get_dma_mr - get a DMA memory region
- * @pd: protection domain for this memory region
- * @acc: access flags
- *
- * Returns the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the
- * struct ib_dma_mapping_ops functions (see ipath_dma.c).
- */
-struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc)
-{
-       struct ipath_mr *mr;
-       struct ib_mr *ret;
-
-       mr = kzalloc(sizeof *mr, GFP_KERNEL);
-       if (!mr) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       mr->mr.access_flags = acc;
-       ret = &mr->ibmr;
-
-bail:
-       return ret;
-}
-
-static struct ipath_mr *alloc_mr(int count,
-                                struct ipath_lkey_table *lk_table)
-{
-       struct ipath_mr *mr;
-       int m, i = 0;
-
-       /* Allocate struct plus pointers to first level page tables. */
-       m = (count + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
-       mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
-       if (!mr)
-               goto done;
-
-       /* Allocate first level page tables. */
-       for (; i < m; i++) {
-               mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL);
-               if (!mr->mr.map[i])
-                       goto bail;
-       }
-       mr->mr.mapsz = m;
-
-       /*
-        * ib_reg_phys_mr() will initialize mr->ibmr except for
-        * lkey and rkey.
-        */
-       if (!ipath_alloc_lkey(lk_table, &mr->mr))
-               goto bail;
-       mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey;
-
-       goto done;
-
-bail:
-       while (i) {
-               i--;
-               kfree(mr->mr.map[i]);
-       }
-       kfree(mr);
-       mr = NULL;
-
-done:
-       return mr;
-}
-
-/**
- * ipath_reg_phys_mr - register a physical memory region
- * @pd: protection domain for this memory region
- * @buffer_list: pointer to the list of physical buffers to register
- * @num_phys_buf: the number of physical buffers to register
- * @iova_start: the starting address passed over IB which maps to this MR
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
-                               struct ib_phys_buf *buffer_list,
-                               int num_phys_buf, int acc, u64 *iova_start)
-{
-       struct ipath_mr *mr;
-       int n, m, i;
-       struct ib_mr *ret;
-
-       mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table);
-       if (mr == NULL) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       mr->mr.pd = pd;
-       mr->mr.user_base = *iova_start;
-       mr->mr.iova = *iova_start;
-       mr->mr.length = 0;
-       mr->mr.offset = 0;
-       mr->mr.access_flags = acc;
-       mr->mr.max_segs = num_phys_buf;
-       mr->umem = NULL;
-
-       m = 0;
-       n = 0;
-       for (i = 0; i < num_phys_buf; i++) {
-               mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
-               mr->mr.map[m]->segs[n].length = buffer_list[i].size;
-               mr->mr.length += buffer_list[i].size;
-               n++;
-               if (n == IPATH_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-
-       ret = &mr->ibmr;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_reg_user_mr - register a userspace memory region
- * @pd: protection domain for this memory region
- * @start: starting userspace address
- * @length: length of region to register
- * @virt_addr: virtual address to use (from HCA's point of view)
- * @mr_access_flags: access flags for this memory region
- * @udata: unused by the InfiniPath driver
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-                               u64 virt_addr, int mr_access_flags,
-                               struct ib_udata *udata)
-{
-       struct ipath_mr *mr;
-       struct ib_umem *umem;
-       int n, m, entry;
-       struct scatterlist *sg;
-       struct ib_mr *ret;
-
-       if (length == 0) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
-
-       umem = ib_umem_get(pd->uobject->context, start, length,
-                          mr_access_flags, 0);
-       if (IS_ERR(umem))
-               return (void *) umem;
-
-       n = umem->nmap;
-       mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
-       if (!mr) {
-               ret = ERR_PTR(-ENOMEM);
-               ib_umem_release(umem);
-               goto bail;
-       }
-
-       mr->mr.pd = pd;
-       mr->mr.user_base = start;
-       mr->mr.iova = virt_addr;
-       mr->mr.length = length;
-       mr->mr.offset = ib_umem_offset(umem);
-       mr->mr.access_flags = mr_access_flags;
-       mr->mr.max_segs = n;
-       mr->umem = umem;
-
-       m = 0;
-       n = 0;
-       for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-               void *vaddr;
-
-               vaddr = page_address(sg_page(sg));
-               if (!vaddr) {
-                       ret = ERR_PTR(-EINVAL);
-                       goto bail;
-               }
-               mr->mr.map[m]->segs[n].vaddr = vaddr;
-               mr->mr.map[m]->segs[n].length = umem->page_size;
-               n++;
-               if (n == IPATH_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-       ret = &mr->ibmr;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_dereg_mr - unregister and free a memory region
- * @ibmr: the memory region to free
- *
- * Returns 0 on success.
- *
- * Note that this is called to free MRs created by ipath_get_dma_mr()
- * or ipath_reg_user_mr().
- */
-int ipath_dereg_mr(struct ib_mr *ibmr)
-{
-       struct ipath_mr *mr = to_imr(ibmr);
-       int i;
-
-       ipath_free_lkey(&to_idev(ibmr->device)->lk_table, ibmr->lkey);
-       i = mr->mr.mapsz;
-       while (i) {
-               i--;
-               kfree(mr->mr.map[i]);
-       }
-
-       if (mr->umem)
-               ib_umem_release(mr->umem);
-
-       kfree(mr);
-       return 0;
-}
-
-/**
- * ipath_alloc_fmr - allocate a fast memory region
- * @pd: the protection domain for this memory region
- * @mr_access_flags: access flags for this memory region
- * @fmr_attr: fast memory region attributes
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-                              struct ib_fmr_attr *fmr_attr)
-{
-       struct ipath_fmr *fmr;
-       int m, i = 0;
-       struct ib_fmr *ret;
-
-       /* Allocate struct plus pointers to first level page tables. */
-       m = (fmr_attr->max_pages + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
-       fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
-       if (!fmr)
-               goto bail;
-
-       /* Allocate first level page tables. */
-       for (; i < m; i++) {
-               fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0],
-                                        GFP_KERNEL);
-               if (!fmr->mr.map[i])
-                       goto bail;
-       }
-       fmr->mr.mapsz = m;
-
-       /*
-        * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
-        * rkey.
-        */
-       if (!ipath_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr))
-               goto bail;
-       fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mr.lkey;
-       /*
-        * Resources are allocated but no valid mapping (RKEY can't be
-        * used).
-        */
-       fmr->mr.pd = pd;
-       fmr->mr.user_base = 0;
-       fmr->mr.iova = 0;
-       fmr->mr.length = 0;
-       fmr->mr.offset = 0;
-       fmr->mr.access_flags = mr_access_flags;
-       fmr->mr.max_segs = fmr_attr->max_pages;
-       fmr->page_shift = fmr_attr->page_shift;
-
-       ret = &fmr->ibfmr;
-       goto done;
-
-bail:
-       while (i)
-               kfree(fmr->mr.map[--i]);
-       kfree(fmr);
-       ret = ERR_PTR(-ENOMEM);
-
-done:
-       return ret;
-}
-
-/**
- * ipath_map_phys_fmr - set up a fast memory region
- * @ibmfr: the fast memory region to set up
- * @page_list: the list of pages to associate with the fast memory region
- * @list_len: the number of pages to associate with the fast memory region
- * @iova: the virtual address of the start of the fast memory region
- *
- * This may be called from interrupt context.
- */
-
-int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list,
-                      int list_len, u64 iova)
-{
-       struct ipath_fmr *fmr = to_ifmr(ibfmr);
-       struct ipath_lkey_table *rkt;
-       unsigned long flags;
-       int m, n, i;
-       u32 ps;
-       int ret;
-
-       if (list_len > fmr->mr.max_segs) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       rkt = &to_idev(ibfmr->device)->lk_table;
-       spin_lock_irqsave(&rkt->lock, flags);
-       fmr->mr.user_base = iova;
-       fmr->mr.iova = iova;
-       ps = 1 << fmr->page_shift;
-       fmr->mr.length = list_len * ps;
-       m = 0;
-       n = 0;
-       ps = 1 << fmr->page_shift;
-       for (i = 0; i < list_len; i++) {
-               fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
-               fmr->mr.map[m]->segs[n].length = ps;
-               if (++n == IPATH_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-       spin_unlock_irqrestore(&rkt->lock, flags);
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_unmap_fmr - unmap fast memory regions
- * @fmr_list: the list of fast memory regions to unmap
- *
- * Returns 0 on success.
- */
-int ipath_unmap_fmr(struct list_head *fmr_list)
-{
-       struct ipath_fmr *fmr;
-       struct ipath_lkey_table *rkt;
-       unsigned long flags;
-
-       list_for_each_entry(fmr, fmr_list, ibfmr.list) {
-               rkt = &to_idev(fmr->ibfmr.device)->lk_table;
-               spin_lock_irqsave(&rkt->lock, flags);
-               fmr->mr.user_base = 0;
-               fmr->mr.iova = 0;
-               fmr->mr.length = 0;
-               spin_unlock_irqrestore(&rkt->lock, flags);
-       }
-       return 0;
-}
-
-/**
- * ipath_dealloc_fmr - deallocate a fast memory region
- * @ibfmr: the fast memory region to deallocate
- *
- * Returns 0 on success.
- */
-int ipath_dealloc_fmr(struct ib_fmr *ibfmr)
-{
-       struct ipath_fmr *fmr = to_ifmr(ibfmr);
-       int i;
-
-       ipath_free_lkey(&to_idev(ibfmr->device)->lk_table, ibfmr->lkey);
-       i = fmr->mr.mapsz;
-       while (i)
-               kfree(fmr->mr.map[--i]);
-       kfree(fmr);
-       return 0;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_qp.c b/drivers/staging/rdma/ipath/ipath_qp.c
deleted file mode 100644 (file)
index 280cd2d..0000000
+++ /dev/null
@@ -1,1079 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-#define BITS_PER_PAGE          (PAGE_SIZE*BITS_PER_BYTE)
-#define BITS_PER_PAGE_MASK     (BITS_PER_PAGE-1)
-#define mk_qpn(qpt, map, off)  (((map) - (qpt)->map) * BITS_PER_PAGE + \
-                                (off))
-#define find_next_offset(map, off) find_next_zero_bit((map)->page, \
-                                                     BITS_PER_PAGE, off)
-
-/*
- * Convert the AETH credit code into the number of credits.
- */
-static u32 credit_table[31] = {
-       0,                      /* 0 */
-       1,                      /* 1 */
-       2,                      /* 2 */
-       3,                      /* 3 */
-       4,                      /* 4 */
-       6,                      /* 5 */
-       8,                      /* 6 */
-       12,                     /* 7 */
-       16,                     /* 8 */
-       24,                     /* 9 */
-       32,                     /* A */
-       48,                     /* B */
-       64,                     /* C */
-       96,                     /* D */
-       128,                    /* E */
-       192,                    /* F */
-       256,                    /* 10 */
-       384,                    /* 11 */
-       512,                    /* 12 */
-       768,                    /* 13 */
-       1024,                   /* 14 */
-       1536,                   /* 15 */
-       2048,                   /* 16 */
-       3072,                   /* 17 */
-       4096,                   /* 18 */
-       6144,                   /* 19 */
-       8192,                   /* 1A */
-       12288,                  /* 1B */
-       16384,                  /* 1C */
-       24576,                  /* 1D */
-       32768                   /* 1E */
-};
-
-
-static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map)
-{
-       unsigned long page = get_zeroed_page(GFP_KERNEL);
-       unsigned long flags;
-
-       /*
-        * Free the page if someone raced with us installing it.
-        */
-
-       spin_lock_irqsave(&qpt->lock, flags);
-       if (map->page)
-               free_page(page);
-       else
-               map->page = (void *)page;
-       spin_unlock_irqrestore(&qpt->lock, flags);
-}
-
-
-static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type)
-{
-       u32 i, offset, max_scan, qpn;
-       struct qpn_map *map;
-       u32 ret = -1;
-
-       if (type == IB_QPT_SMI)
-               ret = 0;
-       else if (type == IB_QPT_GSI)
-               ret = 1;
-
-       if (ret != -1) {
-               map = &qpt->map[0];
-               if (unlikely(!map->page)) {
-                       get_map_page(qpt, map);
-                       if (unlikely(!map->page)) {
-                               ret = -ENOMEM;
-                               goto bail;
-                       }
-               }
-               if (!test_and_set_bit(ret, map->page))
-                       atomic_dec(&map->n_free);
-               else
-                       ret = -EBUSY;
-               goto bail;
-       }
-
-       qpn = qpt->last + 1;
-       if (qpn >= QPN_MAX)
-               qpn = 2;
-       offset = qpn & BITS_PER_PAGE_MASK;
-       map = &qpt->map[qpn / BITS_PER_PAGE];
-       max_scan = qpt->nmaps - !offset;
-       for (i = 0;;) {
-               if (unlikely(!map->page)) {
-                       get_map_page(qpt, map);
-                       if (unlikely(!map->page))
-                               break;
-               }
-               if (likely(atomic_read(&map->n_free))) {
-                       do {
-                               if (!test_and_set_bit(offset, map->page)) {
-                                       atomic_dec(&map->n_free);
-                                       qpt->last = qpn;
-                                       ret = qpn;
-                                       goto bail;
-                               }
-                               offset = find_next_offset(map, offset);
-                               qpn = mk_qpn(qpt, map, offset);
-                               /*
-                                * This test differs from alloc_pidmap().
-                                * If find_next_offset() does find a zero
-                                * bit, we don't need to check for QPN
-                                * wrapping around past our starting QPN.
-                                * We just need to be sure we don't loop
-                                * forever.
-                                */
-                       } while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
-               }
-               /*
-                * In order to keep the number of pages allocated to a
-                * minimum, we scan the all existing pages before increasing
-                * the size of the bitmap table.
-                */
-               if (++i > max_scan) {
-                       if (qpt->nmaps == QPNMAP_ENTRIES)
-                               break;
-                       map = &qpt->map[qpt->nmaps++];
-                       offset = 0;
-               } else if (map < &qpt->map[qpt->nmaps]) {
-                       ++map;
-                       offset = 0;
-               } else {
-                       map = &qpt->map[0];
-                       offset = 2;
-               }
-               qpn = mk_qpn(qpt, map, offset);
-       }
-
-       ret = -ENOMEM;
-
-bail:
-       return ret;
-}
-
-static void free_qpn(struct ipath_qp_table *qpt, u32 qpn)
-{
-       struct qpn_map *map;
-
-       map = qpt->map + qpn / BITS_PER_PAGE;
-       if (map->page)
-               clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
-       atomic_inc(&map->n_free);
-}
-
-/**
- * ipath_alloc_qpn - allocate a QP number
- * @qpt: the QP table
- * @qp: the QP
- * @type: the QP type (IB_QPT_SMI and IB_QPT_GSI are special)
- *
- * Allocate the next available QPN and put the QP into the hash table.
- * The hash table holds a reference to the QP.
- */
-static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp,
-                          enum ib_qp_type type)
-{
-       unsigned long flags;
-       int ret;
-
-       ret = alloc_qpn(qpt, type);
-       if (ret < 0)
-               goto bail;
-       qp->ibqp.qp_num = ret;
-
-       /* Add the QP to the hash table. */
-       spin_lock_irqsave(&qpt->lock, flags);
-
-       ret %= qpt->max;
-       qp->next = qpt->table[ret];
-       qpt->table[ret] = qp;
-       atomic_inc(&qp->refcount);
-
-       spin_unlock_irqrestore(&qpt->lock, flags);
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_free_qp - remove a QP from the QP table
- * @qpt: the QP table
- * @qp: the QP to remove
- *
- * Remove the QP from the table so it can't be found asynchronously by
- * the receive interrupt routine.
- */
-static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
-{
-       struct ipath_qp *q, **qpp;
-       unsigned long flags;
-
-       spin_lock_irqsave(&qpt->lock, flags);
-
-       /* Remove QP from the hash table. */
-       qpp = &qpt->table[qp->ibqp.qp_num % qpt->max];
-       for (; (q = *qpp) != NULL; qpp = &q->next) {
-               if (q == qp) {
-                       *qpp = qp->next;
-                       qp->next = NULL;
-                       atomic_dec(&qp->refcount);
-                       break;
-               }
-       }
-
-       spin_unlock_irqrestore(&qpt->lock, flags);
-}
-
-/**
- * ipath_free_all_qps - check for QPs still in use
- * @qpt: the QP table to empty
- *
- * There should not be any QPs still in use.
- * Free memory for table.
- */
-unsigned ipath_free_all_qps(struct ipath_qp_table *qpt)
-{
-       unsigned long flags;
-       struct ipath_qp *qp;
-       u32 n, qp_inuse = 0;
-
-       spin_lock_irqsave(&qpt->lock, flags);
-       for (n = 0; n < qpt->max; n++) {
-               qp = qpt->table[n];
-               qpt->table[n] = NULL;
-
-               for (; qp; qp = qp->next)
-                       qp_inuse++;
-       }
-       spin_unlock_irqrestore(&qpt->lock, flags);
-
-       for (n = 0; n < ARRAY_SIZE(qpt->map); n++)
-               if (qpt->map[n].page)
-                       free_page((unsigned long) qpt->map[n].page);
-       return qp_inuse;
-}
-
-/**
- * ipath_lookup_qpn - return the QP with the given QPN
- * @qpt: the QP table
- * @qpn: the QP number to look up
- *
- * The caller is responsible for decrementing the QP reference count
- * when done.
- */
-struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn)
-{
-       unsigned long flags;
-       struct ipath_qp *qp;
-
-       spin_lock_irqsave(&qpt->lock, flags);
-
-       for (qp = qpt->table[qpn % qpt->max]; qp; qp = qp->next) {
-               if (qp->ibqp.qp_num == qpn) {
-                       atomic_inc(&qp->refcount);
-                       break;
-               }
-       }
-
-       spin_unlock_irqrestore(&qpt->lock, flags);
-       return qp;
-}
-
-/**
- * ipath_reset_qp - initialize the QP state to the reset state
- * @qp: the QP to reset
- * @type: the QP type
- */
-static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
-{
-       qp->remote_qpn = 0;
-       qp->qkey = 0;
-       qp->qp_access_flags = 0;
-       atomic_set(&qp->s_dma_busy, 0);
-       qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
-       qp->s_hdrwords = 0;
-       qp->s_wqe = NULL;
-       qp->s_pkt_delay = 0;
-       qp->s_draining = 0;
-       qp->s_psn = 0;
-       qp->r_psn = 0;
-       qp->r_msn = 0;
-       if (type == IB_QPT_RC) {
-               qp->s_state = IB_OPCODE_RC_SEND_LAST;
-               qp->r_state = IB_OPCODE_RC_SEND_LAST;
-       } else {
-               qp->s_state = IB_OPCODE_UC_SEND_LAST;
-               qp->r_state = IB_OPCODE_UC_SEND_LAST;
-       }
-       qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
-       qp->r_nak_state = 0;
-       qp->r_aflags = 0;
-       qp->r_flags = 0;
-       qp->s_rnr_timeout = 0;
-       qp->s_head = 0;
-       qp->s_tail = 0;
-       qp->s_cur = 0;
-       qp->s_last = 0;
-       qp->s_ssn = 1;
-       qp->s_lsn = 0;
-       memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
-       qp->r_head_ack_queue = 0;
-       qp->s_tail_ack_queue = 0;
-       qp->s_num_rd_atomic = 0;
-       if (qp->r_rq.wq) {
-               qp->r_rq.wq->head = 0;
-               qp->r_rq.wq->tail = 0;
-       }
-}
-
-/**
- * ipath_error_qp - put a QP into the error state
- * @qp: the QP to put into the error state
- * @err: the receive completion error to signal if a RWQE is active
- *
- * Flushes both send and receive work queues.
- * Returns true if last WQE event should be generated.
- * The QP s_lock should be held and interrupts disabled.
- * If we are already in error state, just return.
- */
-
-int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ib_wc wc;
-       int ret = 0;
-
-       if (qp->state == IB_QPS_ERR)
-               goto bail;
-
-       qp->state = IB_QPS_ERR;
-
-       spin_lock(&dev->pending_lock);
-       if (!list_empty(&qp->timerwait))
-               list_del_init(&qp->timerwait);
-       if (!list_empty(&qp->piowait))
-               list_del_init(&qp->piowait);
-       spin_unlock(&dev->pending_lock);
-
-       /* Schedule the sending tasklet to drain the send work queue. */
-       if (qp->s_last != qp->s_head)
-               ipath_schedule_send(qp);
-
-       memset(&wc, 0, sizeof(wc));
-       wc.qp = &qp->ibqp;
-       wc.opcode = IB_WC_RECV;
-
-       if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
-               wc.wr_id = qp->r_wr_id;
-               wc.status = err;
-               ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-       }
-       wc.status = IB_WC_WR_FLUSH_ERR;
-
-       if (qp->r_rq.wq) {
-               struct ipath_rwq *wq;
-               u32 head;
-               u32 tail;
-
-               spin_lock(&qp->r_rq.lock);
-
-               /* sanity check pointers before trusting them */
-               wq = qp->r_rq.wq;
-               head = wq->head;
-               if (head >= qp->r_rq.size)
-                       head = 0;
-               tail = wq->tail;
-               if (tail >= qp->r_rq.size)
-                       tail = 0;
-               while (tail != head) {
-                       wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
-                       if (++tail >= qp->r_rq.size)
-                               tail = 0;
-                       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-               }
-               wq->tail = tail;
-
-               spin_unlock(&qp->r_rq.lock);
-       } else if (qp->ibqp.event_handler)
-               ret = 1;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_modify_qp - modify the attributes of a queue pair
- * @ibqp: the queue pair who's attributes we're modifying
- * @attr: the new attributes
- * @attr_mask: the mask of attributes to modify
- * @udata: user data for ipathverbs.so
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                   int attr_mask, struct ib_udata *udata)
-{
-       struct ipath_ibdev *dev = to_idev(ibqp->device);
-       struct ipath_qp *qp = to_iqp(ibqp);
-       enum ib_qp_state cur_state, new_state;
-       int lastwqe = 0;
-       int ret;
-
-       spin_lock_irq(&qp->s_lock);
-
-       cur_state = attr_mask & IB_QP_CUR_STATE ?
-               attr->cur_qp_state : qp->state;
-       new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
-       if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
-                               attr_mask, IB_LINK_LAYER_UNSPECIFIED))
-               goto inval;
-
-       if (attr_mask & IB_QP_AV) {
-               if (attr->ah_attr.dlid == 0 ||
-                   attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE)
-                       goto inval;
-
-               if ((attr->ah_attr.ah_flags & IB_AH_GRH) &&
-                   (attr->ah_attr.grh.sgid_index > 1))
-                       goto inval;
-       }
-
-       if (attr_mask & IB_QP_PKEY_INDEX)
-               if (attr->pkey_index >= ipath_get_npkeys(dev->dd))
-                       goto inval;
-
-       if (attr_mask & IB_QP_MIN_RNR_TIMER)
-               if (attr->min_rnr_timer > 31)
-                       goto inval;
-
-       if (attr_mask & IB_QP_PORT)
-               if (attr->port_num == 0 ||
-                   attr->port_num > ibqp->device->phys_port_cnt)
-                       goto inval;
-
-       /*
-        * don't allow invalid Path MTU values or greater than 2048
-        * unless we are configured for a 4KB MTU
-        */
-       if ((attr_mask & IB_QP_PATH_MTU) &&
-               (ib_mtu_enum_to_int(attr->path_mtu) == -1 ||
-               (attr->path_mtu > IB_MTU_2048 && !ipath_mtu4096)))
-               goto inval;
-
-       if (attr_mask & IB_QP_PATH_MIG_STATE)
-               if (attr->path_mig_state != IB_MIG_MIGRATED &&
-                   attr->path_mig_state != IB_MIG_REARM)
-                       goto inval;
-
-       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-               if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC)
-                       goto inval;
-
-       switch (new_state) {
-       case IB_QPS_RESET:
-               if (qp->state != IB_QPS_RESET) {
-                       qp->state = IB_QPS_RESET;
-                       spin_lock(&dev->pending_lock);
-                       if (!list_empty(&qp->timerwait))
-                               list_del_init(&qp->timerwait);
-                       if (!list_empty(&qp->piowait))
-                               list_del_init(&qp->piowait);
-                       spin_unlock(&dev->pending_lock);
-                       qp->s_flags &= ~IPATH_S_ANY_WAIT;
-                       spin_unlock_irq(&qp->s_lock);
-                       /* Stop the sending tasklet */
-                       tasklet_kill(&qp->s_task);
-                       wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
-                       spin_lock_irq(&qp->s_lock);
-               }
-               ipath_reset_qp(qp, ibqp->qp_type);
-               break;
-
-       case IB_QPS_SQD:
-               qp->s_draining = qp->s_last != qp->s_cur;
-               qp->state = new_state;
-               break;
-
-       case IB_QPS_SQE:
-               if (qp->ibqp.qp_type == IB_QPT_RC)
-                       goto inval;
-               qp->state = new_state;
-               break;
-
-       case IB_QPS_ERR:
-               lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-               break;
-
-       default:
-               qp->state = new_state;
-               break;
-       }
-
-       if (attr_mask & IB_QP_PKEY_INDEX)
-               qp->s_pkey_index = attr->pkey_index;
-
-       if (attr_mask & IB_QP_DEST_QPN)
-               qp->remote_qpn = attr->dest_qp_num;
-
-       if (attr_mask & IB_QP_SQ_PSN) {
-               qp->s_psn = qp->s_next_psn = attr->sq_psn;
-               qp->s_last_psn = qp->s_next_psn - 1;
-       }
-
-       if (attr_mask & IB_QP_RQ_PSN)
-               qp->r_psn = attr->rq_psn;
-
-       if (attr_mask & IB_QP_ACCESS_FLAGS)
-               qp->qp_access_flags = attr->qp_access_flags;
-
-       if (attr_mask & IB_QP_AV) {
-               qp->remote_ah_attr = attr->ah_attr;
-               qp->s_dmult = ipath_ib_rate_to_mult(attr->ah_attr.static_rate);
-       }
-
-       if (attr_mask & IB_QP_PATH_MTU)
-               qp->path_mtu = attr->path_mtu;
-
-       if (attr_mask & IB_QP_RETRY_CNT)
-               qp->s_retry = qp->s_retry_cnt = attr->retry_cnt;
-
-       if (attr_mask & IB_QP_RNR_RETRY) {
-               qp->s_rnr_retry = attr->rnr_retry;
-               if (qp->s_rnr_retry > 7)
-                       qp->s_rnr_retry = 7;
-               qp->s_rnr_retry_cnt = qp->s_rnr_retry;
-       }
-
-       if (attr_mask & IB_QP_MIN_RNR_TIMER)
-               qp->r_min_rnr_timer = attr->min_rnr_timer;
-
-       if (attr_mask & IB_QP_TIMEOUT)
-               qp->timeout = attr->timeout;
-
-       if (attr_mask & IB_QP_QKEY)
-               qp->qkey = attr->qkey;
-
-       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-               qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
-
-       if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
-               qp->s_max_rd_atomic = attr->max_rd_atomic;
-
-       spin_unlock_irq(&qp->s_lock);
-
-       if (lastwqe) {
-               struct ib_event ev;
-
-               ev.device = qp->ibqp.device;
-               ev.element.qp = &qp->ibqp;
-               ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-               qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-       }
-       ret = 0;
-       goto bail;
-
-inval:
-       spin_unlock_irq(&qp->s_lock);
-       ret = -EINVAL;
-
-bail:
-       return ret;
-}
-
-int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                  int attr_mask, struct ib_qp_init_attr *init_attr)
-{
-       struct ipath_qp *qp = to_iqp(ibqp);
-
-       attr->qp_state = qp->state;
-       attr->cur_qp_state = attr->qp_state;
-       attr->path_mtu = qp->path_mtu;
-       attr->path_mig_state = 0;
-       attr->qkey = qp->qkey;
-       attr->rq_psn = qp->r_psn;
-       attr->sq_psn = qp->s_next_psn;
-       attr->dest_qp_num = qp->remote_qpn;
-       attr->qp_access_flags = qp->qp_access_flags;
-       attr->cap.max_send_wr = qp->s_size - 1;
-       attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
-       attr->cap.max_send_sge = qp->s_max_sge;
-       attr->cap.max_recv_sge = qp->r_rq.max_sge;
-       attr->cap.max_inline_data = 0;
-       attr->ah_attr = qp->remote_ah_attr;
-       memset(&attr->alt_ah_attr, 0, sizeof(attr->alt_ah_attr));
-       attr->pkey_index = qp->s_pkey_index;
-       attr->alt_pkey_index = 0;
-       attr->en_sqd_async_notify = 0;
-       attr->sq_draining = qp->s_draining;
-       attr->max_rd_atomic = qp->s_max_rd_atomic;
-       attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
-       attr->min_rnr_timer = qp->r_min_rnr_timer;
-       attr->port_num = 1;
-       attr->timeout = qp->timeout;
-       attr->retry_cnt = qp->s_retry_cnt;
-       attr->rnr_retry = qp->s_rnr_retry_cnt;
-       attr->alt_port_num = 0;
-       attr->alt_timeout = 0;
-
-       init_attr->event_handler = qp->ibqp.event_handler;
-       init_attr->qp_context = qp->ibqp.qp_context;
-       init_attr->send_cq = qp->ibqp.send_cq;
-       init_attr->recv_cq = qp->ibqp.recv_cq;
-       init_attr->srq = qp->ibqp.srq;
-       init_attr->cap = attr->cap;
-       if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR)
-               init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
-       else
-               init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
-       init_attr->qp_type = qp->ibqp.qp_type;
-       init_attr->port_num = 1;
-       return 0;
-}
-
-/**
- * ipath_compute_aeth - compute the AETH (syndrome + MSN)
- * @qp: the queue pair to compute the AETH for
- *
- * Returns the AETH.
- */
-__be32 ipath_compute_aeth(struct ipath_qp *qp)
-{
-       u32 aeth = qp->r_msn & IPATH_MSN_MASK;
-
-       if (qp->ibqp.srq) {
-               /*
-                * Shared receive queues don't generate credits.
-                * Set the credit field to the invalid value.
-                */
-               aeth |= IPATH_AETH_CREDIT_INVAL << IPATH_AETH_CREDIT_SHIFT;
-       } else {
-               u32 min, max, x;
-               u32 credits;
-               struct ipath_rwq *wq = qp->r_rq.wq;
-               u32 head;
-               u32 tail;
-
-               /* sanity check pointers before trusting them */
-               head = wq->head;
-               if (head >= qp->r_rq.size)
-                       head = 0;
-               tail = wq->tail;
-               if (tail >= qp->r_rq.size)
-                       tail = 0;
-               /*
-                * Compute the number of credits available (RWQEs).
-                * XXX Not holding the r_rq.lock here so there is a small
-                * chance that the pair of reads are not atomic.
-                */
-               credits = head - tail;
-               if ((int)credits < 0)
-                       credits += qp->r_rq.size;
-               /*
-                * Binary search the credit table to find the code to
-                * use.
-                */
-               min = 0;
-               max = 31;
-               for (;;) {
-                       x = (min + max) / 2;
-                       if (credit_table[x] == credits)
-                               break;
-                       if (credit_table[x] > credits)
-                               max = x;
-                       else if (min == x)
-                               break;
-                       else
-                               min = x;
-               }
-               aeth |= x << IPATH_AETH_CREDIT_SHIFT;
-       }
-       return cpu_to_be32(aeth);
-}
-
-/**
- * ipath_create_qp - create a queue pair for a device
- * @ibpd: the protection domain who's device we create the queue pair for
- * @init_attr: the attributes of the queue pair
- * @udata: unused by InfiniPath
- *
- * Returns the queue pair on success, otherwise returns an errno.
- *
- * Called by the ib_create_qp() core verbs function.
- */
-struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
-                             struct ib_qp_init_attr *init_attr,
-                             struct ib_udata *udata)
-{
-       struct ipath_qp *qp;
-       int err;
-       struct ipath_swqe *swq = NULL;
-       struct ipath_ibdev *dev;
-       size_t sz;
-       size_t sg_list_sz;
-       struct ib_qp *ret;
-
-       if (init_attr->create_flags) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
-
-       if (init_attr->cap.max_send_sge > ib_ipath_max_sges ||
-           init_attr->cap.max_send_wr > ib_ipath_max_qp_wrs) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
-
-       /* Check receive queue parameters if no SRQ is specified. */
-       if (!init_attr->srq) {
-               if (init_attr->cap.max_recv_sge > ib_ipath_max_sges ||
-                   init_attr->cap.max_recv_wr > ib_ipath_max_qp_wrs) {
-                       ret = ERR_PTR(-EINVAL);
-                       goto bail;
-               }
-               if (init_attr->cap.max_send_sge +
-                   init_attr->cap.max_send_wr +
-                   init_attr->cap.max_recv_sge +
-                   init_attr->cap.max_recv_wr == 0) {
-                       ret = ERR_PTR(-EINVAL);
-                       goto bail;
-               }
-       }
-
-       switch (init_attr->qp_type) {
-       case IB_QPT_UC:
-       case IB_QPT_RC:
-       case IB_QPT_UD:
-       case IB_QPT_SMI:
-       case IB_QPT_GSI:
-               sz = sizeof(struct ipath_sge) *
-                       init_attr->cap.max_send_sge +
-                       sizeof(struct ipath_swqe);
-               swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
-               if (swq == NULL) {
-                       ret = ERR_PTR(-ENOMEM);
-                       goto bail;
-               }
-               sz = sizeof(*qp);
-               sg_list_sz = 0;
-               if (init_attr->srq) {
-                       struct ipath_srq *srq = to_isrq(init_attr->srq);
-
-                       if (srq->rq.max_sge > 1)
-                               sg_list_sz = sizeof(*qp->r_sg_list) *
-                                       (srq->rq.max_sge - 1);
-               } else if (init_attr->cap.max_recv_sge > 1)
-                       sg_list_sz = sizeof(*qp->r_sg_list) *
-                               (init_attr->cap.max_recv_sge - 1);
-               qp = kmalloc(sz + sg_list_sz, GFP_KERNEL);
-               if (!qp) {
-                       ret = ERR_PTR(-ENOMEM);
-                       goto bail_swq;
-               }
-               if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD ||
-                   init_attr->qp_type == IB_QPT_SMI ||
-                   init_attr->qp_type == IB_QPT_GSI)) {
-                       qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL);
-                       if (!qp->r_ud_sg_list) {
-                               ret = ERR_PTR(-ENOMEM);
-                               goto bail_qp;
-                       }
-               } else
-                       qp->r_ud_sg_list = NULL;
-               if (init_attr->srq) {
-                       sz = 0;
-                       qp->r_rq.size = 0;
-                       qp->r_rq.max_sge = 0;
-                       qp->r_rq.wq = NULL;
-                       init_attr->cap.max_recv_wr = 0;
-                       init_attr->cap.max_recv_sge = 0;
-               } else {
-                       qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
-                       qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
-                       sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
-                               sizeof(struct ipath_rwqe);
-                       qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) +
-                                             qp->r_rq.size * sz);
-                       if (!qp->r_rq.wq) {
-                               ret = ERR_PTR(-ENOMEM);
-                               goto bail_sg_list;
-                       }
-               }
-
-               /*
-                * ib_create_qp() will initialize qp->ibqp
-                * except for qp->ibqp.qp_num.
-                */
-               spin_lock_init(&qp->s_lock);
-               spin_lock_init(&qp->r_rq.lock);
-               atomic_set(&qp->refcount, 0);
-               init_waitqueue_head(&qp->wait);
-               init_waitqueue_head(&qp->wait_dma);
-               tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
-               INIT_LIST_HEAD(&qp->piowait);
-               INIT_LIST_HEAD(&qp->timerwait);
-               qp->state = IB_QPS_RESET;
-               qp->s_wq = swq;
-               qp->s_size = init_attr->cap.max_send_wr + 1;
-               qp->s_max_sge = init_attr->cap.max_send_sge;
-               if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
-                       qp->s_flags = IPATH_S_SIGNAL_REQ_WR;
-               else
-                       qp->s_flags = 0;
-               dev = to_idev(ibpd->device);
-               err = ipath_alloc_qpn(&dev->qp_table, qp,
-                                     init_attr->qp_type);
-               if (err) {
-                       ret = ERR_PTR(err);
-                       vfree(qp->r_rq.wq);
-                       goto bail_sg_list;
-               }
-               qp->ip = NULL;
-               qp->s_tx = NULL;
-               ipath_reset_qp(qp, init_attr->qp_type);
-               break;
-
-       default:
-               /* Don't support raw QPs */
-               ret = ERR_PTR(-ENOSYS);
-               goto bail;
-       }
-
-       init_attr->cap.max_inline_data = 0;
-
-       /*
-        * Return the address of the RWQ as the offset to mmap.
-        * See ipath_mmap() for details.
-        */
-       if (udata && udata->outlen >= sizeof(__u64)) {
-               if (!qp->r_rq.wq) {
-                       __u64 offset = 0;
-
-                       err = ib_copy_to_udata(udata, &offset,
-                                              sizeof(offset));
-                       if (err) {
-                               ret = ERR_PTR(err);
-                               goto bail_ip;
-                       }
-               } else {
-                       u32 s = sizeof(struct ipath_rwq) +
-                               qp->r_rq.size * sz;
-
-                       qp->ip =
-                           ipath_create_mmap_info(dev, s,
-                                                  ibpd->uobject->context,
-                                                  qp->r_rq.wq);
-                       if (!qp->ip) {
-                               ret = ERR_PTR(-ENOMEM);
-                               goto bail_ip;
-                       }
-
-                       err = ib_copy_to_udata(udata, &(qp->ip->offset),
-                                              sizeof(qp->ip->offset));
-                       if (err) {
-                               ret = ERR_PTR(err);
-                               goto bail_ip;
-                       }
-               }
-       }
-
-       spin_lock(&dev->n_qps_lock);
-       if (dev->n_qps_allocated == ib_ipath_max_qps) {
-               spin_unlock(&dev->n_qps_lock);
-               ret = ERR_PTR(-ENOMEM);
-               goto bail_ip;
-       }
-
-       dev->n_qps_allocated++;
-       spin_unlock(&dev->n_qps_lock);
-
-       if (qp->ip) {
-               spin_lock_irq(&dev->pending_lock);
-               list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
-               spin_unlock_irq(&dev->pending_lock);
-       }
-
-       ret = &qp->ibqp;
-       goto bail;
-
-bail_ip:
-       if (qp->ip)
-               kref_put(&qp->ip->ref, ipath_release_mmap_info);
-       else
-               vfree(qp->r_rq.wq);
-       ipath_free_qp(&dev->qp_table, qp);
-       free_qpn(&dev->qp_table, qp->ibqp.qp_num);
-bail_sg_list:
-       kfree(qp->r_ud_sg_list);
-bail_qp:
-       kfree(qp);
-bail_swq:
-       vfree(swq);
-bail:
-       return ret;
-}
-
-/**
- * ipath_destroy_qp - destroy a queue pair
- * @ibqp: the queue pair to destroy
- *
- * Returns 0 on success.
- *
- * Note that this can be called while the QP is actively sending or
- * receiving!
- */
-int ipath_destroy_qp(struct ib_qp *ibqp)
-{
-       struct ipath_qp *qp = to_iqp(ibqp);
-       struct ipath_ibdev *dev = to_idev(ibqp->device);
-
-       /* Make sure HW and driver activity is stopped. */
-       spin_lock_irq(&qp->s_lock);
-       if (qp->state != IB_QPS_RESET) {
-               qp->state = IB_QPS_RESET;
-               spin_lock(&dev->pending_lock);
-               if (!list_empty(&qp->timerwait))
-                       list_del_init(&qp->timerwait);
-               if (!list_empty(&qp->piowait))
-                       list_del_init(&qp->piowait);
-               spin_unlock(&dev->pending_lock);
-               qp->s_flags &= ~IPATH_S_ANY_WAIT;
-               spin_unlock_irq(&qp->s_lock);
-               /* Stop the sending tasklet */
-               tasklet_kill(&qp->s_task);
-               wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
-       } else
-               spin_unlock_irq(&qp->s_lock);
-
-       ipath_free_qp(&dev->qp_table, qp);
-
-       if (qp->s_tx) {
-               atomic_dec(&qp->refcount);
-               if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
-                       kfree(qp->s_tx->txreq.map_addr);
-               spin_lock_irq(&dev->pending_lock);
-               list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
-               spin_unlock_irq(&dev->pending_lock);
-               qp->s_tx = NULL;
-       }
-
-       wait_event(qp->wait, !atomic_read(&qp->refcount));
-
-       /* all user's cleaned up, mark it available */
-       free_qpn(&dev->qp_table, qp->ibqp.qp_num);
-       spin_lock(&dev->n_qps_lock);
-       dev->n_qps_allocated--;
-       spin_unlock(&dev->n_qps_lock);
-
-       if (qp->ip)
-               kref_put(&qp->ip->ref, ipath_release_mmap_info);
-       else
-               vfree(qp->r_rq.wq);
-       kfree(qp->r_ud_sg_list);
-       vfree(qp->s_wq);
-       kfree(qp);
-       return 0;
-}
-
-/**
- * ipath_init_qp_table - initialize the QP table for a device
- * @idev: the device who's QP table we're initializing
- * @size: the size of the QP table
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int ipath_init_qp_table(struct ipath_ibdev *idev, int size)
-{
-       int i;
-       int ret;
-
-       idev->qp_table.last = 1;        /* QPN 0 and 1 are special. */
-       idev->qp_table.max = size;
-       idev->qp_table.nmaps = 1;
-       idev->qp_table.table = kcalloc(size, sizeof(*idev->qp_table.table),
-                                      GFP_KERNEL);
-       if (idev->qp_table.table == NULL) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       for (i = 0; i < ARRAY_SIZE(idev->qp_table.map); i++) {
-               atomic_set(&idev->qp_table.map[i].n_free, BITS_PER_PAGE);
-               idev->qp_table.map[i].page = NULL;
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_get_credit - flush the send work queue of a QP
- * @qp: the qp who's send work queue to flush
- * @aeth: the Acknowledge Extended Transport Header
- *
- * The QP s_lock should be held.
- */
-void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
-{
-       u32 credit = (aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK;
-
-       /*
-        * If the credit is invalid, we can send
-        * as many packets as we like.  Otherwise, we have to
-        * honor the credit field.
-        */
-       if (credit == IPATH_AETH_CREDIT_INVAL)
-               qp->s_lsn = (u32) -1;
-       else if (qp->s_lsn != (u32) -1) {
-               /* Compute new LSN (i.e., MSN + credit) */
-               credit = (aeth + credit_table[credit]) & IPATH_MSN_MASK;
-               if (ipath_cmp24(credit, qp->s_lsn) > 0)
-                       qp->s_lsn = credit;
-       }
-
-       /* Restart sending if it was blocked due to lack of credits. */
-       if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) &&
-           qp->s_cur != qp->s_head &&
-           (qp->s_lsn == (u32) -1 ||
-            ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
-                        qp->s_lsn + 1) <= 0))
-               ipath_schedule_send(qp);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_rc.c b/drivers/staging/rdma/ipath/ipath_rc.c
deleted file mode 100644 (file)
index d4aa535..0000000
+++ /dev/null
@@ -1,1969 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/io.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_RC_##x
-
-static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
-                      u32 psn, u32 pmtu)
-{
-       u32 len;
-
-       len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
-       ss->sge = wqe->sg_list[0];
-       ss->sg_list = wqe->sg_list + 1;
-       ss->num_sge = wqe->wr.num_sge;
-       ipath_skip_sge(ss, len);
-       return wqe->length - len;
-}
-
-/**
- * ipath_init_restart- initialize the qp->s_sge after a restart
- * @qp: the QP who's SGE we're restarting
- * @wqe: the work queue to initialize the QP's SGE from
- *
- * The QP s_lock should be held and interrupts disabled.
- */
-static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
-{
-       struct ipath_ibdev *dev;
-
-       qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
-                               ib_mtu_enum_to_int(qp->path_mtu));
-       dev = to_idev(qp->ibqp.device);
-       spin_lock(&dev->pending_lock);
-       if (list_empty(&qp->timerwait))
-               list_add_tail(&qp->timerwait,
-                             &dev->pending[dev->pending_index]);
-       spin_unlock(&dev->pending_lock);
-}
-
-/**
- * ipath_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
- * @qp: a pointer to the QP
- * @ohdr: a pointer to the IB header being constructed
- * @pmtu: the path MTU
- *
- * Return 1 if constructed; otherwise, return 0.
- * Note that we are in the responder's side of the QP context.
- * Note the QP s_lock must be held.
- */
-static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
-                            struct ipath_other_headers *ohdr, u32 pmtu)
-{
-       struct ipath_ack_entry *e;
-       u32 hwords;
-       u32 len;
-       u32 bth0;
-       u32 bth2;
-
-       /* Don't send an ACK if we aren't supposed to. */
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-               goto bail;
-
-       /* header size in 32-bit words LRH+BTH = (8+12)/4. */
-       hwords = 5;
-
-       switch (qp->s_ack_state) {
-       case OP(RDMA_READ_RESPONSE_LAST):
-       case OP(RDMA_READ_RESPONSE_ONLY):
-       case OP(ATOMIC_ACKNOWLEDGE):
-               /*
-                * We can increment the tail pointer now that the last
-                * response has been sent instead of only being
-                * constructed.
-                */
-               if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
-                       qp->s_tail_ack_queue = 0;
-               /* FALLTHROUGH */
-       case OP(SEND_ONLY):
-       case OP(ACKNOWLEDGE):
-               /* Check for no next entry in the queue. */
-               if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
-                       if (qp->s_flags & IPATH_S_ACK_PENDING)
-                               goto normal;
-                       qp->s_ack_state = OP(ACKNOWLEDGE);
-                       goto bail;
-               }
-
-               e = &qp->s_ack_queue[qp->s_tail_ack_queue];
-               if (e->opcode == OP(RDMA_READ_REQUEST)) {
-                       /* Copy SGE state in case we need to resend */
-                       qp->s_ack_rdma_sge = e->rdma_sge;
-                       qp->s_cur_sge = &qp->s_ack_rdma_sge;
-                       len = e->rdma_sge.sge.sge_length;
-                       if (len > pmtu) {
-                               len = pmtu;
-                               qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
-                       } else {
-                               qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
-                               e->sent = 1;
-                       }
-                       ohdr->u.aeth = ipath_compute_aeth(qp);
-                       hwords++;
-                       qp->s_ack_rdma_psn = e->psn;
-                       bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
-               } else {
-                       /* COMPARE_SWAP or FETCH_ADD */
-                       qp->s_cur_sge = NULL;
-                       len = 0;
-                       qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
-                       ohdr->u.at.aeth = ipath_compute_aeth(qp);
-                       ohdr->u.at.atomic_ack_eth[0] =
-                               cpu_to_be32(e->atomic_data >> 32);
-                       ohdr->u.at.atomic_ack_eth[1] =
-                               cpu_to_be32(e->atomic_data);
-                       hwords += sizeof(ohdr->u.at) / sizeof(u32);
-                       bth2 = e->psn;
-                       e->sent = 1;
-               }
-               bth0 = qp->s_ack_state << 24;
-               break;
-
-       case OP(RDMA_READ_RESPONSE_FIRST):
-               qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
-               /* FALLTHROUGH */
-       case OP(RDMA_READ_RESPONSE_MIDDLE):
-               len = qp->s_ack_rdma_sge.sge.sge_length;
-               if (len > pmtu)
-                       len = pmtu;
-               else {
-                       ohdr->u.aeth = ipath_compute_aeth(qp);
-                       hwords++;
-                       qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
-                       qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1;
-               }
-               bth0 = qp->s_ack_state << 24;
-               bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
-               break;
-
-       default:
-       normal:
-               /*
-                * Send a regular ACK.
-                * Set the s_ack_state so we wait until after sending
-                * the ACK before setting s_ack_state to ACKNOWLEDGE
-                * (see above).
-                */
-               qp->s_ack_state = OP(SEND_ONLY);
-               qp->s_flags &= ~IPATH_S_ACK_PENDING;
-               qp->s_cur_sge = NULL;
-               if (qp->s_nak_state)
-                       ohdr->u.aeth =
-                               cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
-                                           (qp->s_nak_state <<
-                                            IPATH_AETH_CREDIT_SHIFT));
-               else
-                       ohdr->u.aeth = ipath_compute_aeth(qp);
-               hwords++;
-               len = 0;
-               bth0 = OP(ACKNOWLEDGE) << 24;
-               bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
-       }
-       qp->s_hdrwords = hwords;
-       qp->s_cur_size = len;
-       ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2);
-       return 1;
-
-bail:
-       return 0;
-}
-
-/**
- * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
- * @qp: a pointer to the QP
- *
- * Takes qp->s_lock (irqsave) for the whole call.  Pending responses are
- * given priority: if ipath_make_rc_ack() builds a packet, that is the
- * packet that gets sent.  Otherwise the next request packet is built
- * according to qp->s_state; the result is left in qp->s_hdrwords,
- * qp->s_cur_sge and qp->s_cur_size and the header is filled in by
- * ipath_make_ruc_header().
- *
- * Return 1 if a packet was constructed or a work request was flushed with
- * IB_WC_WR_FLUSH_ERR; otherwise clear IPATH_S_BUSY in qp->s_flags and
- * return 0.
- */
-int ipath_make_rc_req(struct ipath_qp *qp)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ipath_other_headers *ohdr;
-       struct ipath_sge_state *ss;
-       struct ipath_swqe *wqe;
-       u32 hwords;
-       u32 len;
-       u32 bth0;
-       u32 bth2;
-       u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-       char newreq;
-       unsigned long flags;
-       int ret = 0;
-
-       ohdr = &qp->s_hdr.u.oth;
-       if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-               ohdr = &qp->s_hdr.u.l.oth;
-
-       /*
-        * The lock is needed to synchronize between the sending tasklet,
-        * the receive interrupt handler, and timeout resends.
-        */
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       /* Sending responses has higher priority over sending requests. */
-       if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
-            (qp->s_flags & IPATH_S_ACK_PENDING) ||
-            qp->s_ack_state != OP(ACKNOWLEDGE)) &&
-           ipath_make_rc_ack(dev, qp, ohdr, pmtu))
-               goto done;
-
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
-               if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
-                       goto bail;
-               /* We are in the error state, flush the work request. */
-               if (qp->s_last == qp->s_head)
-                       goto bail;
-               /* If DMAs are in progress, we can't flush immediately. */
-               if (atomic_read(&qp->s_dma_busy)) {
-                       qp->s_flags |= IPATH_S_WAIT_DMA;
-                       goto bail;
-               }
-               wqe = get_swqe_ptr(qp, qp->s_last);
-               ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-               goto done;
-       }
-
-       /*
-        * An RNR timeout is pending: record that the QP is waiting and
-        * build nothing.
-        * NOTE(review): this comment used to read "Leave BUSY set until
-        * RNR timeout", but the bail path below clears IPATH_S_BUSY; only
-        * IPATH_S_WAITING remains set.  Confirm which is intended.
-        */
-       if (qp->s_rnr_timeout) {
-               qp->s_flags |= IPATH_S_WAITING;
-               goto bail;
-       }
-
-       /* header size in 32-bit words LRH+BTH = (8+12)/4. */
-       hwords = 5;
-       bth0 = 1 << 22; /* Set M bit */
-
-       /* Send a request. */
-       wqe = get_swqe_ptr(qp, qp->s_cur);
-       switch (qp->s_state) {
-       default:
-               if (!(ib_ipath_state_ops[qp->state] &
-                   IPATH_PROCESS_NEXT_SEND_OK))
-                       goto bail;
-               /*
-                * Resend an old request or start a new one.
-                * Every s_state without its own case below lands here,
-                * including the OP(SEND_LAST) that reset_psn() stores to
-                * ask for a request to be (re)started from its beginning.
-                *
-                * We keep track of the current SWQE so that
-                * we don't reset the "furthest progress" state
-                * if we need to back up.
-                */
-               newreq = 0;
-               if (qp->s_cur == qp->s_tail) {
-                       /* Check if send work queue is empty. */
-                       if (qp->s_tail == qp->s_head)
-                               goto bail;
-                       /*
-                        * If a fence is requested, wait for previous
-                        * RDMA read and atomic operations to finish.
-                        */
-                       if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
-                           qp->s_num_rd_atomic) {
-                               qp->s_flags |= IPATH_S_FENCE_PENDING;
-                               goto bail;
-                       }
-                       wqe->psn = qp->s_next_psn;
-                       newreq = 1;
-               }
-               /*
-                * Note that we have to be careful not to modify the
-                * original work request since we may need to resend
-                * it.
-                */
-               len = wqe->length;
-               ss = &qp->s_sge;
-               bth2 = 0;
-               switch (wqe->wr.opcode) {
-               case IB_WR_SEND:
-               case IB_WR_SEND_WITH_IMM:
-                       /*
-                        * If no credit, return.  The check is skipped
-                        * when s_lsn is (u32) -1.
-                        */
-                       if (qp->s_lsn != (u32) -1 &&
-                           ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
-                               qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
-                               goto bail;
-                       }
-                       wqe->lpsn = wqe->psn;
-                       if (len > pmtu) {
-                               wqe->lpsn += (len - 1) / pmtu;
-                               qp->s_state = OP(SEND_FIRST);
-                               len = pmtu;
-                               break;
-                       }
-                       if (wqe->wr.opcode == IB_WR_SEND)
-                               qp->s_state = OP(SEND_ONLY);
-                       else {
-                               qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
-                               /* Immediate data comes after the BTH */
-                               ohdr->u.imm_data = wqe->wr.ex.imm_data;
-                               hwords += 1;
-                       }
-                       if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                               bth0 |= 1 << 23;
-                       bth2 = 1 << 31; /* Request ACK. */
-                       if (++qp->s_cur == qp->s_size)
-                               qp->s_cur = 0;
-                       break;
-
-               case IB_WR_RDMA_WRITE:
-                       if (newreq && qp->s_lsn != (u32) -1)
-                               qp->s_lsn++;
-                       /* FALLTHROUGH */
-               case IB_WR_RDMA_WRITE_WITH_IMM:
-                       /*
-                        * If no credit, return.  The check is skipped
-                        * when s_lsn is (u32) -1.
-                        */
-                       if (qp->s_lsn != (u32) -1 &&
-                           ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
-                               qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
-                               goto bail;
-                       }
-                       ohdr->u.rc.reth.vaddr =
-                               cpu_to_be64(wqe->rdma_wr.remote_addr);
-                       ohdr->u.rc.reth.rkey =
-                               cpu_to_be32(wqe->rdma_wr.rkey);
-                       ohdr->u.rc.reth.length = cpu_to_be32(len);
-                       hwords += sizeof(struct ib_reth) / sizeof(u32);
-                       wqe->lpsn = wqe->psn;
-                       if (len > pmtu) {
-                               wqe->lpsn += (len - 1) / pmtu;
-                               qp->s_state = OP(RDMA_WRITE_FIRST);
-                               len = pmtu;
-                               break;
-                       }
-                       if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-                               qp->s_state = OP(RDMA_WRITE_ONLY);
-                       else {
-                               qp->s_state =
-                                       OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
-                               /* Immediate data comes after RETH */
-                               ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
-                               hwords += 1;
-                               if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                                       bth0 |= 1 << 23;
-                       }
-                       bth2 = 1 << 31; /* Request ACK. */
-                       if (++qp->s_cur == qp->s_size)
-                               qp->s_cur = 0;
-                       break;
-
-               case IB_WR_RDMA_READ:
-                       /*
-                        * Don't allow more operations to be started
-                        * than the QP limits allow.
-                        */
-                       if (newreq) {
-                               if (qp->s_num_rd_atomic >=
-                                   qp->s_max_rd_atomic) {
-                                       qp->s_flags |= IPATH_S_RDMAR_PENDING;
-                                       goto bail;
-                               }
-                               qp->s_num_rd_atomic++;
-                               if (qp->s_lsn != (u32) -1)
-                                       qp->s_lsn++;
-                               /*
-                                * Adjust s_next_psn to count the
-                                * expected number of responses.
-                                */
-                               if (len > pmtu)
-                                       qp->s_next_psn += (len - 1) / pmtu;
-                               wqe->lpsn = qp->s_next_psn++;
-                       }
-                       ohdr->u.rc.reth.vaddr =
-                               cpu_to_be64(wqe->rdma_wr.remote_addr);
-                       ohdr->u.rc.reth.rkey =
-                               cpu_to_be32(wqe->rdma_wr.rkey);
-                       ohdr->u.rc.reth.length = cpu_to_be32(len);
-                       qp->s_state = OP(RDMA_READ_REQUEST);
-                       hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
-                       ss = NULL;
-                       len = 0;
-                       if (++qp->s_cur == qp->s_size)
-                               qp->s_cur = 0;
-                       break;
-
-               case IB_WR_ATOMIC_CMP_AND_SWP:
-               case IB_WR_ATOMIC_FETCH_AND_ADD:
-                       /*
-                        * Don't allow more operations to be started
-                        * than the QP limits allow.
-                        */
-                       if (newreq) {
-                               if (qp->s_num_rd_atomic >=
-                                   qp->s_max_rd_atomic) {
-                                       qp->s_flags |= IPATH_S_RDMAR_PENDING;
-                                       goto bail;
-                               }
-                               qp->s_num_rd_atomic++;
-                               if (qp->s_lsn != (u32) -1)
-                                       qp->s_lsn++;
-                               wqe->lpsn = wqe->psn;
-                       }
-                       if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
-                               qp->s_state = OP(COMPARE_SWAP);
-                               ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-                                       wqe->atomic_wr.swap);
-                               ohdr->u.atomic_eth.compare_data = cpu_to_be64(
-                                       wqe->atomic_wr.compare_add);
-                       } else {
-                               qp->s_state = OP(FETCH_ADD);
-                               ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-                                       wqe->atomic_wr.compare_add);
-                               ohdr->u.atomic_eth.compare_data = 0;
-                       }
-                       ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
-                               wqe->atomic_wr.remote_addr >> 32);
-                       ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
-                               wqe->atomic_wr.remote_addr);
-                       ohdr->u.atomic_eth.rkey = cpu_to_be32(
-                               wqe->atomic_wr.rkey);
-                       hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
-                       ss = NULL;
-                       len = 0;
-                       if (++qp->s_cur == qp->s_size)
-                               qp->s_cur = 0;
-                       break;
-
-               default:
-                       goto bail;
-               }
-               qp->s_sge.sge = wqe->sg_list[0];
-               qp->s_sge.sg_list = wqe->sg_list + 1;
-               qp->s_sge.num_sge = wqe->wr.num_sge;
-               qp->s_len = wqe->length;
-               if (newreq) {
-                       qp->s_tail++;
-                       if (qp->s_tail >= qp->s_size)
-                               qp->s_tail = 0;
-               }
-               bth2 |= qp->s_psn & IPATH_PSN_MASK;
-               if (wqe->wr.opcode == IB_WR_RDMA_READ)
-                       qp->s_psn = wqe->lpsn + 1;
-               else {
-                       qp->s_psn++;
-                       if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-                               qp->s_next_psn = qp->s_psn;
-               }
-               /*
-                * Put the QP on the pending list so lost ACKs will cause
-                * a retry.  More than one request can be pending so the
-                * QP may already be on the dev->pending list.
-                */
-               spin_lock(&dev->pending_lock);
-               if (list_empty(&qp->timerwait))
-                       list_add_tail(&qp->timerwait,
-                                     &dev->pending[dev->pending_index]);
-               spin_unlock(&dev->pending_lock);
-               break;
-
-       case OP(RDMA_READ_RESPONSE_FIRST):
-               /*
-                * This case can only happen if a send is restarted.
-                * See ipath_restart_rc().
-                */
-               ipath_init_restart(qp, wqe);
-               /* FALLTHROUGH */
-       case OP(SEND_FIRST):
-               qp->s_state = OP(SEND_MIDDLE);
-               /* FALLTHROUGH */
-       case OP(SEND_MIDDLE):
-               bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-               if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-                       qp->s_next_psn = qp->s_psn;
-               ss = &qp->s_sge;
-               len = qp->s_len;
-               if (len > pmtu) {
-                       len = pmtu;
-                       break;
-               }
-               if (wqe->wr.opcode == IB_WR_SEND)
-                       qp->s_state = OP(SEND_LAST);
-               else {
-                       qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
-                       /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
-                       hwords += 1;
-               }
-               if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                       bth0 |= 1 << 23;
-               bth2 |= 1 << 31;        /* Request ACK. */
-               qp->s_cur++;
-               if (qp->s_cur >= qp->s_size)
-                       qp->s_cur = 0;
-               break;
-
-       case OP(RDMA_READ_RESPONSE_LAST):
-               /*
-                * This case can only happen if a RDMA write is restarted.
-                * See ipath_restart_rc().
-                */
-               ipath_init_restart(qp, wqe);
-               /* FALLTHROUGH */
-       case OP(RDMA_WRITE_FIRST):
-               qp->s_state = OP(RDMA_WRITE_MIDDLE);
-               /* FALLTHROUGH */
-       case OP(RDMA_WRITE_MIDDLE):
-               bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-               if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-                       qp->s_next_psn = qp->s_psn;
-               ss = &qp->s_sge;
-               len = qp->s_len;
-               if (len > pmtu) {
-                       len = pmtu;
-                       break;
-               }
-               if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-                       qp->s_state = OP(RDMA_WRITE_LAST);
-               else {
-                       qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
-                       /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
-                       hwords += 1;
-                       if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                               bth0 |= 1 << 23;
-               }
-               bth2 |= 1 << 31;        /* Request ACK. */
-               qp->s_cur++;
-               if (qp->s_cur >= qp->s_size)
-                       qp->s_cur = 0;
-               break;
-
-       case OP(RDMA_READ_RESPONSE_MIDDLE):
-               /*
-                * This case can only happen if a RDMA read is restarted.
-                * See ipath_restart_rc().
-                */
-               ipath_init_restart(qp, wqe);
-               len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
-               ohdr->u.rc.reth.vaddr =
-                       cpu_to_be64(wqe->rdma_wr.remote_addr + len);
-               ohdr->u.rc.reth.rkey =
-                       cpu_to_be32(wqe->rdma_wr.rkey);
-               ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
-               qp->s_state = OP(RDMA_READ_REQUEST);
-               hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
-               bth2 = qp->s_psn & IPATH_PSN_MASK;
-               qp->s_psn = wqe->lpsn + 1;
-               ss = NULL;
-               len = 0;
-               qp->s_cur++;
-               if (qp->s_cur == qp->s_size)
-                       qp->s_cur = 0;
-               break;
-       }
-       if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
-               bth2 |= 1 << 31;        /* Request ACK. */
-       qp->s_len -= len;
-       qp->s_hdrwords = hwords;
-       qp->s_cur_sge = ss;
-       qp->s_cur_size = len;
-       ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
-done:
-       ret = 1;
-       goto unlock;
-
-bail:
-       qp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-       return ret;
-}
-
-/**
- * send_rc_ack - Construct an ACK packet and send it
- * @qp: a pointer to the QP
- *
- * This is called from ipath_rc_rcv() and only uses the receive
- * side QP state.
- * Note that RDMA reads and atomics are handled in the
- * send side QP state and tasklet.
- *
- * The ACK (or NAK, when qp->r_nak_state is set) is written straight into
- * a PIO buffer when possible.  If responses are already queued or pending,
- * or no PIO buffer is available, the ACK is instead handed to the send
- * tasklet: IPATH_S_ACK_PENDING is set, r_nak_state and r_ack_psn are
- * copied to s_nak_state and s_ack_psn, and ipath_schedule_send() is
- * called.  That hand-off only happens while the QP state allows
- * IPATH_PROCESS_RECV_OK.  Nothing is sent or queued if the link is not
- * ACTIVE.
- */
-static void send_rc_ack(struct ipath_qp *qp)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ipath_devdata *dd;
-       u16 lrh0;
-       u32 bth0;
-       u32 hwords;
-       u32 __iomem *piobuf;
-       struct ipath_ib_header hdr;
-       struct ipath_other_headers *ohdr;
-       unsigned long flags;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       /* Don't send ACK or NAK if a RDMA read or atomic is pending. */
-       if (qp->r_head_ack_queue != qp->s_tail_ack_queue ||
-           (qp->s_flags & IPATH_S_ACK_PENDING) ||
-           qp->s_ack_state != OP(ACKNOWLEDGE))
-               goto queue_ack;
-
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-
-       /* Don't try to send ACKs if the link isn't ACTIVE */
-       dd = dev->dd;
-       if (!(dd->ipath_flags & IPATH_LINKACTIVE))
-               goto done;
-
-       piobuf = ipath_getpiobuf(dd, 0, NULL);
-       if (!piobuf) {
-               /*
-                * We are out of PIO buffers at the moment.
-                * Pass responsibility for sending the ACK to the
-                * send tasklet so that when a PIO buffer becomes
-                * available, the ACK is sent ahead of other outgoing
-                * packets.
-                */
-               spin_lock_irqsave(&qp->s_lock, flags);
-               goto queue_ack;
-       }
-
-       /* Construct the header. */
-       ohdr = &hdr.u.oth;
-       lrh0 = IPATH_LRH_BTH;
-       /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
-       hwords = 6;
-       if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
-               hwords += ipath_make_grh(dev, &hdr.u.l.grh,
-                                        &qp->remote_ah_attr.grh,
-                                        hwords, 0);
-               ohdr = &hdr.u.l.oth;
-               lrh0 = IPATH_LRH_GRH;
-       }
-       /* read pkey_index w/o lock (it's atomic) */
-       bth0 = ipath_get_pkey(dd, qp->s_pkey_index) |
-               (OP(ACKNOWLEDGE) << 24) | (1 << 22);
-       if (qp->r_nak_state)
-               ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
-                                           (qp->r_nak_state <<
-                                            IPATH_AETH_CREDIT_SHIFT));
-       else
-               ohdr->u.aeth = ipath_compute_aeth(qp);
-       lrh0 |= qp->remote_ah_attr.sl << 4;
-       hdr.lrh[0] = cpu_to_be16(lrh0);
-       hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
-       hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
-       hdr.lrh[3] = cpu_to_be16(dd->ipath_lid |
-                                qp->remote_ah_attr.src_path_bits);
-       ohdr->bth[0] = cpu_to_be32(bth0);
-       ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
-       ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
-
-       writeq(hwords + 1, piobuf);
-
-       if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
-               u32 *hdrp = (u32 *) &hdr;
-
-               ipath_flush_wc();
-               __iowrite32_copy(piobuf + 2, hdrp, hwords - 1);
-               ipath_flush_wc();
-               __raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
-       } else
-               __iowrite32_copy(piobuf + 2, (u32 *) &hdr, hwords);
-
-       ipath_flush_wc();
-
-       dev->n_unicast_xmit++;
-       goto done;
-
-queue_ack:     /* entered with qp->s_lock held */
-       if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) {
-               dev->n_rc_qacks++;
-               qp->s_flags |= IPATH_S_ACK_PENDING;
-               qp->s_nak_state = qp->r_nak_state;
-               qp->s_ack_psn = qp->r_ack_psn;
-
-               /* Schedule the send tasklet. */
-               ipath_schedule_send(qp);
-       }
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-done:
-       return;
-}
-
-/**
- * reset_psn - reset the QP state to send starting from PSN
- * @qp: the QP
- * @psn: the packet sequence number to restart at
- *
- * Called from ipath_restart_rc() and do_rc_ack().  Walks the send queue
- * from s_last towards s_tail to find the work request that contains @psn
- * and points s_cur at it.  If @psn is at or before the first PSN of the
- * s_last request, or is exactly the first PSN of a later request, s_state
- * is set to OP(SEND_LAST) so that ipath_make_rc_req() starts that request
- * from the beginning.  Otherwise s_state is set to one of the
- * OP(RDMA_READ_RESPONSE_*) values, which ipath_make_rc_req() handles as
- * "restart in the middle of a send / RDMA write / RDMA read" cases.
- * In every case s_psn is set to @psn on return.
- *
- * Called at interrupt level with the QP s_lock held.
- */
-static void reset_psn(struct ipath_qp *qp, u32 psn)
-{
-       u32 n = qp->s_last;
-       struct ipath_swqe *wqe = get_swqe_ptr(qp, n);
-       u32 opcode;
-
-       qp->s_cur = n;
-
-       /*
-        * If we are starting the request from the beginning,
-        * let the normal send code handle initialization.
-        */
-       if (ipath_cmp24(psn, wqe->psn) <= 0) {
-               qp->s_state = OP(SEND_LAST);
-               goto done;
-       }
-
-       /* Find the work request opcode corresponding to the given PSN. */
-       opcode = wqe->wr.opcode;
-       for (;;) {
-               int diff;
-
-               if (++n == qp->s_size)
-                       n = 0;
-               if (n == qp->s_tail)
-                       break;
-               wqe = get_swqe_ptr(qp, n);
-               diff = ipath_cmp24(psn, wqe->psn);
-               if (diff < 0)
-                       break;
-               qp->s_cur = n;
-               /*
-                * If we are starting the request from the beginning,
-                * let the normal send code handle initialization.
-                */
-               if (diff == 0) {
-                       qp->s_state = OP(SEND_LAST);
-                       goto done;
-               }
-               opcode = wqe->wr.opcode;
-       }
-
-       /*
-        * Set the state to restart in the middle of a request.
-        * Don't change the s_sge, s_cur_sge, or s_cur_size.
-        * See ipath_make_rc_req().
-        */
-       switch (opcode) {
-       case IB_WR_SEND:
-       case IB_WR_SEND_WITH_IMM:
-               qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
-               break;
-
-       case IB_WR_RDMA_WRITE:
-       case IB_WR_RDMA_WRITE_WITH_IMM:
-               qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
-               break;
-
-       case IB_WR_RDMA_READ:
-               qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
-               break;
-
-       default:
-               /*
-                * This case shouldn't happen since it's only
-                * one PSN per req.
-                */
-               qp->s_state = OP(SEND_LAST);
-       }
-done:
-       qp->s_psn = psn;
-}
-
-/**
- * ipath_restart_rc - back up requester to resend the last un-ACKed request
- * @qp: the QP to restart
- * @psn: packet sequence number for the request
- *
- * If no retries remain (qp->s_retry == 0), the request at s_last is
- * completed with IB_WC_RETRY_EXC_ERR and ipath_error_qp() is called with
- * IB_WC_WR_FLUSH_ERR; nothing is resent.  Otherwise one retry is consumed,
- * dev->n_rc_resends is bumped (by one for an RDMA read, else by the number
- * of PSNs between @psn and s_psn), the send state is rewound with
- * reset_psn() and the send is rescheduled.
- *
- * The QP s_lock should be held and interrupts disabled.
- */
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn)
-{
-       struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-       struct ipath_ibdev *dev;
-
-       if (qp->s_retry == 0) {
-               ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
-               ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-               goto bail;
-       }
-       qp->s_retry--;
-
-       /*
-        * Remove the QP from the timeout queue and from the list it is
-        * linked on through qp->piowait.
-        * Note: it may already have been removed by ipath_ib_timer().
-        */
-       dev = to_idev(qp->ibqp.device);
-       spin_lock(&dev->pending_lock);
-       if (!list_empty(&qp->timerwait))
-               list_del_init(&qp->timerwait);
-       if (!list_empty(&qp->piowait))
-               list_del_init(&qp->piowait);
-       spin_unlock(&dev->pending_lock);
-
-       if (wqe->wr.opcode == IB_WR_RDMA_READ)
-               dev->n_rc_resends++;
-       else
-               dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
-
-       reset_psn(qp, psn);
-       ipath_schedule_send(qp);
-
-bail:
-       return;
-}
-
-static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
-{
-       qp->s_last_psn = psn;   /* paces ACK requests in ipath_make_rc_req() */
-}
-
-/**
- * do_rc_ack - process an incoming RC ACK
- * @qp: the QP the ACK came in on
- * @psn: the packet sequence number of the ACK
- * @opcode: the opcode of the request that resulted in the ACK
- *
- * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK
- * for the given QP.
- * Called at interrupt level with the QP s_lock held and interrupts disabled.
- * Returns 1 if OK, 0 if current operation should be aborted (NAK).
- */
-static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
-                    u64 val)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ib_wc wc;
-       enum ib_wc_status status;
-       struct ipath_swqe *wqe;
-       int ret = 0;
-       u32 ack_psn;
-       int diff;
-
-       /*
-        * Remove the QP from the timeout queue (or RNR timeout queue).
-        * If ipath_ib_timer() has already removed it,
-        * it's OK since we hold the QP s_lock and ipath_restart_rc()
-        * just won't find anything to restart if we ACK everything.
-        */
-       spin_lock(&dev->pending_lock);
-       if (!list_empty(&qp->timerwait))
-               list_del_init(&qp->timerwait);
-       spin_unlock(&dev->pending_lock);
-
-       /*
-        * Note that NAKs implicitly ACK outstanding SEND and RDMA write
-        * requests and implicitly NAK RDMA read and atomic requests issued
-        * before the NAK'ed request.  The MSN won't include the NAK'ed
-        * request but will include an ACK'ed request(s).
-        */
-       ack_psn = psn;
-       if (aeth >> 29)
-               ack_psn--;
-       wqe = get_swqe_ptr(qp, qp->s_last);
-
-       /*
-        * The MSN might be for a later WQE than the PSN indicates so
-        * only complete WQEs that the PSN finishes.
-        */
-       while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) {
-               /*
-                * RDMA_READ_RESPONSE_ONLY is a special case since
-                * we want to generate completion events for everything
-                * before the RDMA read, copy the data, then generate
-                * the completion for the read.
-                */
-               if (wqe->wr.opcode == IB_WR_RDMA_READ &&
-                   opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
-                   diff == 0) {
-                       ret = 1;
-                       goto bail;
-               }
-               /*
-                * If this request is a RDMA read or atomic, and the ACK is
-                * for a later operation, this ACK NAKs the RDMA read or
-                * atomic.  In other words, only a RDMA_READ_LAST or ONLY
-                * can ACK a RDMA read and likewise for atomic ops.  Note
-                * that the NAK case can only happen if relaxed ordering is
-                * used and requests are sent after an RDMA read or atomic
-                * is sent but before the response is received.
-                */
-               if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
-                    (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
-                   ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-                     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
-                    (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
-                       /*
-                        * The last valid PSN seen is the previous
-                        * request's.
-                        */
-                       update_last_psn(qp, wqe->psn - 1);
-                       /* Retry this request. */
-                       ipath_restart_rc(qp, wqe->psn);
-                       /*
-                        * No need to process the ACK/NAK since we are
-                        * restarting an earlier request.
-                        */
-                       goto bail;
-               }
-               if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-                   wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-                       *(u64 *) wqe->sg_list[0].vaddr = val;
-               if (qp->s_num_rd_atomic &&
-                   (wqe->wr.opcode == IB_WR_RDMA_READ ||
-                    wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-                    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
-                       qp->s_num_rd_atomic--;
-                       /* Restart sending task if fence is complete */
-                       if (((qp->s_flags & IPATH_S_FENCE_PENDING) &&
-                            !qp->s_num_rd_atomic) ||
-                           qp->s_flags & IPATH_S_RDMAR_PENDING)
-                               ipath_schedule_send(qp);
-               }
-               /* Post a send completion queue entry if requested. */
-               if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
-                   (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
-                       memset(&wc, 0, sizeof wc);
-                       wc.wr_id = wqe->wr.wr_id;
-                       wc.status = IB_WC_SUCCESS;
-                       wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-                       wc.byte_len = wqe->length;
-                       wc.qp = &qp->ibqp;
-                       wc.src_qp = qp->remote_qpn;
-                       wc.slid = qp->remote_ah_attr.dlid;
-                       wc.sl = qp->remote_ah_attr.sl;
-                       ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
-               }
-               qp->s_retry = qp->s_retry_cnt;
-               /*
-                * If we are completing a request which is in the process of
-                * being resent, we can stop resending it since we know the
-                * responder has already seen it.
-                */
-               if (qp->s_last == qp->s_cur) {
-                       if (++qp->s_cur >= qp->s_size)
-                               qp->s_cur = 0;
-                       qp->s_last = qp->s_cur;
-                       if (qp->s_last == qp->s_tail)
-                               break;
-                       wqe = get_swqe_ptr(qp, qp->s_cur);
-                       qp->s_state = OP(SEND_LAST);
-                       qp->s_psn = wqe->psn;
-               } else {
-                       if (++qp->s_last >= qp->s_size)
-                               qp->s_last = 0;
-                       if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur)
-                               qp->s_draining = 0;
-                       if (qp->s_last == qp->s_tail)
-                               break;
-                       wqe = get_swqe_ptr(qp, qp->s_last);
-               }
-       }
-
-       switch (aeth >> 29) {
-       case 0:         /* ACK */
-               dev->n_rc_acks++;
-               /* If this is a partial ACK, reset the retransmit timer. */
-               if (qp->s_last != qp->s_tail) {
-                       spin_lock(&dev->pending_lock);
-                       if (list_empty(&qp->timerwait))
-                               list_add_tail(&qp->timerwait,
-                                       &dev->pending[dev->pending_index]);
-                       spin_unlock(&dev->pending_lock);
-                       /*
-                        * If we get a partial ACK for a resent operation,
-                        * we can stop resending the earlier packets and
-                        * continue with the next packet the receiver wants.
-                        */
-                       if (ipath_cmp24(qp->s_psn, psn) <= 0) {
-                               reset_psn(qp, psn + 1);
-                               ipath_schedule_send(qp);
-                       }
-               } else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
-                       qp->s_state = OP(SEND_LAST);
-                       qp->s_psn = psn + 1;
-               }
-               ipath_get_credit(qp, aeth);
-               qp->s_rnr_retry = qp->s_rnr_retry_cnt;
-               qp->s_retry = qp->s_retry_cnt;
-               update_last_psn(qp, psn);
-               ret = 1;
-               goto bail;
-
-       case 1:         /* RNR NAK */
-               dev->n_rnr_naks++;
-               if (qp->s_last == qp->s_tail)
-                       goto bail;
-               if (qp->s_rnr_retry == 0) {
-                       status = IB_WC_RNR_RETRY_EXC_ERR;
-                       goto class_b;
-               }
-               if (qp->s_rnr_retry_cnt < 7)
-                       qp->s_rnr_retry--;
-
-               /* The last valid PSN is the previous PSN. */
-               update_last_psn(qp, psn - 1);
-
-               if (wqe->wr.opcode == IB_WR_RDMA_READ)
-                       dev->n_rc_resends++;
-               else
-                       dev->n_rc_resends +=
-                               (qp->s_psn - psn) & IPATH_PSN_MASK;
-
-               reset_psn(qp, psn);
-
-               qp->s_rnr_timeout =
-                       ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
-                                          IPATH_AETH_CREDIT_MASK];
-               ipath_insert_rnr_queue(qp);
-               ipath_schedule_send(qp);
-               goto bail;
-
-       case 3:         /* NAK */
-               if (qp->s_last == qp->s_tail)
-                       goto bail;
-               /* The last valid PSN is the previous PSN. */
-               update_last_psn(qp, psn - 1);
-               switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
-                       IPATH_AETH_CREDIT_MASK) {
-               case 0: /* PSN sequence error */
-                       dev->n_seq_naks++;
-                       /*
-                        * Back up to the responder's expected PSN.
-                        * Note that we might get a NAK in the middle of an
-                        * RDMA READ response which terminates the RDMA
-                        * READ.
-                        */
-                       ipath_restart_rc(qp, psn);
-                       break;
-
-               case 1: /* Invalid Request */
-                       status = IB_WC_REM_INV_REQ_ERR;
-                       dev->n_other_naks++;
-                       goto class_b;
-
-               case 2: /* Remote Access Error */
-                       status = IB_WC_REM_ACCESS_ERR;
-                       dev->n_other_naks++;
-                       goto class_b;
-
-               case 3: /* Remote Operation Error */
-                       status = IB_WC_REM_OP_ERR;
-                       dev->n_other_naks++;
-               class_b:
-                       ipath_send_complete(qp, wqe, status);
-                       ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-                       break;
-
-               default:
-                       /* Ignore other reserved NAK error codes */
-                       goto reserved;
-               }
-               qp->s_rnr_retry = qp->s_rnr_retry_cnt;
-               goto bail;
-
-       default:                /* 2: reserved */
-       reserved:
-               /* Ignore reserved NAK codes. */
-               goto bail;
-       }
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_rc_rcv_resp - process an incoming RC response packet
- * @dev: the device this packet came in on
- * @ohdr: the other headers for this packet
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP for this packet
- * @opcode: the opcode for this packet
- * @psn: the packet sequence number for this packet
- * @hdrsize: the header length
- * @pmtu: the path MTU
- * @header_in_data: true if part of the header data is in the data buffer
- *
- * This is called from ipath_rc_rcv() to process an incoming RC response
- * packet for the given QP.
- * Called at interrupt level.
- */
-static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
-                                    struct ipath_other_headers *ohdr,
-                                    void *data, u32 tlen,
-                                    struct ipath_qp *qp,
-                                    u32 opcode,
-                                    u32 psn, u32 hdrsize, u32 pmtu,
-                                    int header_in_data)
-{
-       struct ipath_swqe *wqe;
-       enum ib_wc_status status;
-       unsigned long flags;
-       int diff;
-       u32 pad;
-       u32 aeth;
-       u64 val;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       /* Double check we can process this now that we hold the s_lock. */
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-               goto ack_done;
-
-       /* Ignore invalid responses. */
-       if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
-               goto ack_done;
-
-       /* Ignore duplicate responses. */
-       diff = ipath_cmp24(psn, qp->s_last_psn);
-       if (unlikely(diff <= 0)) {
-               /* Update credits for "ghost" ACKs */
-               if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
-                       if (!header_in_data)
-                               aeth = be32_to_cpu(ohdr->u.aeth);
-                       else {
-                               aeth = be32_to_cpu(((__be32 *) data)[0]);
-                               data += sizeof(__be32);
-                       }
-                       if ((aeth >> 29) == 0)
-                               ipath_get_credit(qp, aeth);
-               }
-               goto ack_done;
-       }
-
-       if (unlikely(qp->s_last == qp->s_tail))
-               goto ack_done;
-       wqe = get_swqe_ptr(qp, qp->s_last);
-       status = IB_WC_SUCCESS;
-
-       switch (opcode) {
-       case OP(ACKNOWLEDGE):
-       case OP(ATOMIC_ACKNOWLEDGE):
-       case OP(RDMA_READ_RESPONSE_FIRST):
-               if (!header_in_data)
-                       aeth = be32_to_cpu(ohdr->u.aeth);
-               else {
-                       aeth = be32_to_cpu(((__be32 *) data)[0]);
-                       data += sizeof(__be32);
-               }
-               if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
-                       if (!header_in_data) {
-                               __be32 *p = ohdr->u.at.atomic_ack_eth;
-
-                               val = ((u64) be32_to_cpu(p[0]) << 32) |
-                                       be32_to_cpu(p[1]);
-                       } else
-                               val = be64_to_cpu(((__be64 *) data)[0]);
-               } else
-                       val = 0;
-               if (!do_rc_ack(qp, aeth, psn, opcode, val) ||
-                   opcode != OP(RDMA_READ_RESPONSE_FIRST))
-                       goto ack_done;
-               hdrsize += 4;
-               wqe = get_swqe_ptr(qp, qp->s_last);
-               if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-                       goto ack_op_err;
-               qp->r_flags &= ~IPATH_R_RDMAR_SEQ;
-               /*
-                * If this is a response to a resent RDMA read, we
-                * have to be careful to copy the data to the right
-                * location.
-                */
-               qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
-                                                 wqe, psn, pmtu);
-               goto read_middle;
-
-       case OP(RDMA_READ_RESPONSE_MIDDLE):
-               /* no AETH, no ACK */
-               if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
-                       dev->n_rdma_seq++;
-                       if (qp->r_flags & IPATH_R_RDMAR_SEQ)
-                               goto ack_done;
-                       qp->r_flags |= IPATH_R_RDMAR_SEQ;
-                       ipath_restart_rc(qp, qp->s_last_psn + 1);
-                       goto ack_done;
-               }
-               if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-                       goto ack_op_err;
-       read_middle:
-               if (unlikely(tlen != (hdrsize + pmtu + 4)))
-                       goto ack_len_err;
-               if (unlikely(pmtu >= qp->s_rdma_read_len))
-                       goto ack_len_err;
-
-               /* We got a response so update the timeout. */
-               spin_lock(&dev->pending_lock);
-               if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
-                       list_move_tail(&qp->timerwait,
-                                      &dev->pending[dev->pending_index]);
-               spin_unlock(&dev->pending_lock);
-
-               if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
-                       qp->s_retry = qp->s_retry_cnt;
-
-               /*
-                * Update the RDMA receive state but do the copy w/o
-                * holding the locks and blocking interrupts.
-                */
-               qp->s_rdma_read_len -= pmtu;
-               update_last_psn(qp, psn);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-               ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu);
-               goto bail;
-
-       case OP(RDMA_READ_RESPONSE_ONLY):
-               if (!header_in_data)
-                       aeth = be32_to_cpu(ohdr->u.aeth);
-               else
-                       aeth = be32_to_cpu(((__be32 *) data)[0]);
-               if (!do_rc_ack(qp, aeth, psn, opcode, 0))
-                       goto ack_done;
-               /* Get the number of bytes the message was padded by. */
-               pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-               /*
-                * Check that the data size is >= 0 && <= pmtu.
-                * Remember to account for the AETH header (4) and
-                * ICRC (4).
-                */
-               if (unlikely(tlen < (hdrsize + pad + 8)))
-                       goto ack_len_err;
-               /*
-                * If this is a response to a resent RDMA read, we
-                * have to be careful to copy the data to the right
-                * location.
-                */
-               wqe = get_swqe_ptr(qp, qp->s_last);
-               qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
-                                                 wqe, psn, pmtu);
-               goto read_last;
-
-       case OP(RDMA_READ_RESPONSE_LAST):
-               /* ACKs READ req. */
-               if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
-                       dev->n_rdma_seq++;
-                       if (qp->r_flags & IPATH_R_RDMAR_SEQ)
-                               goto ack_done;
-                       qp->r_flags |= IPATH_R_RDMAR_SEQ;
-                       ipath_restart_rc(qp, qp->s_last_psn + 1);
-                       goto ack_done;
-               }
-               if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-                       goto ack_op_err;
-               /* Get the number of bytes the message was padded by. */
-               pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-               /*
-                * Check that the data size is >= 1 && <= pmtu.
-                * Remember to account for the AETH header (4) and
-                * ICRC (4).
-                */
-               if (unlikely(tlen <= (hdrsize + pad + 8)))
-                       goto ack_len_err;
-       read_last:
-               tlen -= hdrsize + pad + 8;
-               if (unlikely(tlen != qp->s_rdma_read_len))
-                       goto ack_len_err;
-               if (!header_in_data)
-                       aeth = be32_to_cpu(ohdr->u.aeth);
-               else {
-                       aeth = be32_to_cpu(((__be32 *) data)[0]);
-                       data += sizeof(__be32);
-               }
-               ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
-               (void) do_rc_ack(qp, aeth, psn,
-                                OP(RDMA_READ_RESPONSE_LAST), 0);
-               goto ack_done;
-       }
-
-ack_op_err:
-       status = IB_WC_LOC_QP_OP_ERR;
-       goto ack_err;
-
-ack_len_err:
-       status = IB_WC_LOC_LEN_ERR;
-ack_err:
-       ipath_send_complete(qp, wqe, status);
-       ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-ack_done:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-bail:
-       return;
-}
-
-/**
- * ipath_rc_rcv_error - process an incoming duplicate or error RC packet
- * @dev: the device this packet came in on
- * @ohdr: the other headers for this packet
- * @data: the packet data
- * @qp: the QP for this packet
- * @opcode: the opcode for this packet
- * @psn: the packet sequence number for this packet
- * @diff: the difference between the PSN and the expected PSN
- * @header_in_data: true if part of the header data is in the data buffer
- *
- * This is called from ipath_rc_rcv() to process an unexpected
- * incoming RC packet for the given QP.
- * Called at interrupt level.
- * Return 1 if no more processing is needed; otherwise return 0 to
- * schedule a response to be sent.
- */
-static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
-                                    struct ipath_other_headers *ohdr,
-                                    void *data,
-                                    struct ipath_qp *qp,
-                                    u32 opcode,
-                                    u32 psn,
-                                    int diff,
-                                    int header_in_data)
-{
-       struct ipath_ack_entry *e;
-       u8 i, prev;
-       int old_req;
-       unsigned long flags;
-
-       if (diff > 0) {
-               /*
-                * Packet sequence error.
-                * A NAK will ACK earlier sends and RDMA writes.
-                * Don't queue the NAK if we already sent one.
-                */
-               if (!qp->r_nak_state) {
-                       qp->r_nak_state = IB_NAK_PSN_ERROR;
-                       /* Use the expected PSN. */
-                       qp->r_ack_psn = qp->r_psn;
-                       goto send_ack;
-               }
-               goto done;
-       }
-
-       /*
-        * Handle a duplicate request.  Don't re-execute SEND, RDMA
-        * write or atomic op.  Don't NAK errors, just silently drop
-        * the duplicate request.  Note that r_sge, r_len, and
-        * r_rcv_len may be in use so don't modify them.
-        *
-        * We are supposed to ACK the earliest duplicate PSN but we
-        * can coalesce an outstanding duplicate ACK.  We have to
-        * send the earliest so that RDMA reads can be restarted at
-        * the requester's expected PSN.
-        *
-        * First, find where this duplicate PSN falls within the
-        * ACKs previously sent.
-        */
-       psn &= IPATH_PSN_MASK;
-       e = NULL;
-       old_req = 1;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-       /* Double check we can process this now that we hold the s_lock. */
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-               goto unlock_done;
-
-       for (i = qp->r_head_ack_queue; ; i = prev) {
-               if (i == qp->s_tail_ack_queue)
-                       old_req = 0;
-               if (i)
-                       prev = i - 1;
-               else
-                       prev = IPATH_MAX_RDMA_ATOMIC;
-               if (prev == qp->r_head_ack_queue) {
-                       e = NULL;
-                       break;
-               }
-               e = &qp->s_ack_queue[prev];
-               if (!e->opcode) {
-                       e = NULL;
-                       break;
-               }
-               if (ipath_cmp24(psn, e->psn) >= 0) {
-                       if (prev == qp->s_tail_ack_queue)
-                               old_req = 0;
-                       break;
-               }
-       }
-       switch (opcode) {
-       case OP(RDMA_READ_REQUEST): {
-               struct ib_reth *reth;
-               u32 offset;
-               u32 len;
-
-               /*
-                * If we didn't find the RDMA read request in the ack queue,
-                * or the send tasklet is already backed up to send an
-                * earlier entry, we can ignore this request.
-                */
-               if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req)
-                       goto unlock_done;
-               /* RETH comes after BTH */
-               if (!header_in_data)
-                       reth = &ohdr->u.rc.reth;
-               else {
-                       reth = (struct ib_reth *)data;
-                       data += sizeof(*reth);
-               }
-               /*
-                * Address range must be a subset of the original
-                * request and start on pmtu boundaries.
-                * We reuse the old ack_queue slot since the requester
-                * should not back up and request an earlier PSN for the
-                * same request.
-                */
-               offset = ((psn - e->psn) & IPATH_PSN_MASK) *
-                       ib_mtu_enum_to_int(qp->path_mtu);
-               len = be32_to_cpu(reth->length);
-               if (unlikely(offset + len > e->rdma_sge.sge.sge_length))
-                       goto unlock_done;
-               if (len != 0) {
-                       u32 rkey = be32_to_cpu(reth->rkey);
-                       u64 vaddr = be64_to_cpu(reth->vaddr);
-                       int ok;
-
-                       ok = ipath_rkey_ok(qp, &e->rdma_sge,
-                                          len, vaddr, rkey,
-                                          IB_ACCESS_REMOTE_READ);
-                       if (unlikely(!ok))
-                               goto unlock_done;
-               } else {
-                       e->rdma_sge.sg_list = NULL;
-                       e->rdma_sge.num_sge = 0;
-                       e->rdma_sge.sge.mr = NULL;
-                       e->rdma_sge.sge.vaddr = NULL;
-                       e->rdma_sge.sge.length = 0;
-                       e->rdma_sge.sge.sge_length = 0;
-               }
-               e->psn = psn;
-               qp->s_ack_state = OP(ACKNOWLEDGE);
-               qp->s_tail_ack_queue = prev;
-               break;
-       }
-
-       case OP(COMPARE_SWAP):
-       case OP(FETCH_ADD): {
-               /*
-                * If we didn't find the atomic request in the ack queue
-                * or the send tasklet is already backed up to send an
-                * earlier entry, we can ignore this request.
-                */
-               if (!e || e->opcode != (u8) opcode || old_req)
-                       goto unlock_done;
-               qp->s_ack_state = OP(ACKNOWLEDGE);
-               qp->s_tail_ack_queue = prev;
-               break;
-       }
-
-       default:
-               if (old_req)
-                       goto unlock_done;
-               /*
-                * Resend the most recent ACK if this request is
-                * after all the previous RDMA reads and atomics.
-                */
-               if (i == qp->r_head_ack_queue) {
-                       spin_unlock_irqrestore(&qp->s_lock, flags);
-                       qp->r_nak_state = 0;
-                       qp->r_ack_psn = qp->r_psn - 1;
-                       goto send_ack;
-               }
-               /*
-                * Try to send a simple ACK to work around a Mellanox bug
-                * which doesn't accept a RDMA read response or atomic
-                * response as an ACK for earlier SENDs or RDMA writes.
-                */
-               if (qp->r_head_ack_queue == qp->s_tail_ack_queue &&
-                   !(qp->s_flags & IPATH_S_ACK_PENDING) &&
-                   qp->s_ack_state == OP(ACKNOWLEDGE)) {
-                       spin_unlock_irqrestore(&qp->s_lock, flags);
-                       qp->r_nak_state = 0;
-                       qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
-                       goto send_ack;
-               }
-               /*
-                * Resend the RDMA read or atomic op which
-                * ACKs this duplicate request.
-                */
-               qp->s_ack_state = OP(ACKNOWLEDGE);
-               qp->s_tail_ack_queue = i;
-               break;
-       }
-       qp->r_nak_state = 0;
-       ipath_schedule_send(qp);
-
-unlock_done:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-done:
-       return 1;
-
-send_ack:
-       return 0;
-}
-
-void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
-{
-       unsigned long flags;
-       int lastwqe;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-       lastwqe = ipath_error_qp(qp, err);
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-
-       if (lastwqe) {
-               struct ib_event ev;
-
-               ev.device = qp->ibqp.device;
-               ev.element.qp = &qp->ibqp;
-               ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-               qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-       }
-}
-
-static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
-{
-       unsigned next;
-
-       next = n + 1;
-       if (next > IPATH_MAX_RDMA_ATOMIC)
-               next = 0;
-       if (n == qp->s_tail_ack_queue) {
-               qp->s_tail_ack_queue = next;
-               qp->s_ack_state = OP(ACKNOWLEDGE);
-       }
-}
-
-/**
- * ipath_rc_rcv - process an incoming RC packet
- * @dev: the device this packet came in on
- * @hdr: the header of this packet
- * @has_grh: true if the header has a GRH
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP for this packet
- *
- * This is called from ipath_qp_rcv() to process an incoming RC packet
- * for the given QP.
- * Called at interrupt level.
- */
-void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-                 int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
-{
-       struct ipath_other_headers *ohdr;
-       u32 opcode;
-       u32 hdrsize;
-       u32 psn;
-       u32 pad;
-       struct ib_wc wc;
-       u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-       int diff;
-       struct ib_reth *reth;
-       int header_in_data;
-       unsigned long flags;
-
-       /* Validate the SLID. See Ch. 9.6.1.5 */
-       if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
-               goto done;
-
-       /* Check for GRH */
-       if (!has_grh) {
-               ohdr = &hdr->u.oth;
-               hdrsize = 8 + 12;       /* LRH + BTH */
-               psn = be32_to_cpu(ohdr->bth[2]);
-               header_in_data = 0;
-       } else {
-               ohdr = &hdr->u.l.oth;
-               hdrsize = 8 + 40 + 12;  /* LRH + GRH + BTH */
-               /*
-                * The header with GRH is 60 bytes and the core driver sets
-                * the eager header buffer size to 56 bytes so the last 4
-                * bytes of the BTH header (PSN) is in the data buffer.
-                */
-               header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
-               if (header_in_data) {
-                       psn = be32_to_cpu(((__be32 *) data)[0]);
-                       data += sizeof(__be32);
-               } else
-                       psn = be32_to_cpu(ohdr->bth[2]);
-       }
-
-       /*
-        * Process responses (ACKs) before anything else.  Note that the
-        * packet sequence number will be for something in the send work
-        * queue rather than the expected receive packet sequence number.
-        * In other words, this QP is the requester.
-        */
-       opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-       if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
-           opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
-               ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn,
-                                 hdrsize, pmtu, header_in_data);
-               goto done;
-       }
-
-       /* Compute 24 bits worth of difference. */
-       diff = ipath_cmp24(psn, qp->r_psn);
-       if (unlikely(diff)) {
-               if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode,
-                                      psn, diff, header_in_data))
-                       goto done;
-               goto send_ack;
-       }
-
-       /* Check for opcode sequence errors. */
-       switch (qp->r_state) {
-       case OP(SEND_FIRST):
-       case OP(SEND_MIDDLE):
-               if (opcode == OP(SEND_MIDDLE) ||
-                   opcode == OP(SEND_LAST) ||
-                   opcode == OP(SEND_LAST_WITH_IMMEDIATE))
-                       break;
-               goto nack_inv;
-
-       case OP(RDMA_WRITE_FIRST):
-       case OP(RDMA_WRITE_MIDDLE):
-               if (opcode == OP(RDMA_WRITE_MIDDLE) ||
-                   opcode == OP(RDMA_WRITE_LAST) ||
-                   opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
-                       break;
-               goto nack_inv;
-
-       default:
-               if (opcode == OP(SEND_MIDDLE) ||
-                   opcode == OP(SEND_LAST) ||
-                   opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
-                   opcode == OP(RDMA_WRITE_MIDDLE) ||
-                   opcode == OP(RDMA_WRITE_LAST) ||
-                   opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
-                       goto nack_inv;
-               /*
-                * Note that it is up to the requester to not send a new
-                * RDMA read or atomic operation before receiving an ACK
-                * for the previous operation.
-                */
-               break;
-       }
-
-       memset(&wc, 0, sizeof wc);
-
-       /* OK, process the packet. */
-       switch (opcode) {
-       case OP(SEND_FIRST):
-               if (!ipath_get_rwqe(qp, 0))
-                       goto rnr_nak;
-               qp->r_rcv_len = 0;
-               /* FALLTHROUGH */
-       case OP(SEND_MIDDLE):
-       case OP(RDMA_WRITE_MIDDLE):
-       send_middle:
-               /* Check for invalid length PMTU or posted rwqe len. */
-               if (unlikely(tlen != (hdrsize + pmtu + 4)))
-                       goto nack_inv;
-               qp->r_rcv_len += pmtu;
-               if (unlikely(qp->r_rcv_len > qp->r_len))
-                       goto nack_inv;
-               ipath_copy_sge(&qp->r_sge, data, pmtu);
-               break;
-
-       case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
-               /* consume RWQE */
-               if (!ipath_get_rwqe(qp, 1))
-                       goto rnr_nak;
-               goto send_last_imm;
-
-       case OP(SEND_ONLY):
-       case OP(SEND_ONLY_WITH_IMMEDIATE):
-               if (!ipath_get_rwqe(qp, 0))
-                       goto rnr_nak;
-               qp->r_rcv_len = 0;
-               if (opcode == OP(SEND_ONLY))
-                       goto send_last;
-               /* FALLTHROUGH */
-       case OP(SEND_LAST_WITH_IMMEDIATE):
-       send_last_imm:
-               if (header_in_data) {
-                       wc.ex.imm_data = *(__be32 *) data;
-                       data += sizeof(__be32);
-               } else {
-                       /* Immediate data comes after BTH */
-                       wc.ex.imm_data = ohdr->u.imm_data;
-               }
-               hdrsize += 4;
-               wc.wc_flags = IB_WC_WITH_IMM;
-               /* FALLTHROUGH */
-       case OP(SEND_LAST):
-       case OP(RDMA_WRITE_LAST):
-       send_last:
-               /* Get the number of bytes the message was padded by. */
-               pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-               /* Check for invalid length. */
-               /* XXX LAST len should be >= 1 */
-               if (unlikely(tlen < (hdrsize + pad + 4)))
-                       goto nack_inv;
-               /* Don't count the CRC. */
-               tlen -= (hdrsize + pad + 4);
-               wc.byte_len = tlen + qp->r_rcv_len;
-               if (unlikely(wc.byte_len > qp->r_len))
-                       goto nack_inv;
-               ipath_copy_sge(&qp->r_sge, data, tlen);
-               qp->r_msn++;
-               if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
-                       break;
-               wc.wr_id = qp->r_wr_id;
-               wc.status = IB_WC_SUCCESS;
-               if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
-                   opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
-                       wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-               else
-                       wc.opcode = IB_WC_RECV;
-               wc.qp = &qp->ibqp;
-               wc.src_qp = qp->remote_qpn;
-               wc.slid = qp->remote_ah_attr.dlid;
-               wc.sl = qp->remote_ah_attr.sl;
-               /* Signal completion event if the solicited bit is set. */
-               ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-                              (ohdr->bth[0] &
-                               cpu_to_be32(1 << 23)) != 0);
-               break;
-
-       case OP(RDMA_WRITE_FIRST):
-       case OP(RDMA_WRITE_ONLY):
-       case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
-               if (unlikely(!(qp->qp_access_flags &
-                              IB_ACCESS_REMOTE_WRITE)))
-                       goto nack_inv;
-               /* consume RWQE */
-               /* RETH comes after BTH */
-               if (!header_in_data)
-                       reth = &ohdr->u.rc.reth;
-               else {
-                       reth = (struct ib_reth *)data;
-                       data += sizeof(*reth);
-               }
-               hdrsize += sizeof(*reth);
-               qp->r_len = be32_to_cpu(reth->length);
-               qp->r_rcv_len = 0;
-               if (qp->r_len != 0) {
-                       u32 rkey = be32_to_cpu(reth->rkey);
-                       u64 vaddr = be64_to_cpu(reth->vaddr);
-                       int ok;
-
-                       /* Check rkey & NAK */
-                       ok = ipath_rkey_ok(qp, &qp->r_sge,
-                                          qp->r_len, vaddr, rkey,
-                                          IB_ACCESS_REMOTE_WRITE);
-                       if (unlikely(!ok))
-                               goto nack_acc;
-               } else {
-                       qp->r_sge.sg_list = NULL;
-                       qp->r_sge.sge.mr = NULL;
-                       qp->r_sge.sge.vaddr = NULL;
-                       qp->r_sge.sge.length = 0;
-                       qp->r_sge.sge.sge_length = 0;
-               }
-               if (opcode == OP(RDMA_WRITE_FIRST))
-                       goto send_middle;
-               else if (opcode == OP(RDMA_WRITE_ONLY))
-                       goto send_last;
-               if (!ipath_get_rwqe(qp, 1))
-                       goto rnr_nak;
-               goto send_last_imm;
-
-       case OP(RDMA_READ_REQUEST): {
-               struct ipath_ack_entry *e;
-               u32 len;
-               u8 next;
-
-               if (unlikely(!(qp->qp_access_flags &
-                              IB_ACCESS_REMOTE_READ)))
-                       goto nack_inv;
-               next = qp->r_head_ack_queue + 1;
-               if (next > IPATH_MAX_RDMA_ATOMIC)
-                       next = 0;
-               spin_lock_irqsave(&qp->s_lock, flags);
-               /* Double check we can process this while holding the s_lock. */
-               if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-                       goto unlock;
-               if (unlikely(next == qp->s_tail_ack_queue)) {
-                       if (!qp->s_ack_queue[next].sent)
-                               goto nack_inv_unlck;
-                       ipath_update_ack_queue(qp, next);
-               }
-               e = &qp->s_ack_queue[qp->r_head_ack_queue];
-               /* RETH comes after BTH */
-               if (!header_in_data)
-                       reth = &ohdr->u.rc.reth;
-               else {
-                       reth = (struct ib_reth *)data;
-                       data += sizeof(*reth);
-               }
-               len = be32_to_cpu(reth->length);
-               if (len) {
-                       u32 rkey = be32_to_cpu(reth->rkey);
-                       u64 vaddr = be64_to_cpu(reth->vaddr);
-                       int ok;
-
-                       /* Check rkey & NAK */
-                       ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
-                                          rkey, IB_ACCESS_REMOTE_READ);
-                       if (unlikely(!ok))
-                               goto nack_acc_unlck;
-                       /*
-                        * Update the next expected PSN.  We add 1 later
-                        * below, so only add the remainder here.
-                        */
-                       if (len > pmtu)
-                               qp->r_psn += (len - 1) / pmtu;
-               } else {
-                       e->rdma_sge.sg_list = NULL;
-                       e->rdma_sge.num_sge = 0;
-                       e->rdma_sge.sge.mr = NULL;
-                       e->rdma_sge.sge.vaddr = NULL;
-                       e->rdma_sge.sge.length = 0;
-                       e->rdma_sge.sge.sge_length = 0;
-               }
-               e->opcode = opcode;
-               e->sent = 0;
-               e->psn = psn;
-               /*
-                * We need to increment the MSN here instead of when we
-                * finish sending the result since a duplicate request would
-                * increment it more than once.
-                */
-               qp->r_msn++;
-               qp->r_psn++;
-               qp->r_state = opcode;
-               qp->r_nak_state = 0;
-               qp->r_head_ack_queue = next;
-
-               /* Schedule the send tasklet. */
-               ipath_schedule_send(qp);
-
-               goto unlock;
-       }
-
-       case OP(COMPARE_SWAP):
-       case OP(FETCH_ADD): {
-               struct ib_atomic_eth *ateth;
-               struct ipath_ack_entry *e;
-               u64 vaddr;
-               atomic64_t *maddr;
-               u64 sdata;
-               u32 rkey;
-               u8 next;
-
-               if (unlikely(!(qp->qp_access_flags &
-                              IB_ACCESS_REMOTE_ATOMIC)))
-                       goto nack_inv;
-               next = qp->r_head_ack_queue + 1;
-               if (next > IPATH_MAX_RDMA_ATOMIC)
-                       next = 0;
-               spin_lock_irqsave(&qp->s_lock, flags);
-               /* Double check we can process this while holding the s_lock. */
-               if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-                       goto unlock;
-               if (unlikely(next == qp->s_tail_ack_queue)) {
-                       if (!qp->s_ack_queue[next].sent)
-                               goto nack_inv_unlck;
-                       ipath_update_ack_queue(qp, next);
-               }
-               if (!header_in_data)
-                       ateth = &ohdr->u.atomic_eth;
-               else
-                       ateth = (struct ib_atomic_eth *)data;
-               vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
-                       be32_to_cpu(ateth->vaddr[1]);
-               if (unlikely(vaddr & (sizeof(u64) - 1)))
-                       goto nack_inv_unlck;
-               rkey = be32_to_cpu(ateth->rkey);
-               /* Check rkey & NAK */
-               if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
-                                           sizeof(u64), vaddr, rkey,
-                                           IB_ACCESS_REMOTE_ATOMIC)))
-                       goto nack_acc_unlck;
-               /* Perform atomic OP and save result. */
-               maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
-               sdata = be64_to_cpu(ateth->swap_data);
-               e = &qp->s_ack_queue[qp->r_head_ack_queue];
-               e->atomic_data = (opcode == OP(FETCH_ADD)) ?
-                       (u64) atomic64_add_return(sdata, maddr) - sdata :
-                       (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
-                                     be64_to_cpu(ateth->compare_data),
-                                     sdata);
-               e->opcode = opcode;
-               e->sent = 0;
-               e->psn = psn & IPATH_PSN_MASK;
-               qp->r_msn++;
-               qp->r_psn++;
-               qp->r_state = opcode;
-               qp->r_nak_state = 0;
-               qp->r_head_ack_queue = next;
-
-               /* Schedule the send tasklet. */
-               ipath_schedule_send(qp);
-
-               goto unlock;
-       }
-
-       default:
-               /* NAK unknown opcodes. */
-               goto nack_inv;
-       }
-       qp->r_psn++;
-       qp->r_state = opcode;
-       qp->r_ack_psn = psn;
-       qp->r_nak_state = 0;
-       /* Send an ACK if requested or required. */
-       if (psn & (1 << 31))
-               goto send_ack;
-       goto done;
-
-rnr_nak:
-       qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
-       qp->r_ack_psn = qp->r_psn;
-       goto send_ack;
-
-nack_inv_unlck:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-nack_inv:
-       ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
-       qp->r_nak_state = IB_NAK_INVALID_REQUEST;
-       qp->r_ack_psn = qp->r_psn;
-       goto send_ack;
-
-nack_acc_unlck:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-nack_acc:
-       ipath_rc_error(qp, IB_WC_LOC_PROT_ERR);
-       qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
-       qp->r_ack_psn = qp->r_psn;
-send_ack:
-       send_rc_ack(qp);
-       goto done;
-
-unlock:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-done:
-       return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_registers.h b/drivers/staging/rdma/ipath/ipath_registers.h
deleted file mode 100644 (file)
index 8f44d0c..0000000
+++ /dev/null
@@ -1,512 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _IPATH_REGISTERS_H
-#define _IPATH_REGISTERS_H
-
-/*
- * This file should only be included by kernel source, and by the diags.  It
- * defines the registers, and their contents, for InfiniPath chips.
- */
-
-/*
- * These are the InfiniPath register and buffer bit definitions,
- * that are visible to software, and needed only by the kernel
- * and diag code.  A few, that are visible to protocol and user
- * code are in ipath_common.h.  Some bits are specific
- * to a given chip implementation, and have been moved to the
- * chip-specific source file
- */
-
-/* kr_revision bits */
-#define INFINIPATH_R_CHIPREVMINOR_MASK 0xFF
-#define INFINIPATH_R_CHIPREVMINOR_SHIFT 0
-#define INFINIPATH_R_CHIPREVMAJOR_MASK 0xFF
-#define INFINIPATH_R_CHIPREVMAJOR_SHIFT 8
-#define INFINIPATH_R_ARCH_MASK 0xFF
-#define INFINIPATH_R_ARCH_SHIFT 16
-#define INFINIPATH_R_SOFTWARE_MASK 0xFF
-#define INFINIPATH_R_SOFTWARE_SHIFT 24
-#define INFINIPATH_R_BOARDID_MASK 0xFF
-#define INFINIPATH_R_BOARDID_SHIFT 32
-
-/* kr_control bits */
-#define INFINIPATH_C_FREEZEMODE 0x00000002
-#define INFINIPATH_C_LINKENABLE 0x00000004
-
-/* kr_sendctrl bits */
-#define INFINIPATH_S_DISARMPIOBUF_SHIFT 16
-#define INFINIPATH_S_UPDTHRESH_SHIFT 24
-#define INFINIPATH_S_UPDTHRESH_MASK 0x1f
-
-#define IPATH_S_ABORT          0
-#define IPATH_S_PIOINTBUFAVAIL 1
-#define IPATH_S_PIOBUFAVAILUPD 2
-#define IPATH_S_PIOENABLE      3
-#define IPATH_S_SDMAINTENABLE  9
-#define IPATH_S_SDMASINGLEDESCRIPTOR   10
-#define IPATH_S_SDMAENABLE     11
-#define IPATH_S_SDMAHALT       12
-#define IPATH_S_DISARM         31
-
-#define INFINIPATH_S_ABORT             (1U << IPATH_S_ABORT)
-#define INFINIPATH_S_PIOINTBUFAVAIL    (1U << IPATH_S_PIOINTBUFAVAIL)
-#define INFINIPATH_S_PIOBUFAVAILUPD    (1U << IPATH_S_PIOBUFAVAILUPD)
-#define INFINIPATH_S_PIOENABLE         (1U << IPATH_S_PIOENABLE)
-#define INFINIPATH_S_SDMAINTENABLE     (1U << IPATH_S_SDMAINTENABLE)
-#define INFINIPATH_S_SDMASINGLEDESCRIPTOR \
-                                       (1U << IPATH_S_SDMASINGLEDESCRIPTOR)
-#define INFINIPATH_S_SDMAENABLE                (1U << IPATH_S_SDMAENABLE)
-#define INFINIPATH_S_SDMAHALT          (1U << IPATH_S_SDMAHALT)
-#define INFINIPATH_S_DISARM            (1U << IPATH_S_DISARM)
-
-/* kr_rcvctrl bits that are the same on multiple chips */
-#define INFINIPATH_R_PORTENABLE_SHIFT 0
-#define INFINIPATH_R_QPMAP_ENABLE (1ULL << 38)
-
-/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_SDMAINT           0x8000000000000000ULL
-#define INFINIPATH_I_SDMADISABLED      0x4000000000000000ULL
-#define INFINIPATH_I_ERROR             0x0000000080000000ULL
-#define INFINIPATH_I_SPIOSENT          0x0000000040000000ULL
-#define INFINIPATH_I_SPIOBUFAVAIL      0x0000000020000000ULL
-#define INFINIPATH_I_GPIO              0x0000000010000000ULL
-#define INFINIPATH_I_JINT              0x0000000004000000ULL
-
-/* kr_errorstatus, kr_errorclear, kr_errormask bits */
-#define INFINIPATH_E_RFORMATERR                        0x0000000000000001ULL
-#define INFINIPATH_E_RVCRC                     0x0000000000000002ULL
-#define INFINIPATH_E_RICRC                     0x0000000000000004ULL
-#define INFINIPATH_E_RMINPKTLEN                        0x0000000000000008ULL
-#define INFINIPATH_E_RMAXPKTLEN                        0x0000000000000010ULL
-#define INFINIPATH_E_RLONGPKTLEN               0x0000000000000020ULL
-#define INFINIPATH_E_RSHORTPKTLEN              0x0000000000000040ULL
-#define INFINIPATH_E_RUNEXPCHAR                        0x0000000000000080ULL
-#define INFINIPATH_E_RUNSUPVL                  0x0000000000000100ULL
-#define INFINIPATH_E_REBP                      0x0000000000000200ULL
-#define INFINIPATH_E_RIBFLOW                   0x0000000000000400ULL
-#define INFINIPATH_E_RBADVERSION               0x0000000000000800ULL
-#define INFINIPATH_E_RRCVEGRFULL               0x0000000000001000ULL
-#define INFINIPATH_E_RRCVHDRFULL               0x0000000000002000ULL
-#define INFINIPATH_E_RBADTID                   0x0000000000004000ULL
-#define INFINIPATH_E_RHDRLEN                   0x0000000000008000ULL
-#define INFINIPATH_E_RHDR                      0x0000000000010000ULL
-#define INFINIPATH_E_RIBLOSTLINK               0x0000000000020000ULL
-#define INFINIPATH_E_SENDSPECIALTRIGGER                0x0000000008000000ULL
-#define INFINIPATH_E_SDMADISABLED              0x0000000010000000ULL
-#define INFINIPATH_E_SMINPKTLEN                        0x0000000020000000ULL
-#define INFINIPATH_E_SMAXPKTLEN                        0x0000000040000000ULL
-#define INFINIPATH_E_SUNDERRUN                 0x0000000080000000ULL
-#define INFINIPATH_E_SPKTLEN                   0x0000000100000000ULL
-#define INFINIPATH_E_SDROPPEDSMPPKT            0x0000000200000000ULL
-#define INFINIPATH_E_SDROPPEDDATAPKT           0x0000000400000000ULL
-#define INFINIPATH_E_SPIOARMLAUNCH             0x0000000800000000ULL
-#define INFINIPATH_E_SUNEXPERRPKTNUM           0x0000001000000000ULL
-#define INFINIPATH_E_SUNSUPVL                  0x0000002000000000ULL
-#define INFINIPATH_E_SENDBUFMISUSE             0x0000004000000000ULL
-#define INFINIPATH_E_SDMAGENMISMATCH           0x0000008000000000ULL
-#define INFINIPATH_E_SDMAOUTOFBOUND            0x0000010000000000ULL
-#define INFINIPATH_E_SDMATAILOUTOFBOUND                0x0000020000000000ULL
-#define INFINIPATH_E_SDMABASE                  0x0000040000000000ULL
-#define INFINIPATH_E_SDMA1STDESC               0x0000080000000000ULL
-#define INFINIPATH_E_SDMARPYTAG                        0x0000100000000000ULL
-#define INFINIPATH_E_SDMADWEN                  0x0000200000000000ULL
-#define INFINIPATH_E_SDMAMISSINGDW             0x0000400000000000ULL
-#define INFINIPATH_E_SDMAUNEXPDATA             0x0000800000000000ULL
-#define INFINIPATH_E_IBSTATUSCHANGED           0x0001000000000000ULL
-#define INFINIPATH_E_INVALIDADDR               0x0002000000000000ULL
-#define INFINIPATH_E_RESET                     0x0004000000000000ULL
-#define INFINIPATH_E_HARDWARE                  0x0008000000000000ULL
-#define INFINIPATH_E_SDMADESCADDRMISALIGN      0x0010000000000000ULL
-#define INFINIPATH_E_INVALIDEEPCMD             0x0020000000000000ULL
-
-/*
- * this is used to print "common" packet errors only when the
- * __IPATH_ERRPKTDBG bit is set in ipath_debug.
- */
-#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \
-               | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \
-               | INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
-               | INFINIPATH_E_REBP )
-
-/* Convenience for decoding Send DMA errors */
-#define INFINIPATH_E_SDMAERRS ( \
-       INFINIPATH_E_SDMAGENMISMATCH | INFINIPATH_E_SDMAOUTOFBOUND | \
-       INFINIPATH_E_SDMATAILOUTOFBOUND | INFINIPATH_E_SDMABASE | \
-       INFINIPATH_E_SDMA1STDESC | INFINIPATH_E_SDMARPYTAG | \
-       INFINIPATH_E_SDMADWEN | INFINIPATH_E_SDMAMISSINGDW | \
-       INFINIPATH_E_SDMAUNEXPDATA | \
-       INFINIPATH_E_SDMADESCADDRMISALIGN | \
-       INFINIPATH_E_SDMADISABLED | \
-       INFINIPATH_E_SENDBUFMISUSE)
-
-/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
-/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
- * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2:  expTID, 3: eagerTID
- *             bit 4: flag buffer, 5: datainfo, 6: header info */
-#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL
-#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
-#define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44
-#define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL
-#define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL
-/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */
-#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF  0x1ULL
-#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC  0x2ULL
-#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL
-/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
-#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF   0x01ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ  0x02ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID   0x04ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF  0x10ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO  0x40ULL
-/* waldo specific -- find the rest in ipath_6110.c */
-#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR  0x0000000400000000ULL
-/* 6120/7220 specific -- find the rest in ipath_6120.c and ipath_7220.c */
-#define INFINIPATH_HWE_MEMBISTFAILED   0x0040000000000000ULL
-
-/* kr_hwdiagctrl bits */
-#define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL
-#define INFINIPATH_DC_FORCETXEMEMPARITYERR_SHIFT 40
-#define INFINIPATH_DC_FORCERXEMEMPARITYERR_MASK 0x7FULL
-#define INFINIPATH_DC_FORCERXEMEMPARITYERR_SHIFT 44
-#define INFINIPATH_DC_FORCERXDSYNCMEMPARITYERR  0x0000000400000000ULL
-#define INFINIPATH_DC_COUNTERDISABLE            0x1000000000000000ULL
-#define INFINIPATH_DC_COUNTERWREN               0x2000000000000000ULL
-#define INFINIPATH_DC_FORCEIBCBUSTOSPCPARITYERR 0x4000000000000000ULL
-#define INFINIPATH_DC_FORCEIBCBUSFRSPCPARITYERR 0x8000000000000000ULL
-
-/* kr_ibcctrl bits */
-#define INFINIPATH_IBCC_FLOWCTRLPERIOD_MASK 0xFFULL
-#define INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT 0
-#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_MASK 0xFFULL
-#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT 8
-#define INFINIPATH_IBCC_LINKINITCMD_MASK 0x3ULL
-#define INFINIPATH_IBCC_LINKINITCMD_DISABLE 1
-/* cycle through TS1/TS2 till OK */
-#define INFINIPATH_IBCC_LINKINITCMD_POLL 2
-/* wait for TS1, then go on */
-#define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3
-#define INFINIPATH_IBCC_LINKINITCMD_SHIFT 16
-#define INFINIPATH_IBCC_LINKCMD_MASK 0x3ULL
-#define INFINIPATH_IBCC_LINKCMD_DOWN 1         /* move to 0x11 */
-#define INFINIPATH_IBCC_LINKCMD_ARMED 2                /* move to 0x21 */
-#define INFINIPATH_IBCC_LINKCMD_ACTIVE 3       /* move to 0x31 */
-#define INFINIPATH_IBCC_LINKCMD_SHIFT 18
-#define INFINIPATH_IBCC_MAXPKTLEN_MASK 0x7FFULL
-#define INFINIPATH_IBCC_MAXPKTLEN_SHIFT 20
-#define INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK 0xFULL
-#define INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT 32
-#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK 0xFULL
-#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT 36
-#define INFINIPATH_IBCC_CREDITSCALE_MASK 0x7ULL
-#define INFINIPATH_IBCC_CREDITSCALE_SHIFT 40
-#define INFINIPATH_IBCC_LOOPBACK             0x8000000000000000ULL
-#define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL
-
-/* kr_ibcstatus bits */
-#define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0
-#define INFINIPATH_IBCS_LINKSTATE_MASK 0x7
-
-#define INFINIPATH_IBCS_TXREADY       0x40000000
-#define INFINIPATH_IBCS_TXCREDITOK    0x80000000
-/* link training states (shift by
-   INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) */
-#define INFINIPATH_IBCS_LT_STATE_DISABLED      0x00
-#define INFINIPATH_IBCS_LT_STATE_LINKUP                0x01
-#define INFINIPATH_IBCS_LT_STATE_POLLACTIVE    0x02
-#define INFINIPATH_IBCS_LT_STATE_POLLQUIET     0x03
-#define INFINIPATH_IBCS_LT_STATE_SLEEPDELAY    0x04
-#define INFINIPATH_IBCS_LT_STATE_SLEEPQUIET    0x05
-#define INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE   0x08
-#define INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG    0x09
-#define INFINIPATH_IBCS_LT_STATE_CFGWAITRMT    0x0a
-#define INFINIPATH_IBCS_LT_STATE_CFGIDLE       0x0b
-#define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN        0x0c
-#define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT        0x0e
-#define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE   0x0f
-/* link state machine states (shift by ibcs_ls_shift) */
-#define INFINIPATH_IBCS_L_STATE_DOWN           0x0
-#define INFINIPATH_IBCS_L_STATE_INIT           0x1
-#define INFINIPATH_IBCS_L_STATE_ARM            0x2
-#define INFINIPATH_IBCS_L_STATE_ACTIVE         0x3
-#define INFINIPATH_IBCS_L_STATE_ACT_DEFER      0x4
-
-
-/* kr_extstatus bits */
-#define INFINIPATH_EXTS_SERDESPLLLOCK 0x1
-#define INFINIPATH_EXTS_GPIOIN_MASK 0xFFFFULL
-#define INFINIPATH_EXTS_GPIOIN_SHIFT 48
-
-/* kr_extctrl bits */
-#define INFINIPATH_EXTC_GPIOINVERT_MASK 0xFFFFULL
-#define INFINIPATH_EXTC_GPIOINVERT_SHIFT 32
-#define INFINIPATH_EXTC_GPIOOE_MASK 0xFFFFULL
-#define INFINIPATH_EXTC_GPIOOE_SHIFT 48
-#define INFINIPATH_EXTC_SERDESENABLE         0x80000000ULL
-#define INFINIPATH_EXTC_SERDESCONNECT        0x40000000ULL
-#define INFINIPATH_EXTC_SERDESENTRUNKING     0x20000000ULL
-#define INFINIPATH_EXTC_SERDESDISRXFIFO      0x10000000ULL
-#define INFINIPATH_EXTC_SERDESENPLPBK1       0x08000000ULL
-#define INFINIPATH_EXTC_SERDESENPLPBK2       0x04000000ULL
-#define INFINIPATH_EXTC_SERDESENENCDEC       0x02000000ULL
-#define INFINIPATH_EXTC_LED1SECPORT_ON       0x00000020ULL
-#define INFINIPATH_EXTC_LED2SECPORT_ON       0x00000010ULL
-#define INFINIPATH_EXTC_LED1PRIPORT_ON       0x00000008ULL
-#define INFINIPATH_EXTC_LED2PRIPORT_ON       0x00000004ULL
-#define INFINIPATH_EXTC_LEDGBLOK_ON          0x00000002ULL
-#define INFINIPATH_EXTC_LEDGBLERR_OFF        0x00000001ULL
-
-/* kr_partitionkey bits */
-#define INFINIPATH_PKEY_SIZE 16
-#define INFINIPATH_PKEY_MASK 0xFFFF
-#define INFINIPATH_PKEY_DEFAULT_PKEY 0xFFFF
-
-/* kr_serdesconfig0 bits */
-#define INFINIPATH_SERDC0_RESET_MASK  0xfULL   /* overal reset bits */
-#define INFINIPATH_SERDC0_RESET_PLL   0x10000000ULL    /* pll reset */
-/* tx idle enables (per lane) */
-#define INFINIPATH_SERDC0_TXIDLE      0xF000ULL
-/* rx detect enables (per lane) */
-#define INFINIPATH_SERDC0_RXDETECT_EN 0xF0000ULL
-/* L1 Power down; use with RXDETECT, Otherwise not used on IB side */
-#define INFINIPATH_SERDC0_L1PWR_DN      0xF0ULL
-
-/* common kr_xgxsconfig bits (or safe in all, even if not implemented) */
-#define INFINIPATH_XGXS_RX_POL_SHIFT 19
-#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
-
-
-/*
- * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our
- * PIO send buffers.  This is well beyond anything currently
- * defined in the InfiniBand spec.
- */
-#define IPATH_PIO_MAXIBHDR 128
-
-typedef u64 ipath_err_t;
-
-/* The following change with the type of device, so
- * need to be part of the ipath_devdata struct, or
- * we could have problems plugging in devices of
- * different types (e.g. one HT, one PCIE)
- * in one system, to be managed by one driver.
- * On the other hand, this file is may also be included
- * by other code, so leave the declarations here
- * temporarily. Minor footprint issue if common-model
- * linker used, none if C89+ linker used.
- */
-
-/* mask of defined bits for various registers */
-extern u64 infinipath_i_bitsextant;
-extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
-
-/* masks that are different in various chips, or only exist in some chips */
-extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
-
-/*
- * These are the infinipath general register numbers (not offsets).
- * The kernel registers are used directly, those beyond the kernel
- * registers are calculated from one of the base registers.  The use of
- * an integer type doesn't allow type-checking as thorough as, say,
- * an enum but allows for better hiding of chip differences.
- */
-typedef const u16 ipath_kreg,  /* infinipath general registers */
- ipath_creg,                   /* infinipath counter registers */
- ipath_sreg;                   /* kernel-only, infinipath send registers */
-
-/*
- * These are the chip registers common to all infinipath chips, and
- * used both by the kernel and the diagnostics or other user code.
- * They are all implemented such that 64 bit accesses work.
- * Some implement no more than 32 bits.  Because 64 bit reads
- * require 2 HT cmds on opteron, we access those with 32 bit
- * reads for efficiency (they are written as 64 bits, since
- * the extra 32 bits are nearly free on writes, and it slightly reduces
- * complexity).  The rest are all accessed as 64 bits.
- */
-struct ipath_kregs {
-       /* These are the 32 bit group */
-       ipath_kreg kr_control;
-       ipath_kreg kr_counterregbase;
-       ipath_kreg kr_intmask;
-       ipath_kreg kr_intstatus;
-       ipath_kreg kr_pagealign;
-       ipath_kreg kr_portcnt;
-       ipath_kreg kr_rcvtidbase;
-       ipath_kreg kr_rcvtidcnt;
-       ipath_kreg kr_rcvegrbase;
-       ipath_kreg kr_rcvegrcnt;
-       ipath_kreg kr_scratch;
-       ipath_kreg kr_sendctrl;
-       ipath_kreg kr_sendpiobufbase;
-       ipath_kreg kr_sendpiobufcnt;
-       ipath_kreg kr_sendpiosize;
-       ipath_kreg kr_sendregbase;
-       ipath_kreg kr_userregbase;
-       /* These are the 64 bit group */
-       ipath_kreg kr_debugport;
-       ipath_kreg kr_debugportselect;
-       ipath_kreg kr_errorclear;
-       ipath_kreg kr_errormask;
-       ipath_kreg kr_errorstatus;
-       ipath_kreg kr_extctrl;
-       ipath_kreg kr_extstatus;
-       ipath_kreg kr_gpio_clear;
-       ipath_kreg kr_gpio_mask;
-       ipath_kreg kr_gpio_out;
-       ipath_kreg kr_gpio_status;
-       ipath_kreg kr_hwdiagctrl;
-       ipath_kreg kr_hwerrclear;
-       ipath_kreg kr_hwerrmask;
-       ipath_kreg kr_hwerrstatus;
-       ipath_kreg kr_ibcctrl;
-       ipath_kreg kr_ibcstatus;
-       ipath_kreg kr_intblocked;
-       ipath_kreg kr_intclear;
-       ipath_kreg kr_interruptconfig;
-       ipath_kreg kr_mdio;
-       ipath_kreg kr_partitionkey;
-       ipath_kreg kr_rcvbthqp;
-       ipath_kreg kr_rcvbufbase;
-       ipath_kreg kr_rcvbufsize;
-       ipath_kreg kr_rcvctrl;
-       ipath_kreg kr_rcvhdrcnt;
-       ipath_kreg kr_rcvhdrentsize;
-       ipath_kreg kr_rcvhdrsize;
-       ipath_kreg kr_rcvintmembase;
-       ipath_kreg kr_rcvintmemsize;
-       ipath_kreg kr_revision;
-       ipath_kreg kr_sendbuffererror;
-       ipath_kreg kr_sendpioavailaddr;
-       ipath_kreg kr_serdesconfig0;
-       ipath_kreg kr_serdesconfig1;
-       ipath_kreg kr_serdesstatus;
-       ipath_kreg kr_txintmembase;
-       ipath_kreg kr_txintmemsize;
-       ipath_kreg kr_xgxsconfig;
-       ipath_kreg kr_ibpllcfg;
-       /* use these two (and the following N ports) only with
-        * ipath_k*_kreg64_port(); not *kreg64() */
-       ipath_kreg kr_rcvhdraddr;
-       ipath_kreg kr_rcvhdrtailaddr;
-
-       /* remaining registers are not present on all types of infinipath
-          chips  */
-       ipath_kreg kr_rcvpktledcnt;
-       ipath_kreg kr_pcierbuftestreg0;
-       ipath_kreg kr_pcierbuftestreg1;
-       ipath_kreg kr_pcieq0serdesconfig0;
-       ipath_kreg kr_pcieq0serdesconfig1;
-       ipath_kreg kr_pcieq0serdesstatus;
-       ipath_kreg kr_pcieq1serdesconfig0;
-       ipath_kreg kr_pcieq1serdesconfig1;
-       ipath_kreg kr_pcieq1serdesstatus;
-       ipath_kreg kr_hrtbt_guid;
-       ipath_kreg kr_ibcddrctrl;
-       ipath_kreg kr_ibcddrstatus;
-       ipath_kreg kr_jintreload;
-
-       /* send dma related regs */
-       ipath_kreg kr_senddmabase;
-       ipath_kreg kr_senddmalengen;
-       ipath_kreg kr_senddmatail;
-       ipath_kreg kr_senddmahead;
-       ipath_kreg kr_senddmaheadaddr;
-       ipath_kreg kr_senddmabufmask0;
-       ipath_kreg kr_senddmabufmask1;
-       ipath_kreg kr_senddmabufmask2;
-       ipath_kreg kr_senddmastatus;
-
-       /* SerDes related regs (IBA7220-only) */
-       ipath_kreg kr_ibserdesctrl;
-       ipath_kreg kr_ib_epbacc;
-       ipath_kreg kr_ib_epbtrans;
-       ipath_kreg kr_pcie_epbacc;
-       ipath_kreg kr_pcie_epbtrans;
-       ipath_kreg kr_ib_ddsrxeq;
-};
-
-struct ipath_cregs {
-       ipath_creg cr_badformatcnt;
-       ipath_creg cr_erricrccnt;
-       ipath_creg cr_errlinkcnt;
-       ipath_creg cr_errlpcrccnt;
-       ipath_creg cr_errpkey;
-       ipath_creg cr_errrcvflowctrlcnt;
-       ipath_creg cr_err_rlencnt;
-       ipath_creg cr_errslencnt;
-       ipath_creg cr_errtidfull;
-       ipath_creg cr_errtidvalid;
-       ipath_creg cr_errvcrccnt;
-       ipath_creg cr_ibstatuschange;
-       ipath_creg cr_intcnt;
-       ipath_creg cr_invalidrlencnt;
-       ipath_creg cr_invalidslencnt;
-       ipath_creg cr_lbflowstallcnt;
-       ipath_creg cr_iblinkdowncnt;
-       ipath_creg cr_iblinkerrrecovcnt;
-       ipath_creg cr_ibsymbolerrcnt;
-       ipath_creg cr_pktrcvcnt;
-       ipath_creg cr_pktrcvflowctrlcnt;
-       ipath_creg cr_pktsendcnt;
-       ipath_creg cr_pktsendflowcnt;
-       ipath_creg cr_portovflcnt;
-       ipath_creg cr_rcvebpcnt;
-       ipath_creg cr_rcvovflcnt;
-       ipath_creg cr_rxdroppktcnt;
-       ipath_creg cr_senddropped;
-       ipath_creg cr_sendstallcnt;
-       ipath_creg cr_sendunderruncnt;
-       ipath_creg cr_unsupvlcnt;
-       ipath_creg cr_wordrcvcnt;
-       ipath_creg cr_wordsendcnt;
-       ipath_creg cr_vl15droppedpktcnt;
-       ipath_creg cr_rxotherlocalphyerrcnt;
-       ipath_creg cr_excessbufferovflcnt;
-       ipath_creg cr_locallinkintegrityerrcnt;
-       ipath_creg cr_rxvlerrcnt;
-       ipath_creg cr_rxdlidfltrcnt;
-       ipath_creg cr_psstat;
-       ipath_creg cr_psstart;
-       ipath_creg cr_psinterval;
-       ipath_creg cr_psrcvdatacount;
-       ipath_creg cr_psrcvpktscount;
-       ipath_creg cr_psxmitdatacount;
-       ipath_creg cr_psxmitpktscount;
-       ipath_creg cr_psxmitwaitcount;
-};
-
-#endif                         /* _IPATH_REGISTERS_H */
diff --git a/drivers/staging/rdma/ipath/ipath_ruc.c b/drivers/staging/rdma/ipath/ipath_ruc.c
deleted file mode 100644 (file)
index e541a01..0000000
+++ /dev/null
@@ -1,733 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/spinlock.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/*
- * Convert the AETH RNR timeout code into the number of milliseconds.
- */
-const u32 ib_ipath_rnr_table[32] = {
-       656,                    /* 0 */
-       1,                      /* 1 */
-       1,                      /* 2 */
-       1,                      /* 3 */
-       1,                      /* 4 */
-       1,                      /* 5 */
-       1,                      /* 6 */
-       1,                      /* 7 */
-       1,                      /* 8 */
-       1,                      /* 9 */
-       1,                      /* A */
-       1,                      /* B */
-       1,                      /* C */
-       1,                      /* D */
-       2,                      /* E */
-       2,                      /* F */
-       3,                      /* 10 */
-       4,                      /* 11 */
-       6,                      /* 12 */
-       8,                      /* 13 */
-       11,                     /* 14 */
-       16,                     /* 15 */
-       21,                     /* 16 */
-       31,                     /* 17 */
-       41,                     /* 18 */
-       62,                     /* 19 */
-       82,                     /* 1A */
-       123,                    /* 1B */
-       164,                    /* 1C */
-       246,                    /* 1D */
-       328,                    /* 1E */
-       492                     /* 1F */
-};
-
-/**
- * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
- * @qp: the QP
- *
- * Called with the QP s_lock held and interrupts disabled.
- * XXX Use a simple list for now.  We might need a priority
- * queue if we have lots of QPs waiting for RNR timeouts
- * but that should be rare.
- */
-void ipath_insert_rnr_queue(struct ipath_qp *qp)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-
-       /* We already did a spin_lock_irqsave(), so just use spin_lock */
-       spin_lock(&dev->pending_lock);
-       if (list_empty(&dev->rnrwait))
-               list_add(&qp->timerwait, &dev->rnrwait);
-       else {
-               struct list_head *l = &dev->rnrwait;
-               struct ipath_qp *nqp = list_entry(l->next, struct ipath_qp,
-                                                 timerwait);
-
-               while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) {
-                       qp->s_rnr_timeout -= nqp->s_rnr_timeout;
-                       l = l->next;
-                       if (l->next == &dev->rnrwait) {
-                               nqp = NULL;
-                               break;
-                       }
-                       nqp = list_entry(l->next, struct ipath_qp,
-                                        timerwait);
-               }
-               if (nqp)
-                       nqp->s_rnr_timeout -= qp->s_rnr_timeout;
-               list_add(&qp->timerwait, l);
-       }
-       spin_unlock(&dev->pending_lock);
-}
-
-/**
- * ipath_init_sge - Validate a RWQE and fill in the SGE state
- * @qp: the QP
- *
- * Return 1 if OK.
- */
-int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
-                  u32 *lengthp, struct ipath_sge_state *ss)
-{
-       int i, j, ret;
-       struct ib_wc wc;
-
-       *lengthp = 0;
-       for (i = j = 0; i < wqe->num_sge; i++) {
-               if (wqe->sg_list[i].length == 0)
-                       continue;
-               /* Check LKEY */
-               if (!ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
-                                  &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
-                       goto bad_lkey;
-               *lengthp += wqe->sg_list[i].length;
-               j++;
-       }
-       ss->num_sge = j;
-       ret = 1;
-       goto bail;
-
-bad_lkey:
-       memset(&wc, 0, sizeof(wc));
-       wc.wr_id = wqe->wr_id;
-       wc.status = IB_WC_LOC_PROT_ERR;
-       wc.opcode = IB_WC_RECV;
-       wc.qp = &qp->ibqp;
-       /* Signal solicited completion event. */
-       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-       ret = 0;
-bail:
-       return ret;
-}
-
-/**
- * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
- * @qp: the QP
- * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
- *
- * Return 0 if no RWQE is available, otherwise return 1.
- *
- * Can be called from interrupt level.
- */
-int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
-{
-       unsigned long flags;
-       struct ipath_rq *rq;
-       struct ipath_rwq *wq;
-       struct ipath_srq *srq;
-       struct ipath_rwqe *wqe;
-       void (*handler)(struct ib_event *, void *);
-       u32 tail;
-       int ret;
-
-       if (qp->ibqp.srq) {
-               srq = to_isrq(qp->ibqp.srq);
-               handler = srq->ibsrq.event_handler;
-               rq = &srq->rq;
-       } else {
-               srq = NULL;
-               handler = NULL;
-               rq = &qp->r_rq;
-       }
-
-       spin_lock_irqsave(&rq->lock, flags);
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-               ret = 0;
-               goto unlock;
-       }
-
-       wq = rq->wq;
-       tail = wq->tail;
-       /* Validate tail before using it since it is user writable. */
-       if (tail >= rq->size)
-               tail = 0;
-       do {
-               if (unlikely(tail == wq->head)) {
-                       ret = 0;
-                       goto unlock;
-               }
-               /* Make sure entry is read after head index is read. */
-               smp_rmb();
-               wqe = get_rwqe_ptr(rq, tail);
-               if (++tail >= rq->size)
-                       tail = 0;
-               if (wr_id_only)
-                       break;
-               qp->r_sge.sg_list = qp->r_sg_list;
-       } while (!ipath_init_sge(qp, wqe, &qp->r_len, &qp->r_sge));
-       qp->r_wr_id = wqe->wr_id;
-       wq->tail = tail;
-
-       ret = 1;
-       set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
-       if (handler) {
-               u32 n;
-
-               /*
-                * validate head pointer value and compute
-                * the number of remaining WQEs.
-                */
-               n = wq->head;
-               if (n >= rq->size)
-                       n = 0;
-               if (n < tail)
-                       n += rq->size - tail;
-               else
-                       n -= tail;
-               if (n < srq->limit) {
-                       struct ib_event ev;
-
-                       srq->limit = 0;
-                       spin_unlock_irqrestore(&rq->lock, flags);
-                       ev.device = qp->ibqp.device;
-                       ev.element.srq = qp->ibqp.srq;
-                       ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-                       handler(&ev, srq->ibsrq.srq_context);
-                       goto bail;
-               }
-       }
-unlock:
-       spin_unlock_irqrestore(&rq->lock, flags);
-bail:
-       return ret;
-}
-
-/**
- * ipath_ruc_loopback - handle UC and RC lookback requests
- * @sqp: the sending QP
- *
- * This is called from ipath_do_send() to
- * forward a WQE addressed to the same HCA.
- * Note that although we are single threaded due to the tasklet, we still
- * have to protect against post_send().  We don't have to worry about
- * receive interrupts since this is a connected protocol and all packets
- * will pass through here.
- */
-static void ipath_ruc_loopback(struct ipath_qp *sqp)
-{
-       struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
-       struct ipath_qp *qp;
-       struct ipath_swqe *wqe;
-       struct ipath_sge *sge;
-       unsigned long flags;
-       struct ib_wc wc;
-       u64 sdata;
-       atomic64_t *maddr;
-       enum ib_wc_status send_status;
-
-       /*
-        * Note that we check the responder QP state after
-        * checking the requester's state.
-        */
-       qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
-
-       spin_lock_irqsave(&sqp->s_lock, flags);
-
-       /* Return if we are already busy processing a work request. */
-       if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
-           !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
-               goto unlock;
-
-       sqp->s_flags |= IPATH_S_BUSY;
-
-again:
-       if (sqp->s_last == sqp->s_head)
-               goto clr_busy;
-       wqe = get_swqe_ptr(sqp, sqp->s_last);
-
-       /* Return if it is not OK to start a new work reqeust. */
-       if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
-               if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
-                       goto clr_busy;
-               /* We are in the error state, flush the work request. */
-               send_status = IB_WC_WR_FLUSH_ERR;
-               goto flush_send;
-       }
-
-       /*
-        * We can rely on the entry not changing without the s_lock
-        * being held until we update s_last.
-        * We increment s_cur to indicate s_last is in progress.
-        */
-       if (sqp->s_last == sqp->s_cur) {
-               if (++sqp->s_cur >= sqp->s_size)
-                       sqp->s_cur = 0;
-       }
-       spin_unlock_irqrestore(&sqp->s_lock, flags);
-
-       if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-               dev->n_pkt_drops++;
-               /*
-                * For RC, the requester would timeout and retry so
-                * shortcut the timeouts and just signal too many retries.
-                */
-               if (sqp->ibqp.qp_type == IB_QPT_RC)
-                       send_status = IB_WC_RETRY_EXC_ERR;
-               else
-                       send_status = IB_WC_SUCCESS;
-               goto serr;
-       }
-
-       memset(&wc, 0, sizeof wc);
-       send_status = IB_WC_SUCCESS;
-
-       sqp->s_sge.sge = wqe->sg_list[0];
-       sqp->s_sge.sg_list = wqe->sg_list + 1;
-       sqp->s_sge.num_sge = wqe->wr.num_sge;
-       sqp->s_len = wqe->length;
-       switch (wqe->wr.opcode) {
-       case IB_WR_SEND_WITH_IMM:
-               wc.wc_flags = IB_WC_WITH_IMM;
-               wc.ex.imm_data = wqe->wr.ex.imm_data;
-               /* FALLTHROUGH */
-       case IB_WR_SEND:
-               if (!ipath_get_rwqe(qp, 0))
-                       goto rnr_nak;
-               break;
-
-       case IB_WR_RDMA_WRITE_WITH_IMM:
-               if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
-                       goto inv_err;
-               wc.wc_flags = IB_WC_WITH_IMM;
-               wc.ex.imm_data = wqe->wr.ex.imm_data;
-               if (!ipath_get_rwqe(qp, 1))
-                       goto rnr_nak;
-               /* FALLTHROUGH */
-       case IB_WR_RDMA_WRITE:
-               if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
-                       goto inv_err;
-               if (wqe->length == 0)
-                       break;
-               if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
-                                           wqe->rdma_wr.remote_addr,
-                                           wqe->rdma_wr.rkey,
-                                           IB_ACCESS_REMOTE_WRITE)))
-                       goto acc_err;
-               break;
-
-       case IB_WR_RDMA_READ:
-               if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
-                       goto inv_err;
-               if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
-                                           wqe->rdma_wr.remote_addr,
-                                           wqe->rdma_wr.rkey,
-                                           IB_ACCESS_REMOTE_READ)))
-                       goto acc_err;
-               qp->r_sge.sge = wqe->sg_list[0];
-               qp->r_sge.sg_list = wqe->sg_list + 1;
-               qp->r_sge.num_sge = wqe->wr.num_sge;
-               break;
-
-       case IB_WR_ATOMIC_CMP_AND_SWP:
-       case IB_WR_ATOMIC_FETCH_AND_ADD:
-               if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
-                       goto inv_err;
-               if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
-                                           wqe->atomic_wr.remote_addr,
-                                           wqe->atomic_wr.rkey,
-                                           IB_ACCESS_REMOTE_ATOMIC)))
-                       goto acc_err;
-               /* Perform atomic OP and save result. */
-               maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
-               sdata = wqe->atomic_wr.compare_add;
-               *(u64 *) sqp->s_sge.sge.vaddr =
-                       (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
-                       (u64) atomic64_add_return(sdata, maddr) - sdata :
-                       (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
-                                     sdata, wqe->atomic_wr.swap);
-               goto send_comp;
-
-       default:
-               send_status = IB_WC_LOC_QP_OP_ERR;
-               goto serr;
-       }
-
-       sge = &sqp->s_sge.sge;
-       while (sqp->s_len) {
-               u32 len = sqp->s_len;
-
-               if (len > sge->length)
-                       len = sge->length;
-               if (len > sge->sge_length)
-                       len = sge->sge_length;
-               BUG_ON(len == 0);
-               ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
-               sge->vaddr += len;
-               sge->length -= len;
-               sge->sge_length -= len;
-               if (sge->sge_length == 0) {
-                       if (--sqp->s_sge.num_sge)
-                               *sge = *sqp->s_sge.sg_list++;
-               } else if (sge->length == 0 && sge->mr != NULL) {
-                       if (++sge->n >= IPATH_SEGSZ) {
-                               if (++sge->m >= sge->mr->mapsz)
-                                       break;
-                               sge->n = 0;
-                       }
-                       sge->vaddr =
-                               sge->mr->map[sge->m]->segs[sge->n].vaddr;
-                       sge->length =
-                               sge->mr->map[sge->m]->segs[sge->n].length;
-               }
-               sqp->s_len -= len;
-       }
-
-       if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
-               goto send_comp;
-
-       if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-               wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-       else
-               wc.opcode = IB_WC_RECV;
-       wc.wr_id = qp->r_wr_id;
-       wc.status = IB_WC_SUCCESS;
-       wc.byte_len = wqe->length;
-       wc.qp = &qp->ibqp;
-       wc.src_qp = qp->remote_qpn;
-       wc.slid = qp->remote_ah_attr.dlid;
-       wc.sl = qp->remote_ah_attr.sl;
-       wc.port_num = 1;
-       /* Signal completion event if the solicited bit is set. */
-       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-                      wqe->wr.send_flags & IB_SEND_SOLICITED);
-
-send_comp:
-       spin_lock_irqsave(&sqp->s_lock, flags);
-flush_send:
-       sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
-       ipath_send_complete(sqp, wqe, send_status);
-       goto again;
-
-rnr_nak:
-       /* Handle RNR NAK */
-       if (qp->ibqp.qp_type == IB_QPT_UC)
-               goto send_comp;
-       /*
-        * Note: we don't need the s_lock held since the BUSY flag
-        * makes this single threaded.
-        */
-       if (sqp->s_rnr_retry == 0) {
-               send_status = IB_WC_RNR_RETRY_EXC_ERR;
-               goto serr;
-       }
-       if (sqp->s_rnr_retry_cnt < 7)
-               sqp->s_rnr_retry--;
-       spin_lock_irqsave(&sqp->s_lock, flags);
-       if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
-               goto clr_busy;
-       sqp->s_flags |= IPATH_S_WAITING;
-       dev->n_rnr_naks++;
-       sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
-       ipath_insert_rnr_queue(sqp);
-       goto clr_busy;
-
-inv_err:
-       send_status = IB_WC_REM_INV_REQ_ERR;
-       wc.status = IB_WC_LOC_QP_OP_ERR;
-       goto err;
-
-acc_err:
-       send_status = IB_WC_REM_ACCESS_ERR;
-       wc.status = IB_WC_LOC_PROT_ERR;
-err:
-       /* responder goes to error state */
-       ipath_rc_error(qp, wc.status);
-
-serr:
-       spin_lock_irqsave(&sqp->s_lock, flags);
-       ipath_send_complete(sqp, wqe, send_status);
-       if (sqp->ibqp.qp_type == IB_QPT_RC) {
-               int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
-
-               sqp->s_flags &= ~IPATH_S_BUSY;
-               spin_unlock_irqrestore(&sqp->s_lock, flags);
-               if (lastwqe) {
-                       struct ib_event ev;
-
-                       ev.device = sqp->ibqp.device;
-                       ev.element.qp = &sqp->ibqp;
-                       ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-                       sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
-               }
-               goto done;
-       }
-clr_busy:
-       sqp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-       spin_unlock_irqrestore(&sqp->s_lock, flags);
-done:
-       if (qp && atomic_dec_and_test(&qp->refcount))
-               wake_up(&qp->wait);
-}
-
-static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
-{
-       if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
-           qp->ibqp.qp_type == IB_QPT_SMI) {
-               unsigned long flags;
-
-               spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-               dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                                dd->ipath_sendctrl);
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-       }
-}
-
-/**
- * ipath_no_bufs_available - tell the layer driver we need buffers
- * @qp: the QP that caused the problem
- * @dev: the device we ran out of buffers on
- *
- * Called when we run out of PIO buffers.
- * If we are now in the error state, return zero to flush the
- * send work request.
- */
-static int ipath_no_bufs_available(struct ipath_qp *qp,
-                                   struct ipath_ibdev *dev)
-{
-       unsigned long flags;
-       int ret = 1;
-
-       /*
-        * Note that as soon as want_buffer() is called and
-        * possibly before it returns, ipath_ib_piobufavail()
-        * could be called. Therefore, put QP on the piowait list before
-        * enabling the PIO avail interrupt.
-        */
-       spin_lock_irqsave(&qp->s_lock, flags);
-       if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
-               dev->n_piowait++;
-               qp->s_flags |= IPATH_S_WAITING;
-               qp->s_flags &= ~IPATH_S_BUSY;
-               spin_lock(&dev->pending_lock);
-               if (list_empty(&qp->piowait))
-                       list_add_tail(&qp->piowait, &dev->piowait);
-               spin_unlock(&dev->pending_lock);
-       } else
-               ret = 0;
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-       if (ret)
-               want_buffer(dev->dd, qp);
-       return ret;
-}
-
-/**
- * ipath_make_grh - construct a GRH header
- * @dev: a pointer to the ipath device
- * @hdr: a pointer to the GRH header being constructed
- * @grh: the global route address to send to
- * @hwords: the number of 32 bit words of header being sent
- * @nwords: the number of 32 bit words of data being sent
- *
- * Return the size of the header in 32 bit words.
- */
-u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
-                  struct ib_global_route *grh, u32 hwords, u32 nwords)
-{
-       hdr->version_tclass_flow =
-               cpu_to_be32((6 << 28) |
-                           (grh->traffic_class << 20) |
-                           grh->flow_label);
-       hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
-       /* next_hdr is defined by C8-7 in ch. 8.4.1 */
-       hdr->next_hdr = 0x1B;
-       hdr->hop_limit = grh->hop_limit;
-       /* The SGID is 32-bit aligned. */
-       hdr->sgid.global.subnet_prefix = dev->gid_prefix;
-       hdr->sgid.global.interface_id = dev->dd->ipath_guid;
-       hdr->dgid = grh->dgid;
-
-       /* GRH header size in 32-bit words. */
-       return sizeof(struct ib_grh) / sizeof(u32);
-}
-
-void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
-                          struct ipath_other_headers *ohdr,
-                          u32 bth0, u32 bth2)
-{
-       u16 lrh0;
-       u32 nwords;
-       u32 extra_bytes;
-
-       /* Construct the header. */
-       extra_bytes = -qp->s_cur_size & 3;
-       nwords = (qp->s_cur_size + extra_bytes) >> 2;
-       lrh0 = IPATH_LRH_BTH;
-       if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
-               qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
-                                                &qp->remote_ah_attr.grh,
-                                                qp->s_hdrwords, nwords);
-               lrh0 = IPATH_LRH_GRH;
-       }
-       lrh0 |= qp->remote_ah_attr.sl << 4;
-       qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
-       qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
-       qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
-       qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid |
-                                      qp->remote_ah_attr.src_path_bits);
-       bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
-       bth0 |= extra_bytes << 20;
-       ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22));
-       ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
-       ohdr->bth[2] = cpu_to_be32(bth2);
-}
-
-/**
- * ipath_do_send - perform a send on a QP
- * @data: contains a pointer to the QP
- *
- * Process entries in the send work queue until credit or queue is
- * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
- * Otherwise, two threads could send packets out of order.
- */
-void ipath_do_send(unsigned long data)
-{
-       struct ipath_qp *qp = (struct ipath_qp *)data;
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       int (*make_req)(struct ipath_qp *qp);
-       unsigned long flags;
-
-       if ((qp->ibqp.qp_type == IB_QPT_RC ||
-            qp->ibqp.qp_type == IB_QPT_UC) &&
-           qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
-               ipath_ruc_loopback(qp);
-               goto bail;
-       }
-
-       if (qp->ibqp.qp_type == IB_QPT_RC)
-              make_req = ipath_make_rc_req;
-       else if (qp->ibqp.qp_type == IB_QPT_UC)
-              make_req = ipath_make_uc_req;
-       else
-              make_req = ipath_make_ud_req;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       /* Return if we are already busy processing a work request. */
-       if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
-           !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-               goto bail;
-       }
-
-       qp->s_flags |= IPATH_S_BUSY;
-
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-
-again:
-       /* Check for a constructed packet to be sent. */
-       if (qp->s_hdrwords != 0) {
-               /*
-                * If no PIO bufs are available, return.  An interrupt will
-                * call ipath_ib_piobufavail() when one is available.
-                */
-               if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
-                                    qp->s_cur_sge, qp->s_cur_size)) {
-                       if (ipath_no_bufs_available(qp, dev))
-                               goto bail;
-               }
-               dev->n_unicast_xmit++;
-               /* Record that we sent the packet and s_hdr is empty. */
-               qp->s_hdrwords = 0;
-       }
-
-       if (make_req(qp))
-               goto again;
-
-bail:;
-}
-
-/*
- * This should be called with s_lock held.
- */
-void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
-                        enum ib_wc_status status)
-{
-       u32 old_last, last;
-
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
-               return;
-
-       /* See ch. 11.2.4.1 and 10.7.3.1 */
-       if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
-           (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
-           status != IB_WC_SUCCESS) {
-               struct ib_wc wc;
-
-               memset(&wc, 0, sizeof wc);
-               wc.wr_id = wqe->wr.wr_id;
-               wc.status = status;
-               wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-               wc.qp = &qp->ibqp;
-               if (status == IB_WC_SUCCESS)
-                       wc.byte_len = wqe->length;
-               ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
-                              status != IB_WC_SUCCESS);
-       }
-
-       old_last = last = qp->s_last;
-       if (++last >= qp->s_size)
-               last = 0;
-       qp->s_last = last;
-       if (qp->s_cur == old_last)
-               qp->s_cur = last;
-       if (qp->s_tail == old_last)
-               qp->s_tail = last;
-       if (qp->state == IB_QPS_SQD && last == qp->s_cur)
-               qp->s_draining = 0;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_sdma.c b/drivers/staging/rdma/ipath/ipath_sdma.c
deleted file mode 100644 (file)
index 1ffc06a..0000000
+++ /dev/null
@@ -1,818 +0,0 @@
-/*
- * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/spinlock.h>
-#include <linux/gfp.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-#define SDMA_DESCQ_SZ PAGE_SIZE /* 256 entries per 4KB page */
-
-static void vl15_watchdog_enq(struct ipath_devdata *dd)
-{
-       /* ipath_sdma_lock must already be held */
-       if (atomic_inc_return(&dd->ipath_sdma_vl15_count) == 1) {
-               unsigned long interval = (HZ + 19) / 20;
-               dd->ipath_sdma_vl15_timer.expires = jiffies + interval;
-               add_timer(&dd->ipath_sdma_vl15_timer);
-       }
-}
-
-static void vl15_watchdog_deq(struct ipath_devdata *dd)
-{
-       /* ipath_sdma_lock must already be held */
-       if (atomic_dec_return(&dd->ipath_sdma_vl15_count) != 0) {
-               unsigned long interval = (HZ + 19) / 20;
-               mod_timer(&dd->ipath_sdma_vl15_timer, jiffies + interval);
-       } else {
-               del_timer(&dd->ipath_sdma_vl15_timer);
-       }
-}
-
-static void vl15_watchdog_timeout(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-
-       if (atomic_read(&dd->ipath_sdma_vl15_count) != 0) {
-               ipath_dbg("vl15 watchdog timeout - clearing\n");
-               ipath_cancel_sends(dd, 1);
-               ipath_hol_down(dd);
-       } else {
-               ipath_dbg("vl15 watchdog timeout - "
-                         "condition already cleared\n");
-       }
-}
-
-static void unmap_desc(struct ipath_devdata *dd, unsigned head)
-{
-       __le64 *descqp = &dd->ipath_sdma_descq[head].qw[0];
-       u64 desc[2];
-       dma_addr_t addr;
-       size_t len;
-
-       desc[0] = le64_to_cpu(descqp[0]);
-       desc[1] = le64_to_cpu(descqp[1]);
-
-       addr = (desc[1] << 32) | (desc[0] >> 32);
-       len = (desc[0] >> 14) & (0x7ffULL << 2);
-       dma_unmap_single(&dd->pcidev->dev, addr, len, DMA_TO_DEVICE);
-}
-
-/*
- * ipath_sdma_lock should be locked before calling this.
- */
-int ipath_sdma_make_progress(struct ipath_devdata *dd)
-{
-       struct list_head *lp = NULL;
-       struct ipath_sdma_txreq *txp = NULL;
-       u16 dmahead;
-       u16 start_idx = 0;
-       int progress = 0;
-
-       if (!list_empty(&dd->ipath_sdma_activelist)) {
-               lp = dd->ipath_sdma_activelist.next;
-               txp = list_entry(lp, struct ipath_sdma_txreq, list);
-               start_idx = txp->start_idx;
-       }
-
-       /*
-        * Read the SDMA head register in order to know that the
-        * interrupt clear has been written to the chip.
-        * Otherwise, we may not get an interrupt for the last
-        * descriptor in the queue.
-        */
-       dmahead = (u16)ipath_read_kreg32(dd, dd->ipath_kregs->kr_senddmahead);
-       /* sanity check return value for error handling (chip reset, etc.) */
-       if (dmahead >= dd->ipath_sdma_descq_cnt)
-               goto done;
-
-       while (dd->ipath_sdma_descq_head != dmahead) {
-               if (txp && txp->flags & IPATH_SDMA_TXREQ_F_FREEDESC &&
-                   dd->ipath_sdma_descq_head == start_idx) {
-                       unmap_desc(dd, dd->ipath_sdma_descq_head);
-                       start_idx++;
-                       if (start_idx == dd->ipath_sdma_descq_cnt)
-                               start_idx = 0;
-               }
-
-               /* increment free count and head */
-               dd->ipath_sdma_descq_removed++;
-               if (++dd->ipath_sdma_descq_head == dd->ipath_sdma_descq_cnt)
-                       dd->ipath_sdma_descq_head = 0;
-
-               if (txp && txp->next_descq_idx == dd->ipath_sdma_descq_head) {
-                       /* move to notify list */
-                       if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
-                               vl15_watchdog_deq(dd);
-                       list_move_tail(lp, &dd->ipath_sdma_notifylist);
-                       if (!list_empty(&dd->ipath_sdma_activelist)) {
-                               lp = dd->ipath_sdma_activelist.next;
-                               txp = list_entry(lp, struct ipath_sdma_txreq,
-                                                list);
-                               start_idx = txp->start_idx;
-                       } else {
-                               lp = NULL;
-                               txp = NULL;
-                       }
-               }
-               progress = 1;
-       }
-
-       if (progress)
-               tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
-
-done:
-       return progress;
-}
-
-static void ipath_sdma_notify(struct ipath_devdata *dd, struct list_head *list)
-{
-       struct ipath_sdma_txreq *txp, *txp_next;
-
-       list_for_each_entry_safe(txp, txp_next, list, list) {
-               list_del_init(&txp->list);
-
-               if (txp->callback)
-                       (*txp->callback)(txp->callback_cookie,
-                                        txp->callback_status);
-       }
-}
-
-static void sdma_notify_taskbody(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-       struct list_head list;
-
-       INIT_LIST_HEAD(&list);
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-       list_splice_init(&dd->ipath_sdma_notifylist, &list);
-
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-       ipath_sdma_notify(dd, &list);
-
-       /*
-        * The IB verbs layer needs to see the callback before getting
-        * the call to ipath_ib_piobufavail() because the callback
-        * handles releasing resources the next send will need.
-        * Otherwise, we could do these calls in
-        * ipath_sdma_make_progress().
-        */
-       ipath_ib_piobufavail(dd->verbs_dev);
-}
-
-static void sdma_notify_task(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-
-       if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-               sdma_notify_taskbody(dd);
-}
-
-static void dump_sdma_state(struct ipath_devdata *dd)
-{
-       unsigned long reg;
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmastatus);
-       ipath_cdbg(VERBOSE, "kr_senddmastatus: 0x%016lx\n", reg);
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendctrl);
-       ipath_cdbg(VERBOSE, "kr_sendctrl: 0x%016lx\n", reg);
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask0);
-       ipath_cdbg(VERBOSE, "kr_senddmabufmask0: 0x%016lx\n", reg);
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask1);
-       ipath_cdbg(VERBOSE, "kr_senddmabufmask1: 0x%016lx\n", reg);
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask2);
-       ipath_cdbg(VERBOSE, "kr_senddmabufmask2: 0x%016lx\n", reg);
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
-       ipath_cdbg(VERBOSE, "kr_senddmatail: 0x%016lx\n", reg);
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
-       ipath_cdbg(VERBOSE, "kr_senddmahead: 0x%016lx\n", reg);
-}
-
-static void sdma_abort_task(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
-       u64 status;
-       unsigned long flags;
-
-       if (test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-               return;
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-       status = dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK;
-
-       /* nothing to do */
-       if (status == IPATH_SDMA_ABORT_NONE)
-               goto unlock;
-
-       /* ipath_sdma_abort() is done, waiting for interrupt */
-       if (status == IPATH_SDMA_ABORT_DISARMED) {
-               if (time_before(jiffies, dd->ipath_sdma_abort_intr_timeout))
-                       goto resched_noprint;
-               /* give up, intr got lost somewhere */
-               ipath_dbg("give up waiting for SDMADISABLED intr\n");
-               __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-               status = IPATH_SDMA_ABORT_ABORTED;
-       }
-
-       /* everything is stopped, time to clean up and restart */
-       if (status == IPATH_SDMA_ABORT_ABORTED) {
-               struct ipath_sdma_txreq *txp, *txpnext;
-               u64 hwstatus;
-               int notify = 0;
-
-               hwstatus = ipath_read_kreg64(dd,
-                               dd->ipath_kregs->kr_senddmastatus);
-
-               if ((hwstatus & (IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG |
-                                IPATH_SDMA_STATUS_ABORT_IN_PROG             |
-                                IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE)) ||
-                   !(hwstatus & IPATH_SDMA_STATUS_SCB_EMPTY)) {
-                       if (dd->ipath_sdma_reset_wait > 0) {
-                               /* not done shutting down sdma */
-                               --dd->ipath_sdma_reset_wait;
-                               goto resched;
-                       }
-                       ipath_cdbg(VERBOSE, "gave up waiting for quiescent "
-                               "status after SDMA reset, continuing\n");
-                       dump_sdma_state(dd);
-               }
-
-               /* dequeue all "sent" requests */
-               list_for_each_entry_safe(txp, txpnext,
-                                        &dd->ipath_sdma_activelist, list) {
-                       txp->callback_status = IPATH_SDMA_TXREQ_S_ABORTED;
-                       if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
-                               vl15_watchdog_deq(dd);
-                       list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
-                       notify = 1;
-               }
-               if (notify)
-                       tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
-
-               /* reset our notion of head and tail */
-               dd->ipath_sdma_descq_tail = 0;
-               dd->ipath_sdma_descq_head = 0;
-               dd->ipath_sdma_head_dma[0] = 0;
-               dd->ipath_sdma_generation = 0;
-               dd->ipath_sdma_descq_removed = dd->ipath_sdma_descq_added;
-
-               /* Reset SendDmaLenGen */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen,
-                       (u64) dd->ipath_sdma_descq_cnt | (1ULL << 18));
-
-               /* done with sdma state for a bit */
-               spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-               /*
-                * Don't restart sdma here (with the exception
-                * below). Wait until link is up to ACTIVE.  VL15 MADs
-                * used to bring the link up use PIO, and multiple link
-                * transitions otherwise cause the sdma engine to be
-                * stopped and started multiple times.
-                * The disable is done here, including the shadow,
-                * so the state is kept consistent.
-                * See ipath_restart_sdma() for the actual starting
-                * of sdma.
-                */
-               spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-               dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                                dd->ipath_sendctrl);
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-               /* make sure I see next message */
-               dd->ipath_sdma_abort_jiffies = 0;
-
-               /*
-                * Not everything that takes SDMA offline is a link
-                * status change.  If the link was up, restart SDMA.
-                */
-               if (dd->ipath_flags & IPATH_LINKACTIVE)
-                       ipath_restart_sdma(dd);
-
-               goto done;
-       }
-
-resched:
-       /*
-        * for now, keep spinning
-        * JAG - this is bad to just have default be a loop without
-        * state change
-        */
-       if (time_after(jiffies, dd->ipath_sdma_abort_jiffies)) {
-               ipath_dbg("looping with status 0x%08lx\n",
-                         dd->ipath_sdma_status);
-               dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ;
-       }
-resched_noprint:
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-       if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-               tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
-       return;
-
-unlock:
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-done:
-       return;
-}
-
-/*
- * This is called from interrupt context.
- */
-void ipath_sdma_intr(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-       (void) ipath_sdma_make_progress(dd);
-
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-}
-
-static int alloc_sdma(struct ipath_devdata *dd)
-{
-       int ret = 0;
-
-       /* Allocate memory for SendDMA descriptor FIFO */
-       dd->ipath_sdma_descq = dma_alloc_coherent(&dd->pcidev->dev,
-               SDMA_DESCQ_SZ, &dd->ipath_sdma_descq_phys, GFP_KERNEL);
-
-       if (!dd->ipath_sdma_descq) {
-               ipath_dev_err(dd, "failed to allocate SendDMA descriptor "
-                       "FIFO memory\n");
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       dd->ipath_sdma_descq_cnt =
-               SDMA_DESCQ_SZ / sizeof(struct ipath_sdma_desc);
-
-       /* Allocate memory for DMA of head register to memory */
-       dd->ipath_sdma_head_dma = dma_alloc_coherent(&dd->pcidev->dev,
-               PAGE_SIZE, &dd->ipath_sdma_head_phys, GFP_KERNEL);
-       if (!dd->ipath_sdma_head_dma) {
-               ipath_dev_err(dd, "failed to allocate SendDMA head memory\n");
-               ret = -ENOMEM;
-               goto cleanup_descq;
-       }
-       dd->ipath_sdma_head_dma[0] = 0;
-
-       setup_timer(&dd->ipath_sdma_vl15_timer, vl15_watchdog_timeout,
-                       (unsigned long)dd);
-
-       atomic_set(&dd->ipath_sdma_vl15_count, 0);
-
-       goto done;
-
-cleanup_descq:
-       dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
-               (void *)dd->ipath_sdma_descq, dd->ipath_sdma_descq_phys);
-       dd->ipath_sdma_descq = NULL;
-       dd->ipath_sdma_descq_phys = 0;
-done:
-       return ret;
-}
-
-int setup_sdma(struct ipath_devdata *dd)
-{
-       int ret = 0;
-       unsigned i, n;
-       u64 tmp64;
-       u64 senddmabufmask[3] = { 0 };
-       unsigned long flags;
-
-       ret = alloc_sdma(dd);
-       if (ret)
-               goto done;
-
-       if (!dd->ipath_sdma_descq) {
-               ipath_dev_err(dd, "SendDMA memory not allocated\n");
-               goto done;
-       }
-
-       /*
-        * Set initial status as if we had been up, then gone down.
-        * This lets initial start on transition to ACTIVE be the
-        * same as restart after link flap.
-        */
-       dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED;
-       dd->ipath_sdma_abort_jiffies = 0;
-       dd->ipath_sdma_generation = 0;
-       dd->ipath_sdma_descq_tail = 0;
-       dd->ipath_sdma_descq_head = 0;
-       dd->ipath_sdma_descq_removed = 0;
-       dd->ipath_sdma_descq_added = 0;
-
-       /* Set SendDmaBase */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase,
-                        dd->ipath_sdma_descq_phys);
-       /* Set SendDmaLenGen */
-       tmp64 = dd->ipath_sdma_descq_cnt;
-       tmp64 |= 1<<18; /* enable generation checking */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, tmp64);
-       /* Set SendDmaTail */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail,
-                        dd->ipath_sdma_descq_tail);
-       /* Set SendDmaHeadAddr */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr,
-                        dd->ipath_sdma_head_phys);
-
-       /*
-        * Reserve all the former "kernel" piobufs, using high number range
-        * so we get as many 4K buffers as possible
-        */
-       n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-       i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved;
-       ipath_chg_pioavailkernel(dd, i, n - i , 0);
-       for (; i < n; ++i) {
-               unsigned word = i / 64;
-               unsigned bit = i & 63;
-               BUG_ON(word >= 3);
-               senddmabufmask[word] |= 1ULL << bit;
-       }
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0,
-                        senddmabufmask[0]);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1,
-                        senddmabufmask[1]);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2,
-                        senddmabufmask[2]);
-
-       INIT_LIST_HEAD(&dd->ipath_sdma_activelist);
-       INIT_LIST_HEAD(&dd->ipath_sdma_notifylist);
-
-       tasklet_init(&dd->ipath_sdma_notify_task, sdma_notify_task,
-                    (unsigned long) dd);
-       tasklet_init(&dd->ipath_sdma_abort_task, sdma_abort_task,
-                    (unsigned long) dd);
-
-       /*
-        * No use to turn on SDMA here, as link is probably not ACTIVE
-        * Just mark it RUNNING and enable the interrupt, and let the
-        * ipath_restart_sdma() on link transition to ACTIVE actually
-        * enable it.
-        */
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       dd->ipath_sendctrl |= INFINIPATH_S_SDMAINTENABLE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       __set_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-done:
-       return ret;
-}
-
-void teardown_sdma(struct ipath_devdata *dd)
-{
-       struct ipath_sdma_txreq *txp, *txpnext;
-       unsigned long flags;
-       dma_addr_t sdma_head_phys = 0;
-       dma_addr_t sdma_descq_phys = 0;
-       void *sdma_descq = NULL;
-       void *sdma_head_dma = NULL;
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-       __clear_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
-       __set_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-       __set_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status);
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-       tasklet_kill(&dd->ipath_sdma_abort_task);
-       tasklet_kill(&dd->ipath_sdma_notify_task);
-
-       /* turn off sdma */
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-               dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-       /* dequeue all "sent" requests */
-       list_for_each_entry_safe(txp, txpnext, &dd->ipath_sdma_activelist,
-                                list) {
-               txp->callback_status = IPATH_SDMA_TXREQ_S_SHUTDOWN;
-               if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
-                       vl15_watchdog_deq(dd);
-               list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
-       }
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-       sdma_notify_taskbody(dd);
-
-       del_timer_sync(&dd->ipath_sdma_vl15_timer);
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-       dd->ipath_sdma_abort_jiffies = 0;
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase, 0);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, 0);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, 0);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, 0);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, 0);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, 0);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2, 0);
-
-       if (dd->ipath_sdma_head_dma) {
-               sdma_head_dma = (void *) dd->ipath_sdma_head_dma;
-               sdma_head_phys = dd->ipath_sdma_head_phys;
-               dd->ipath_sdma_head_dma = NULL;
-               dd->ipath_sdma_head_phys = 0;
-       }
-
-       if (dd->ipath_sdma_descq) {
-               sdma_descq = dd->ipath_sdma_descq;
-               sdma_descq_phys = dd->ipath_sdma_descq_phys;
-               dd->ipath_sdma_descq = NULL;
-               dd->ipath_sdma_descq_phys = 0;
-       }
-
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-       if (sdma_head_dma)
-               dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-                                 sdma_head_dma, sdma_head_phys);
-
-       if (sdma_descq)
-               dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
-                                 sdma_descq, sdma_descq_phys);
-}
-
-/*
- * [Re]start SDMA, if we use it, and it's not already OK.
- * This is called on transition to link ACTIVE, either the first or
- * subsequent times.
- */
-void ipath_restart_sdma(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-       int needed = 1;
-
-       if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
-               goto bail;
-
-       /*
-        * First, make sure we should, which is to say,
-        * check that we are "RUNNING" (not in teardown)
-        * and not "SHUTDOWN"
-        */
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-       if (!test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)
-               || test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-                       needed = 0;
-       else {
-               __clear_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-               __clear_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
-               __clear_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-       }
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-       if (!needed) {
-               ipath_dbg("invalid attempt to restart SDMA, status 0x%08lx\n",
-                       dd->ipath_sdma_status);
-               goto bail;
-       }
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       /*
-        * First clear, just to be safe. Enable is only done
-        * in chip on 0->1 transition
-        */
-       dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-       /* notify upper layers */
-       ipath_ib_piobufavail(dd->verbs_dev);
-
-bail:
-       return;
-}
-
-static inline void make_sdma_desc(struct ipath_devdata *dd,
-       u64 *sdmadesc, u64 addr, u64 dwlen, u64 dwoffset)
-{
-       WARN_ON(addr & 3);
-       /* SDmaPhyAddr[47:32] */
-       sdmadesc[1] = addr >> 32;
-       /* SDmaPhyAddr[31:0] */
-       sdmadesc[0] = (addr & 0xfffffffcULL) << 32;
-       /* SDmaGeneration[1:0] */
-       sdmadesc[0] |= (dd->ipath_sdma_generation & 3ULL) << 30;
-       /* SDmaDwordCount[10:0] */
-       sdmadesc[0] |= (dwlen & 0x7ffULL) << 16;
-       /* SDmaBufOffset[12:2] */
-       sdmadesc[0] |= dwoffset & 0x7ffULL;
-}
-
-/*
- * This function queues one IB packet onto the send DMA queue per call.
- * The caller is responsible for checking:
- * 1) The number of send DMA descriptor entries is less than the size of
- *    the descriptor queue.
- * 2) The IB SGE addresses and lengths are 32-bit aligned
- *    (except possibly the last SGE's length)
- * 3) The SGE addresses are suitable for passing to dma_map_single().
- */
-int ipath_sdma_verbs_send(struct ipath_devdata *dd,
-       struct ipath_sge_state *ss, u32 dwords,
-       struct ipath_verbs_txreq *tx)
-{
-
-       unsigned long flags;
-       struct ipath_sge *sge;
-       int ret = 0;
-       u16 tail;
-       __le64 *descqp;
-       u64 sdmadesc[2];
-       u32 dwoffset;
-       dma_addr_t addr;
-
-       if ((tx->map_len + (dwords<<2)) > dd->ipath_ibmaxlen) {
-               ipath_dbg("packet size %X > ibmax %X, fail\n",
-                       tx->map_len + (dwords<<2), dd->ipath_ibmaxlen);
-               ret = -EMSGSIZE;
-               goto fail;
-       }
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-retry:
-       if (unlikely(test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status))) {
-               ret = -EBUSY;
-               goto unlock;
-       }
-
-       if (tx->txreq.sg_count > ipath_sdma_descq_freecnt(dd)) {
-               if (ipath_sdma_make_progress(dd))
-                       goto retry;
-               ret = -ENOBUFS;
-               goto unlock;
-       }
-
-       addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr,
-                             tx->map_len, DMA_TO_DEVICE);
-       if (dma_mapping_error(&dd->pcidev->dev, addr))
-               goto ioerr;
-
-       dwoffset = tx->map_len >> 2;
-       make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0);
-
-       /* SDmaFirstDesc */
-       sdmadesc[0] |= 1ULL << 12;
-       if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
-               sdmadesc[0] |= 1ULL << 14;      /* SDmaUseLargeBuf */
-
-       /* write to the descq */
-       tail = dd->ipath_sdma_descq_tail;
-       descqp = &dd->ipath_sdma_descq[tail].qw[0];
-       *descqp++ = cpu_to_le64(sdmadesc[0]);
-       *descqp++ = cpu_to_le64(sdmadesc[1]);
-
-       if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEDESC)
-               tx->txreq.start_idx = tail;
-
-       /* increment the tail */
-       if (++tail == dd->ipath_sdma_descq_cnt) {
-               tail = 0;
-               descqp = &dd->ipath_sdma_descq[0].qw[0];
-               ++dd->ipath_sdma_generation;
-       }
-
-       sge = &ss->sge;
-       while (dwords) {
-               u32 dw;
-               u32 len;
-
-               len = dwords << 2;
-               if (len > sge->length)
-                       len = sge->length;
-               if (len > sge->sge_length)
-                       len = sge->sge_length;
-               BUG_ON(len == 0);
-               dw = (len + 3) >> 2;
-               addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2,
-                                     DMA_TO_DEVICE);
-               if (dma_mapping_error(&dd->pcidev->dev, addr))
-                       goto unmap;
-               make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset);
-               /* SDmaUseLargeBuf has to be set in every descriptor */
-               if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
-                       sdmadesc[0] |= 1ULL << 14;
-               /* write to the descq */
-               *descqp++ = cpu_to_le64(sdmadesc[0]);
-               *descqp++ = cpu_to_le64(sdmadesc[1]);
-
-               /* increment the tail */
-               if (++tail == dd->ipath_sdma_descq_cnt) {
-                       tail = 0;
-                       descqp = &dd->ipath_sdma_descq[0].qw[0];
-                       ++dd->ipath_sdma_generation;
-               }
-               sge->vaddr += len;
-               sge->length -= len;
-               sge->sge_length -= len;
-               if (sge->sge_length == 0) {
-                       if (--ss->num_sge)
-                               *sge = *ss->sg_list++;
-               } else if (sge->length == 0 && sge->mr != NULL) {
-                       if (++sge->n >= IPATH_SEGSZ) {
-                               if (++sge->m >= sge->mr->mapsz)
-                                       break;
-                               sge->n = 0;
-                       }
-                       sge->vaddr =
-                               sge->mr->map[sge->m]->segs[sge->n].vaddr;
-                       sge->length =
-                               sge->mr->map[sge->m]->segs[sge->n].length;
-               }
-
-               dwoffset += dw;
-               dwords -= dw;
-       }
-
-       if (!tail)
-               descqp = &dd->ipath_sdma_descq[dd->ipath_sdma_descq_cnt].qw[0];
-       descqp -= 2;
-       /* SDmaLastDesc */
-       descqp[0] |= cpu_to_le64(1ULL << 11);
-       if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_INTREQ) {
-               /* SDmaIntReq */
-               descqp[0] |= cpu_to_le64(1ULL << 15);
-       }
-
-       /* Commit writes to memory and advance the tail on the chip */
-       wmb();
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
-
-       tx->txreq.next_descq_idx = tail;
-       tx->txreq.callback_status = IPATH_SDMA_TXREQ_S_OK;
-       dd->ipath_sdma_descq_tail = tail;
-       dd->ipath_sdma_descq_added += tx->txreq.sg_count;
-       list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist);
-       if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15)
-               vl15_watchdog_enq(dd);
-       goto unlock;
-
-unmap:
-       while (tail != dd->ipath_sdma_descq_tail) {
-               if (!tail)
-                       tail = dd->ipath_sdma_descq_cnt - 1;
-               else
-                       tail--;
-               unmap_desc(dd, tail);
-       }
-ioerr:
-       ret = -EIO;
-unlock:
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-fail:
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_srq.c b/drivers/staging/rdma/ipath/ipath_srq.c
deleted file mode 100644 (file)
index 2627198..0000000
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_verbs.h"
-
-/**
- * ipath_post_srq_receive - post a receive on a shared receive queue
- * @ibsrq: the SRQ to post the receive on
- * @wr: the list of work requests to post
- * @bad_wr: the first WR to cause a problem is put here
- *
- * This may be called from interrupt context.
- */
-int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-                          struct ib_recv_wr **bad_wr)
-{
-       struct ipath_srq *srq = to_isrq(ibsrq);
-       struct ipath_rwq *wq;
-       unsigned long flags;
-       int ret;
-
-       for (; wr; wr = wr->next) {
-               struct ipath_rwqe *wqe;
-               u32 next;
-               int i;
-
-               if ((unsigned) wr->num_sge > srq->rq.max_sge) {
-                       *bad_wr = wr;
-                       ret = -EINVAL;
-                       goto bail;
-               }
-
-               spin_lock_irqsave(&srq->rq.lock, flags);
-               wq = srq->rq.wq;
-               next = wq->head + 1;
-               if (next >= srq->rq.size)
-                       next = 0;
-               if (next == wq->tail) {
-                       spin_unlock_irqrestore(&srq->rq.lock, flags);
-                       *bad_wr = wr;
-                       ret = -ENOMEM;
-                       goto bail;
-               }
-
-               wqe = get_rwqe_ptr(&srq->rq, wq->head);
-               wqe->wr_id = wr->wr_id;
-               wqe->num_sge = wr->num_sge;
-               for (i = 0; i < wr->num_sge; i++)
-                       wqe->sg_list[i] = wr->sg_list[i];
-               /* Make sure queue entry is written before the head index. */
-               smp_wmb();
-               wq->head = next;
-               spin_unlock_irqrestore(&srq->rq.lock, flags);
-       }
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_create_srq - create a shared receive queue
- * @ibpd: the protection domain of the SRQ to create
- * @srq_init_attr: the attributes of the SRQ
- * @udata: data from libipathverbs when creating a user SRQ
- */
-struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
-                               struct ib_srq_init_attr *srq_init_attr,
-                               struct ib_udata *udata)
-{
-       struct ipath_ibdev *dev = to_idev(ibpd->device);
-       struct ipath_srq *srq;
-       u32 sz;
-       struct ib_srq *ret;
-
-       if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
-               ret = ERR_PTR(-ENOSYS);
-               goto done;
-       }
-
-       if (srq_init_attr->attr.max_wr == 0) {
-               ret = ERR_PTR(-EINVAL);
-               goto done;
-       }
-
-       if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) ||
-           (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) {
-               ret = ERR_PTR(-EINVAL);
-               goto done;
-       }
-
-       srq = kmalloc(sizeof(*srq), GFP_KERNEL);
-       if (!srq) {
-               ret = ERR_PTR(-ENOMEM);
-               goto done;
-       }
-
-       /*
-        * Need to use vmalloc() if we want to support large #s of entries.
-        */
-       srq->rq.size = srq_init_attr->attr.max_wr + 1;
-       srq->rq.max_sge = srq_init_attr->attr.max_sge;
-       sz = sizeof(struct ib_sge) * srq->rq.max_sge +
-               sizeof(struct ipath_rwqe);
-       srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz);
-       if (!srq->rq.wq) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail_srq;
-       }
-
-       /*
-        * Return the address of the RWQ as the offset to mmap.
-        * See ipath_mmap() for details.
-        */
-       if (udata && udata->outlen >= sizeof(__u64)) {
-               int err;
-               u32 s = sizeof(struct ipath_rwq) + srq->rq.size * sz;
-
-               srq->ip =
-                   ipath_create_mmap_info(dev, s,
-                                          ibpd->uobject->context,
-                                          srq->rq.wq);
-               if (!srq->ip) {
-                       ret = ERR_PTR(-ENOMEM);
-                       goto bail_wq;
-               }
-
-               err = ib_copy_to_udata(udata, &srq->ip->offset,
-                                      sizeof(srq->ip->offset));
-               if (err) {
-                       ret = ERR_PTR(err);
-                       goto bail_ip;
-               }
-       } else
-               srq->ip = NULL;
-
-       /*
-        * ib_create_srq() will initialize srq->ibsrq.
-        */
-       spin_lock_init(&srq->rq.lock);
-       srq->rq.wq->head = 0;
-       srq->rq.wq->tail = 0;
-       srq->limit = srq_init_attr->attr.srq_limit;
-
-       spin_lock(&dev->n_srqs_lock);
-       if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
-               spin_unlock(&dev->n_srqs_lock);
-               ret = ERR_PTR(-ENOMEM);
-               goto bail_ip;
-       }
-
-       dev->n_srqs_allocated++;
-       spin_unlock(&dev->n_srqs_lock);
-
-       if (srq->ip) {
-               spin_lock_irq(&dev->pending_lock);
-               list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
-               spin_unlock_irq(&dev->pending_lock);
-       }
-
-       ret = &srq->ibsrq;
-       goto done;
-
-bail_ip:
-       kfree(srq->ip);
-bail_wq:
-       vfree(srq->rq.wq);
-bail_srq:
-       kfree(srq);
-done:
-       return ret;
-}
-
-/**
- * ipath_modify_srq - modify a shared receive queue
- * @ibsrq: the SRQ to modify
- * @attr: the new attributes of the SRQ
- * @attr_mask: indicates which attributes to modify
- * @udata: user data for ipathverbs.so
- */
-int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-                    enum ib_srq_attr_mask attr_mask,
-                    struct ib_udata *udata)
-{
-       struct ipath_srq *srq = to_isrq(ibsrq);
-       struct ipath_rwq *wq;
-       int ret = 0;
-
-       if (attr_mask & IB_SRQ_MAX_WR) {
-               struct ipath_rwq *owq;
-               struct ipath_rwqe *p;
-               u32 sz, size, n, head, tail;
-
-               /* Check that the requested sizes are below the limits. */
-               if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
-                   ((attr_mask & IB_SRQ_LIMIT) ?
-                    attr->srq_limit : srq->limit) > attr->max_wr) {
-                       ret = -EINVAL;
-                       goto bail;
-               }
-
-               sz = sizeof(struct ipath_rwqe) +
-                       srq->rq.max_sge * sizeof(struct ib_sge);
-               size = attr->max_wr + 1;
-               wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz);
-               if (!wq) {
-                       ret = -ENOMEM;
-                       goto bail;
-               }
-
-               /* Check that we can write the offset to mmap. */
-               if (udata && udata->inlen >= sizeof(__u64)) {
-                       __u64 offset_addr;
-                       __u64 offset = 0;
-
-                       ret = ib_copy_from_udata(&offset_addr, udata,
-                                                sizeof(offset_addr));
-                       if (ret)
-                               goto bail_free;
-                       udata->outbuf =
-                               (void __user *) (unsigned long) offset_addr;
-                       ret = ib_copy_to_udata(udata, &offset,
-                                              sizeof(offset));
-                       if (ret)
-                               goto bail_free;
-               }
-
-               spin_lock_irq(&srq->rq.lock);
-               /*
-                * validate head pointer value and compute
-                * the number of remaining WQEs.
-                */
-               owq = srq->rq.wq;
-               head = owq->head;
-               if (head >= srq->rq.size)
-                       head = 0;
-               tail = owq->tail;
-               if (tail >= srq->rq.size)
-                       tail = 0;
-               n = head;
-               if (n < tail)
-                       n += srq->rq.size - tail;
-               else
-                       n -= tail;
-               if (size <= n) {
-                       ret = -EINVAL;
-                       goto bail_unlock;
-               }
-               n = 0;
-               p = wq->wq;
-               while (tail != head) {
-                       struct ipath_rwqe *wqe;
-                       int i;
-
-                       wqe = get_rwqe_ptr(&srq->rq, tail);
-                       p->wr_id = wqe->wr_id;
-                       p->num_sge = wqe->num_sge;
-                       for (i = 0; i < wqe->num_sge; i++)
-                               p->sg_list[i] = wqe->sg_list[i];
-                       n++;
-                       p = (struct ipath_rwqe *)((char *) p + sz);
-                       if (++tail >= srq->rq.size)
-                               tail = 0;
-               }
-               srq->rq.wq = wq;
-               srq->rq.size = size;
-               wq->head = n;
-               wq->tail = 0;
-               if (attr_mask & IB_SRQ_LIMIT)
-                       srq->limit = attr->srq_limit;
-               spin_unlock_irq(&srq->rq.lock);
-
-               vfree(owq);
-
-               if (srq->ip) {
-                       struct ipath_mmap_info *ip = srq->ip;
-                       struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
-                       u32 s = sizeof(struct ipath_rwq) + size * sz;
-
-                       ipath_update_mmap_info(dev, ip, s, wq);
-
-                       /*
-                        * Return the offset to mmap.
-                        * See ipath_mmap() for details.
-                        */
-                       if (udata && udata->inlen >= sizeof(__u64)) {
-                               ret = ib_copy_to_udata(udata, &ip->offset,
-                                                      sizeof(ip->offset));
-                               if (ret)
-                                       goto bail;
-                       }
-
-                       spin_lock_irq(&dev->pending_lock);
-                       if (list_empty(&ip->pending_mmaps))
-                               list_add(&ip->pending_mmaps,
-                                        &dev->pending_mmaps);
-                       spin_unlock_irq(&dev->pending_lock);
-               }
-       } else if (attr_mask & IB_SRQ_LIMIT) {
-               spin_lock_irq(&srq->rq.lock);
-               if (attr->srq_limit >= srq->rq.size)
-                       ret = -EINVAL;
-               else
-                       srq->limit = attr->srq_limit;
-               spin_unlock_irq(&srq->rq.lock);
-       }
-       goto bail;
-
-bail_unlock:
-       spin_unlock_irq(&srq->rq.lock);
-bail_free:
-       vfree(wq);
-bail:
-       return ret;
-}
-
-int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
-{
-       struct ipath_srq *srq = to_isrq(ibsrq);
-
-       attr->max_wr = srq->rq.size - 1;
-       attr->max_sge = srq->rq.max_sge;
-       attr->srq_limit = srq->limit;
-       return 0;
-}
-
-/**
- * ipath_destroy_srq - destroy a shared receive queue
- * @ibsrq: the SRQ to destroy
- */
-int ipath_destroy_srq(struct ib_srq *ibsrq)
-{
-       struct ipath_srq *srq = to_isrq(ibsrq);
-       struct ipath_ibdev *dev = to_idev(ibsrq->device);
-
-       spin_lock(&dev->n_srqs_lock);
-       dev->n_srqs_allocated--;
-       spin_unlock(&dev->n_srqs_lock);
-       if (srq->ip)
-               kref_put(&srq->ip->ref, ipath_release_mmap_info);
-       else
-               vfree(srq->rq.wq);
-       kfree(srq);
-
-       return 0;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_stats.c b/drivers/staging/rdma/ipath/ipath_stats.c
deleted file mode 100644 (file)
index f63e143..0000000
+++ /dev/null
@@ -1,347 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ipath_kernel.h"
-
-struct infinipath_stats ipath_stats;
-
-/**
- * ipath_snap_cntr - snapshot a chip counter
- * @dd: the infinipath device
- * @creg: the counter to snapshot
- *
- * called from add_timer and user counter read calls, to deal with
- * counters that wrap in "human time".  The words sent and received, and
- * the packets sent and received are all that we worry about.  For now,
- * at least, we don't worry about error counters, because if they wrap
- * that quickly, we probably don't care.  We may eventually just make this
- * handle all the counters.  word counters can wrap in about 20 seconds
- * of full bandwidth traffic, packet counters in a few hours.
- */
-
-u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
-{
-       u32 val, reg64 = 0;
-       u64 val64;
-       unsigned long t0, t1;
-       u64 ret;
-
-       t0 = jiffies;
-       /* If fast increment counters are only 32 bits, snapshot them,
-        * and maintain them as 64bit values in the driver */
-       if (!(dd->ipath_flags & IPATH_32BITCOUNTERS) &&
-           (creg == dd->ipath_cregs->cr_wordsendcnt ||
-            creg == dd->ipath_cregs->cr_wordrcvcnt ||
-            creg == dd->ipath_cregs->cr_pktsendcnt ||
-            creg == dd->ipath_cregs->cr_pktrcvcnt)) {
-               val64 = ipath_read_creg(dd, creg);
-               val = val64 == ~0ULL ? ~0U : 0;
-               reg64 = 1;
-       } else                  /* val64 just to keep gcc quiet... */
-               val64 = val = ipath_read_creg32(dd, creg);
-       /*
-        * See if a second has passed.  This is just a way to detect things
-        * that are quite broken.  Normally this should take just a few
-        * cycles (the check is for long enough that we don't care if we get
-        * pre-empted.)  An Opteron HT O read timeout is 4 seconds with
-        * normal NB values
-        */
-       t1 = jiffies;
-       if (time_before(t0 + HZ, t1) && val == -1) {
-               ipath_dev_err(dd, "Error!  Read counter 0x%x timed out\n",
-                             creg);
-               ret = 0ULL;
-               goto bail;
-       }
-       if (reg64) {
-               ret = val64;
-               goto bail;
-       }
-
-       if (creg == dd->ipath_cregs->cr_wordsendcnt) {
-               if (val != dd->ipath_lastsword) {
-                       dd->ipath_sword += val - dd->ipath_lastsword;
-                       dd->ipath_lastsword = val;
-               }
-               val64 = dd->ipath_sword;
-       } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
-               if (val != dd->ipath_lastrword) {
-                       dd->ipath_rword += val - dd->ipath_lastrword;
-                       dd->ipath_lastrword = val;
-               }
-               val64 = dd->ipath_rword;
-       } else if (creg == dd->ipath_cregs->cr_pktsendcnt) {
-               if (val != dd->ipath_lastspkts) {
-                       dd->ipath_spkts += val - dd->ipath_lastspkts;
-                       dd->ipath_lastspkts = val;
-               }
-               val64 = dd->ipath_spkts;
-       } else if (creg == dd->ipath_cregs->cr_pktrcvcnt) {
-               if (val != dd->ipath_lastrpkts) {
-                       dd->ipath_rpkts += val - dd->ipath_lastrpkts;
-                       dd->ipath_lastrpkts = val;
-               }
-               val64 = dd->ipath_rpkts;
-       } else if (creg == dd->ipath_cregs->cr_ibsymbolerrcnt) {
-               if (dd->ibdeltainprog)
-                       val64 -= val64 - dd->ibsymsnap;
-               val64 -= dd->ibsymdelta;
-       } else if (creg == dd->ipath_cregs->cr_iblinkerrrecovcnt) {
-               if (dd->ibdeltainprog)
-                       val64 -= val64 - dd->iblnkerrsnap;
-               val64 -= dd->iblnkerrdelta;
-       } else
-               val64 = (u64) val;
-
-       ret = val64;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_qcheck - print delta of egrfull/hdrqfull errors for kernel ports
- * @dd: the infinipath device
- *
- * print the delta of egrfull/hdrqfull errors for kernel ports no more than
- * every 5 seconds.  User processes are printed at close, but kernel doesn't
- * close, so...  Separate routine so may call from other places someday, and
- * so function name when printed by _IPATH_INFO is meaningfull
- */
-static void ipath_qcheck(struct ipath_devdata *dd)
-{
-       static u64 last_tot_hdrqfull;
-       struct ipath_portdata *pd = dd->ipath_pd[0];
-       size_t blen = 0;
-       char buf[128];
-       u32 hdrqtail;
-
-       *buf = 0;
-       if (pd->port_hdrqfull != dd->ipath_p0_hdrqfull) {
-               blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u",
-                               pd->port_hdrqfull -
-                               dd->ipath_p0_hdrqfull);
-               dd->ipath_p0_hdrqfull = pd->port_hdrqfull;
-       }
-       if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) {
-               blen += snprintf(buf + blen, sizeof buf - blen,
-                                "%srcvegrfull %llu",
-                                blen ? ", " : "",
-                                (unsigned long long)
-                                (ipath_stats.sps_etidfull -
-                                 dd->ipath_last_tidfull));
-               dd->ipath_last_tidfull = ipath_stats.sps_etidfull;
-       }
-
-       /*
-        * this is actually the number of hdrq full interrupts, not actual
-        * events, but at the moment that's mostly what I'm interested in.
-        * Actual count, etc. is in the counters, if needed.  For production
-        * users this won't ordinarily be printed.
-        */
-
-       if ((ipath_debug & (__IPATH_PKTDBG | __IPATH_DBG)) &&
-           ipath_stats.sps_hdrqfull != last_tot_hdrqfull) {
-               blen += snprintf(buf + blen, sizeof buf - blen,
-                                "%shdrqfull %llu (all ports)",
-                                blen ? ", " : "",
-                                (unsigned long long)
-                                (ipath_stats.sps_hdrqfull -
-                                 last_tot_hdrqfull));
-               last_tot_hdrqfull = ipath_stats.sps_hdrqfull;
-       }
-       if (blen)
-               ipath_dbg("%s\n", buf);
-
-       hdrqtail = ipath_get_hdrqtail(pd);
-       if (pd->port_head != hdrqtail) {
-               if (dd->ipath_lastport0rcv_cnt ==
-                   ipath_stats.sps_port0pkts) {
-                       ipath_cdbg(PKT, "missing rcv interrupts? "
-                                  "port0 hd=%x tl=%x; port0pkts %llx; write"
-                                  " hd (w/intr)\n",
-                                  pd->port_head, hdrqtail,
-                                  (unsigned long long)
-                                  ipath_stats.sps_port0pkts);
-                       ipath_write_ureg(dd, ur_rcvhdrhead, hdrqtail |
-                               dd->ipath_rhdrhead_intr_off, pd->port_port);
-               }
-               dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts;
-       }
-}
-
-static void ipath_chk_errormask(struct ipath_devdata *dd)
-{
-       static u32 fixed;
-       u32 ctrl;
-       unsigned long errormask;
-       unsigned long hwerrs;
-
-       if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED))
-               return;
-
-       errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
-
-       if (errormask == dd->ipath_errormask)
-               return;
-       fixed++;
-
-       hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
-       ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-               dd->ipath_errormask);
-
-       if ((hwerrs & dd->ipath_hwerrmask) ||
-               (ctrl & INFINIPATH_C_FREEZEMODE)) {
-               /* force re-interrupt of pending events, just in case */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
-               dev_info(&dd->pcidev->dev,
-                       "errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n",
-                       fixed, errormask, (unsigned long)dd->ipath_errormask,
-                       ctrl, hwerrs);
-       } else
-               ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n",
-                       fixed, errormask,
-                       (unsigned long)dd->ipath_errormask);
-}
-
-
-/**
- * ipath_get_faststats - get word counters from chip before they overflow
- * @opaque - contains a pointer to the infinipath device ipath_devdata
- *
- * called from add_timer
- */
-void ipath_get_faststats(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
-       int i;
-       static unsigned cnt;
-       unsigned long flags;
-       u64 traffic_wds;
-
-       /*
-        * don't access the chip while running diags, or memory diags can
-        * fail
-        */
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) ||
-           ipath_diag_inuse)
-               /* but re-arm the timer, for diags case; won't hurt other */
-               goto done;
-
-       /*
-        * We now try to maintain a "active timer", based on traffic
-        * exceeding a threshold, so we need to check the word-counts
-        * even if they are 64-bit.
-        */
-       traffic_wds = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
-       spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
-       traffic_wds -= dd->ipath_traffic_wds;
-       dd->ipath_traffic_wds += traffic_wds;
-       if (traffic_wds  >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
-               atomic_add(5, &dd->ipath_active_time); /* S/B #define */
-       spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
-
-       if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-       }
-
-       ipath_qcheck(dd);
-
-       /*
-        * deal with repeat error suppression.  Doesn't really matter if
-        * last error was almost a full interval ago, or just a few usecs
-        * ago; still won't get more than 2 per interval.  We may want
-        * longer intervals for this eventually, could do with mod, counter
-        * or separate timer.  Also see code in ipath_handle_errors() and
-        * ipath_handle_hwerrors().
-        */
-
-       if (dd->ipath_lasterror)
-               dd->ipath_lasterror = 0;
-       if (dd->ipath_lasthwerror)
-               dd->ipath_lasthwerror = 0;
-       if (dd->ipath_maskederrs
-           && time_after(jiffies, dd->ipath_unmasktime)) {
-               char ebuf[256];
-               int iserr;
-               iserr = ipath_decode_err(dd, ebuf, sizeof ebuf,
-                                        dd->ipath_maskederrs);
-               if (dd->ipath_maskederrs &
-                   ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-                     INFINIPATH_E_PKTERRS))
-                       ipath_dev_err(dd, "Re-enabling masked errors "
-                                     "(%s)\n", ebuf);
-               else {
-                       /*
-                        * rcvegrfull and rcvhdrqfull are "normal", for some
-                        * types of processes (mostly benchmarks) that send
-                        * huge numbers of messages, while not processing
-                        * them.  So only complain about these at debug
-                        * level.
-                        */
-                       if (iserr)
-                               ipath_dbg(
-                                       "Re-enabling queue full errors (%s)\n",
-                                       ebuf);
-                       else
-                               ipath_cdbg(ERRPKT, "Re-enabling packet"
-                                       " problem interrupt (%s)\n", ebuf);
-               }
-
-               /* re-enable masked errors */
-               dd->ipath_errormask |= dd->ipath_maskederrs;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-                                dd->ipath_errormask);
-               dd->ipath_maskederrs = 0;
-       }
-
-       /* limit qfull messages to ~one per minute per port */
-       if ((++cnt & 0x10)) {
-               for (i = (int) dd->ipath_cfgports; --i >= 0; ) {
-                       struct ipath_portdata *pd = dd->ipath_pd[i];
-
-                       if (pd && pd->port_lastrcvhdrqtail != -1)
-                               pd->port_lastrcvhdrqtail = -1;
-               }
-       }
-
-       ipath_chk_errormask(dd);
-done:
-       mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_sysfs.c b/drivers/staging/rdma/ipath/ipath_sysfs.c
deleted file mode 100644 (file)
index b12b1f6..0000000
+++ /dev/null
@@ -1,1237 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/ctype.h>
-#include <linux/stat.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-/**
- * ipath_parse_ushort - parse an unsigned short value in an arbitrary base
- * @str: the string containing the number
- * @valp: where to put the result
- *
- * returns the number of bytes consumed, or negative value on error
- */
-int ipath_parse_ushort(const char *str, unsigned short *valp)
-{
-       unsigned long val;
-       char *end;
-       int ret;
-
-       if (!isdigit(str[0])) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       val = simple_strtoul(str, &end, 0);
-
-       if (val > 0xffff) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       *valp = val;
-
-       ret = end + 1 - str;
-       if (ret == 0)
-               ret = -EINVAL;
-
-bail:
-       return ret;
-}
-
-static ssize_t show_version(struct device_driver *dev, char *buf)
-{
-       /* The string printed here is already newline-terminated. */
-       return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version);
-}
-
-static ssize_t show_num_units(struct device_driver *dev, char *buf)
-{
-       return scnprintf(buf, PAGE_SIZE, "%d\n",
-                        ipath_count_units(NULL, NULL, NULL));
-}
-
-static ssize_t show_status(struct device *dev,
-                          struct device_attribute *attr,
-                          char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       ssize_t ret;
-
-       if (!dd->ipath_statusp) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n",
-                       (unsigned long long) *(dd->ipath_statusp));
-
-bail:
-       return ret;
-}
-
-static const char *ipath_status_str[] = {
-       "Initted",
-       "Disabled",
-       "Admin_Disabled",
-       "", /* This used to be the old "OIB_SMA" status. */
-       "", /* This used to be the old "SMA" status. */
-       "Present",
-       "IB_link_up",
-       "IB_configured",
-       "NoIBcable",
-       "Fatal_Hardware_Error",
-       NULL,
-};
-
-static ssize_t show_status_str(struct device *dev,
-                              struct device_attribute *attr,
-                              char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int i, any;
-       u64 s;
-       ssize_t ret;
-
-       if (!dd->ipath_statusp) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       s = *(dd->ipath_statusp);
-       *buf = '\0';
-       for (any = i = 0; s && ipath_status_str[i]; i++) {
-               if (s & 1) {
-                       if (any && strlcat(buf, " ", PAGE_SIZE) >=
-                           PAGE_SIZE)
-                               /* overflow */
-                               break;
-                       if (strlcat(buf, ipath_status_str[i],
-                                   PAGE_SIZE) >= PAGE_SIZE)
-                               break;
-                       any = 1;
-               }
-               s >>= 1;
-       }
-       if (any)
-               strlcat(buf, "\n", PAGE_SIZE);
-
-       ret = strlen(buf);
-
-bail:
-       return ret;
-}
-
-static ssize_t show_boardversion(struct device *dev,
-                              struct device_attribute *attr,
-                              char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       /* The string printed here is already newline-terminated. */
-       return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion);
-}
-
-static ssize_t show_localbus_info(struct device *dev,
-                              struct device_attribute *attr,
-                              char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       /* The string printed here is already newline-terminated. */
-       return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_lbus_info);
-}
-
-static ssize_t show_lmc(struct device *dev,
-                       struct device_attribute *attr,
-                       char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_lmc);
-}
-
-static ssize_t store_lmc(struct device *dev,
-                        struct device_attribute *attr,
-                        const char *buf,
-                        size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       u16 lmc = 0;
-       int ret;
-
-       ret = ipath_parse_ushort(buf, &lmc);
-       if (ret < 0)
-               goto invalid;
-
-       if (lmc > 7) {
-               ret = -EINVAL;
-               goto invalid;
-       }
-
-       ipath_set_lid(dd, dd->ipath_lid, lmc);
-
-       goto bail;
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid LMC %u\n", lmc);
-bail:
-       return ret;
-}
-
-static ssize_t show_lid(struct device *dev,
-                       struct device_attribute *attr,
-                       char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_lid);
-}
-
-static ssize_t store_lid(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       u16 lid = 0;
-       int ret;
-
-       ret = ipath_parse_ushort(buf, &lid);
-       if (ret < 0)
-               goto invalid;
-
-       if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE) {
-               ret = -EINVAL;
-               goto invalid;
-       }
-
-       ipath_set_lid(dd, lid, dd->ipath_lmc);
-
-       goto bail;
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid LID 0x%x\n", lid);
-bail:
-       return ret;
-}
-
-static ssize_t show_mlid(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_mlid);
-}
-
-static ssize_t store_mlid(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       u16 mlid;
-       int ret;
-
-       ret = ipath_parse_ushort(buf, &mlid);
-       if (ret < 0 || mlid < IPATH_MULTICAST_LID_BASE)
-               goto invalid;
-
-       dd->ipath_mlid = mlid;
-
-       goto bail;
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid MLID\n");
-bail:
-       return ret;
-}
-
-static ssize_t show_guid(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       u8 *guid;
-
-       guid = (u8 *) & (dd->ipath_guid);
-
-       return scnprintf(buf, PAGE_SIZE,
-                        "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
-                        guid[0], guid[1], guid[2], guid[3],
-                        guid[4], guid[5], guid[6], guid[7]);
-}
-
-static ssize_t store_guid(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       ssize_t ret;
-       unsigned short guid[8];
-       __be64 new_guid;
-       u8 *ng;
-       int i;
-
-       if (sscanf(buf, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx",
-                  &guid[0], &guid[1], &guid[2], &guid[3],
-                  &guid[4], &guid[5], &guid[6], &guid[7]) != 8)
-               goto invalid;
-
-       ng = (u8 *) &new_guid;
-
-       for (i = 0; i < 8; i++) {
-               if (guid[i] > 0xff)
-                       goto invalid;
-               ng[i] = guid[i];
-       }
-
-       if (new_guid == 0)
-               goto invalid;
-
-       dd->ipath_guid = new_guid;
-       dd->ipath_nguid = 1;
-       if (dd->verbs_dev)
-               dd->verbs_dev->ibdev.node_guid = new_guid;
-
-       ret = strlen(buf);
-       goto bail;
-
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid GUID\n");
-       ret = -EINVAL;
-
-bail:
-       return ret;
-}
-
-static ssize_t show_nguid(struct device *dev,
-                         struct device_attribute *attr,
-                         char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid);
-}
-
-static ssize_t show_nports(struct device *dev,
-                          struct device_attribute *attr,
-                          char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       /* Return the number of user ports available. */
-       return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_cfgports - 1);
-}
-
-static ssize_t show_serial(struct device *dev,
-                          struct device_attribute *attr,
-                          char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       buf[sizeof dd->ipath_serial] = '\0';
-       memcpy(buf, dd->ipath_serial, sizeof dd->ipath_serial);
-       strcat(buf, "\n");
-       return strlen(buf);
-}
-
-static ssize_t show_unit(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_unit);
-}
-
-static ssize_t show_jint_max_packets(struct device *dev,
-                                    struct device_attribute *attr,
-                                    char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_max_packets);
-}
-
-static ssize_t store_jint_max_packets(struct device *dev,
-                                     struct device_attribute *attr,
-                                     const char *buf,
-                                     size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       u16 v = 0;
-       int ret;
-
-       ret = ipath_parse_ushort(buf, &v);
-       if (ret < 0)
-               ipath_dev_err(dd, "invalid jint_max_packets.\n");
-       else
-               dd->ipath_f_config_jint(dd, dd->ipath_jint_idle_ticks, v);
-
-       return ret;
-}
-
-static ssize_t show_jint_idle_ticks(struct device *dev,
-                                   struct device_attribute *attr,
-                                   char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_idle_ticks);
-}
-
-static ssize_t store_jint_idle_ticks(struct device *dev,
-                                    struct device_attribute *attr,
-                                    const char *buf,
-                                    size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       u16 v = 0;
-       int ret;
-
-       ret = ipath_parse_ushort(buf, &v);
-       if (ret < 0)
-               ipath_dev_err(dd, "invalid jint_idle_ticks.\n");
-       else
-               dd->ipath_f_config_jint(dd, v, dd->ipath_jint_max_packets);
-
-       return ret;
-}
-
-#define DEVICE_COUNTER(name, attr) \
-       static ssize_t show_counter_##name(struct device *dev, \
-                                          struct device_attribute *attr, \
-                                          char *buf) \
-       { \
-               struct ipath_devdata *dd = dev_get_drvdata(dev); \
-               return scnprintf(\
-                       buf, PAGE_SIZE, "%llu\n", (unsigned long long) \
-                       ipath_snap_cntr( \
-                               dd, offsetof(struct infinipath_counters, \
-                                            attr) / sizeof(u64)));     \
-       } \
-       static DEVICE_ATTR(name, S_IRUGO, show_counter_##name, NULL);
-
-DEVICE_COUNTER(ib_link_downeds, IBLinkDownedCnt);
-DEVICE_COUNTER(ib_link_err_recoveries, IBLinkErrRecoveryCnt);
-DEVICE_COUNTER(ib_status_changes, IBStatusChangeCnt);
-DEVICE_COUNTER(ib_symbol_errs, IBSymbolErrCnt);
-DEVICE_COUNTER(lb_flow_stalls, LBFlowStallCnt);
-DEVICE_COUNTER(lb_ints, LBIntCnt);
-DEVICE_COUNTER(rx_bad_formats, RxBadFormatCnt);
-DEVICE_COUNTER(rx_buf_ovfls, RxBufOvflCnt);
-DEVICE_COUNTER(rx_data_pkts, RxDataPktCnt);
-DEVICE_COUNTER(rx_dropped_pkts, RxDroppedPktCnt);
-DEVICE_COUNTER(rx_dwords, RxDwordCnt);
-DEVICE_COUNTER(rx_ebps, RxEBPCnt);
-DEVICE_COUNTER(rx_flow_ctrl_errs, RxFlowCtrlErrCnt);
-DEVICE_COUNTER(rx_flow_pkts, RxFlowPktCnt);
-DEVICE_COUNTER(rx_icrc_errs, RxICRCErrCnt);
-DEVICE_COUNTER(rx_len_errs, RxLenErrCnt);
-DEVICE_COUNTER(rx_link_problems, RxLinkProblemCnt);
-DEVICE_COUNTER(rx_lpcrc_errs, RxLPCRCErrCnt);
-DEVICE_COUNTER(rx_max_min_len_errs, RxMaxMinLenErrCnt);
-DEVICE_COUNTER(rx_p0_hdr_egr_ovfls, RxP0HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p1_hdr_egr_ovfls, RxP1HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p2_hdr_egr_ovfls, RxP2HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p3_hdr_egr_ovfls, RxP3HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p4_hdr_egr_ovfls, RxP4HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p5_hdr_egr_ovfls, RxP5HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p6_hdr_egr_ovfls, RxP6HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p7_hdr_egr_ovfls, RxP7HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p8_hdr_egr_ovfls, RxP8HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_pkey_mismatches, RxPKeyMismatchCnt);
-DEVICE_COUNTER(rx_tid_full_errs, RxTIDFullErrCnt);
-DEVICE_COUNTER(rx_tid_valid_errs, RxTIDValidErrCnt);
-DEVICE_COUNTER(rx_vcrc_errs, RxVCRCErrCnt);
-DEVICE_COUNTER(tx_data_pkts, TxDataPktCnt);
-DEVICE_COUNTER(tx_dropped_pkts, TxDroppedPktCnt);
-DEVICE_COUNTER(tx_dwords, TxDwordCnt);
-DEVICE_COUNTER(tx_flow_pkts, TxFlowPktCnt);
-DEVICE_COUNTER(tx_flow_stalls, TxFlowStallCnt);
-DEVICE_COUNTER(tx_len_errs, TxLenErrCnt);
-DEVICE_COUNTER(tx_max_min_len_errs, TxMaxMinLenErrCnt);
-DEVICE_COUNTER(tx_underruns, TxUnderrunCnt);
-DEVICE_COUNTER(tx_unsup_vl_errs, TxUnsupVLErrCnt);
-
-static struct attribute *dev_counter_attributes[] = {
-       &dev_attr_ib_link_downeds.attr,
-       &dev_attr_ib_link_err_recoveries.attr,
-       &dev_attr_ib_status_changes.attr,
-       &dev_attr_ib_symbol_errs.attr,
-       &dev_attr_lb_flow_stalls.attr,
-       &dev_attr_lb_ints.attr,
-       &dev_attr_rx_bad_formats.attr,
-       &dev_attr_rx_buf_ovfls.attr,
-       &dev_attr_rx_data_pkts.attr,
-       &dev_attr_rx_dropped_pkts.attr,
-       &dev_attr_rx_dwords.attr,
-       &dev_attr_rx_ebps.attr,
-       &dev_attr_rx_flow_ctrl_errs.attr,
-       &dev_attr_rx_flow_pkts.attr,
-       &dev_attr_rx_icrc_errs.attr,
-       &dev_attr_rx_len_errs.attr,
-       &dev_attr_rx_link_problems.attr,
-       &dev_attr_rx_lpcrc_errs.attr,
-       &dev_attr_rx_max_min_len_errs.attr,
-       &dev_attr_rx_p0_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p1_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p2_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p3_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p4_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p5_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p6_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p7_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p8_hdr_egr_ovfls.attr,
-       &dev_attr_rx_pkey_mismatches.attr,
-       &dev_attr_rx_tid_full_errs.attr,
-       &dev_attr_rx_tid_valid_errs.attr,
-       &dev_attr_rx_vcrc_errs.attr,
-       &dev_attr_tx_data_pkts.attr,
-       &dev_attr_tx_dropped_pkts.attr,
-       &dev_attr_tx_dwords.attr,
-       &dev_attr_tx_flow_pkts.attr,
-       &dev_attr_tx_flow_stalls.attr,
-       &dev_attr_tx_len_errs.attr,
-       &dev_attr_tx_max_min_len_errs.attr,
-       &dev_attr_tx_underruns.attr,
-       &dev_attr_tx_unsup_vl_errs.attr,
-       NULL
-};
-
-static struct attribute_group dev_counter_attr_group = {
-       .name = "counters",
-       .attrs = dev_counter_attributes
-};
-
-static ssize_t store_reset(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       if (count < 5 || memcmp(buf, "reset", 5)) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       if (dd->ipath_flags & IPATH_DISABLED) {
-               /*
-                * post-reset init would re-enable interrupts, etc.
-                * so don't allow reset on disabled devices.  Not
-                * perfect error, but about the best choice.
-                */
-               dev_info(dev,"Unit %d is disabled, can't reset\n",
-                        dd->ipath_unit);
-               ret = -EINVAL;
-               goto bail;
-       }
-       ret = ipath_reset_device(dd->ipath_unit);
-bail:
-       return ret<0 ? ret : count;
-}
-
-static ssize_t store_link_state(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 state;
-
-       ret = ipath_parse_ushort(buf, &state);
-       if (ret < 0)
-               goto invalid;
-
-       r = ipath_set_linkstate(dd, state);
-       if (r < 0) {
-               ret = r;
-               goto bail;
-       }
-
-       goto bail;
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid link state\n");
-bail:
-       return ret;
-}
-
-static ssize_t show_mtu(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_ibmtu);
-}
-
-static ssize_t store_mtu(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       ssize_t ret;
-       u16 mtu = 0;
-       int r;
-
-       ret = ipath_parse_ushort(buf, &mtu);
-       if (ret < 0)
-               goto invalid;
-
-       r = ipath_set_mtu(dd, mtu);
-       if (r < 0)
-               ret = r;
-
-       goto bail;
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid MTU\n");
-bail:
-       return ret;
-}
-
-static ssize_t show_enabled(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       return scnprintf(buf, PAGE_SIZE, "%u\n",
-                        (dd->ipath_flags & IPATH_DISABLED) ? 0 : 1);
-}
-
-static ssize_t store_enabled(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       ssize_t ret;
-       u16 enable = 0;
-
-       ret = ipath_parse_ushort(buf, &enable);
-       if (ret < 0) {
-               ipath_dev_err(dd, "attempt to use non-numeric on enable\n");
-               goto bail;
-       }
-
-       if (enable) {
-               if (!(dd->ipath_flags & IPATH_DISABLED))
-                       goto bail;
-
-               dev_info(dev, "Enabling unit %d\n", dd->ipath_unit);
-               /* same as post-reset */
-               ret = ipath_init_chip(dd, 1);
-               if (ret)
-                       ipath_dev_err(dd, "Failed to enable unit %d\n",
-                                     dd->ipath_unit);
-               else {
-                       dd->ipath_flags &= ~IPATH_DISABLED;
-                       *dd->ipath_statusp &= ~IPATH_STATUS_ADMIN_DISABLED;
-               }
-       } else if (!(dd->ipath_flags & IPATH_DISABLED)) {
-               dev_info(dev, "Disabling unit %d\n", dd->ipath_unit);
-               ipath_shutdown_device(dd);
-               dd->ipath_flags |= IPATH_DISABLED;
-               *dd->ipath_statusp |= IPATH_STATUS_ADMIN_DISABLED;
-       }
-
-bail:
-       return ret;
-}
-
-static ssize_t store_rx_pol_inv(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret < 0)
-               goto invalid;
-
-       r = ipath_set_rx_pol_inv(dd, val);
-       if (r < 0) {
-               ret = r;
-               goto bail;
-       }
-
-       goto bail;
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n");
-bail:
-       return ret;
-}
-
-static ssize_t store_led_override(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret > 0)
-               ipath_set_led_override(dd, val);
-       else
-               ipath_dev_err(dd, "attempt to set invalid LED override\n");
-       return ret;
-}
-
-static ssize_t show_logged_errs(struct device *dev,
-                               struct device_attribute *attr,
-                               char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int idx, count;
-
-       /* force consistency with actual EEPROM */
-       if (ipath_update_eeprom_log(dd) != 0)
-               return -ENXIO;
-
-       count = 0;
-       for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
-               count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c",
-                       dd->ipath_eep_st_errs[idx],
-                       idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' ');
-       }
-
-       return count;
-}
-
-/*
- * New sysfs entries to control various IB config. These all turn into
- * accesses via ipath_f_get/set_ib_cfg.
- *
- * Get/Set heartbeat enable. Or of 1=enabled, 2=auto
- */
-static ssize_t show_hrtbt_enb(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_HRTBT);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-static ssize_t store_hrtbt_enb(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret >= 0 && val > 3)
-               ret = -EINVAL;
-       if (ret < 0) {
-               ipath_dev_err(dd, "attempt to set invalid Heartbeat enable\n");
-               goto bail;
-       }
-
-       /*
-        * Set the "intentional" heartbeat enable per either of
-        * "Enable" and "Auto", as these are normally set together.
-        * This bit is consulted when leaving loopback mode,
-        * because entering loopback mode overrides it and automatically
-        * disables heartbeat.
-        */
-       r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, val);
-       if (r < 0)
-               ret = r;
-       else if (val == IPATH_IB_HRTBT_OFF)
-               dd->ipath_flags |= IPATH_NO_HRTBT;
-       else
-               dd->ipath_flags &= ~IPATH_NO_HRTBT;
-
-bail:
-       return ret;
-}
-
-/*
- * Get/Set Link-widths enabled. Or of 1=1x, 2=4x (this is human/IB centric,
- * _not_ the particular encoding of any given chip)
- */
-static ssize_t show_lwid_enb(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-static ssize_t store_lwid_enb(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret >= 0 && (val == 0 || val > 3))
-               ret = -EINVAL;
-       if (ret < 0) {
-               ipath_dev_err(dd,
-                       "attempt to set invalid Link Width (enable)\n");
-               goto bail;
-       }
-
-       r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, val);
-       if (r < 0)
-               ret = r;
-
-bail:
-       return ret;
-}
-
-/* Get current link width */
-static ssize_t show_lwid(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-/*
- * Get/Set Link-speeds enabled. Or of 1=SDR 2=DDR.
- */
-static ssize_t show_spd_enb(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-static ssize_t store_spd_enb(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret >= 0 && (val == 0 || val > (IPATH_IB_SDR | IPATH_IB_DDR)))
-               ret = -EINVAL;
-       if (ret < 0) {
-               ipath_dev_err(dd,
-                       "attempt to set invalid Link Speed (enable)\n");
-               goto bail;
-       }
-
-       r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, val);
-       if (r < 0)
-               ret = r;
-
-bail:
-       return ret;
-}
-
-/* Get current link speed */
-static ssize_t show_spd(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-/*
- * Get/Set RX polarity-invert enable. 0=no, 1=yes.
- */
-static ssize_t show_rx_polinv_enb(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-static ssize_t store_rx_polinv_enb(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret >= 0 && val > 1) {
-               ipath_dev_err(dd,
-                       "attempt to set invalid Rx Polarity (enable)\n");
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB, val);
-       if (r < 0)
-               ret = r;
-
-bail:
-       return ret;
-}
-
-/*
- * Get/Set RX lane-reversal enable. 0=no, 1=yes.
- */
-static ssize_t show_lanerev_enb(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-static ssize_t store_lanerev_enb(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret >= 0 && val > 1) {
-               ret = -EINVAL;
-               ipath_dev_err(dd,
-                       "attempt to set invalid Lane reversal (enable)\n");
-               goto bail;
-       }
-
-       r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB, val);
-       if (r < 0)
-               ret = r;
-
-bail:
-       return ret;
-}
-
-static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
-static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
-
-static struct attribute *driver_attributes[] = {
-       &driver_attr_num_units.attr,
-       &driver_attr_version.attr,
-       NULL
-};
-
-static struct attribute_group driver_attr_group = {
-       .attrs = driver_attributes
-};
-
-static ssize_t store_tempsense(struct device *dev,
-                              struct device_attribute *attr,
-                              const char *buf,
-                              size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, stat;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret <= 0) {
-               ipath_dev_err(dd, "attempt to set invalid tempsense config\n");
-               goto bail;
-       }
-       /* If anything but the highest limit, enable T_CRIT_A "interrupt" */
-       stat = ipath_tempsense_write(dd, 9, (val == 0x7f7f) ? 0x80 : 0);
-       if (stat) {
-               ipath_dev_err(dd, "Unable to set tempsense config\n");
-               ret = -1;
-               goto bail;
-       }
-       stat = ipath_tempsense_write(dd, 0xB, (u8) (val & 0xFF));
-       if (stat) {
-               ipath_dev_err(dd, "Unable to set local Tcrit\n");
-               ret = -1;
-               goto bail;
-       }
-       stat = ipath_tempsense_write(dd, 0xD, (u8) (val >> 8));
-       if (stat) {
-               ipath_dev_err(dd, "Unable to set remote Tcrit\n");
-               ret = -1;
-               goto bail;
-       }
-
-bail:
-       return ret;
-}
-
-/*
- * dump tempsense regs. in decimal, to ease shell-scripts.
- */
-static ssize_t show_tempsense(struct device *dev,
-                             struct device_attribute *attr,
-                             char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-       int idx;
-       u8 regvals[8];
-
-       ret = -ENXIO;
-       for (idx = 0; idx < 8; ++idx) {
-               if (idx == 6)
-                       continue;
-               ret = ipath_tempsense_read(dd, idx);
-               if (ret < 0)
-                       break;
-               regvals[idx] = ret;
-       }
-       if (idx == 8)
-               ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n",
-                       *(signed char *)(regvals),
-                       *(signed char *)(regvals + 1),
-                       regvals[2], regvals[3],
-                       *(signed char *)(regvals + 5),
-                       *(signed char *)(regvals + 7));
-       return ret;
-}
-
-const struct attribute_group *ipath_driver_attr_groups[] = {
-       &driver_attr_group,
-       NULL,
-};
-
-static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid);
-static DEVICE_ATTR(lmc, S_IWUSR | S_IRUGO, show_lmc, store_lmc);
-static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid);
-static DEVICE_ATTR(link_state, S_IWUSR, NULL, store_link_state);
-static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid);
-static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu);
-static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled);
-static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL);
-static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL);
-static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset);
-static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
-static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
-static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
-static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
-static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
-static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
-static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override);
-static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
-static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL);
-static DEVICE_ATTR(jint_max_packets, S_IWUSR | S_IRUGO,
-                  show_jint_max_packets, store_jint_max_packets);
-static DEVICE_ATTR(jint_idle_ticks, S_IWUSR | S_IRUGO,
-                  show_jint_idle_ticks, store_jint_idle_ticks);
-static DEVICE_ATTR(tempsense, S_IWUSR | S_IRUGO,
-                  show_tempsense, store_tempsense);
-
-static struct attribute *dev_attributes[] = {
-       &dev_attr_guid.attr,
-       &dev_attr_lmc.attr,
-       &dev_attr_lid.attr,
-       &dev_attr_link_state.attr,
-       &dev_attr_mlid.attr,
-       &dev_attr_mtu.attr,
-       &dev_attr_nguid.attr,
-       &dev_attr_nports.attr,
-       &dev_attr_serial.attr,
-       &dev_attr_status.attr,
-       &dev_attr_status_str.attr,
-       &dev_attr_boardversion.attr,
-       &dev_attr_unit.attr,
-       &dev_attr_enabled.attr,
-       &dev_attr_rx_pol_inv.attr,
-       &dev_attr_led_override.attr,
-       &dev_attr_logged_errors.attr,
-       &dev_attr_tempsense.attr,
-       &dev_attr_localbus_info.attr,
-       NULL
-};
-
-static struct attribute_group dev_attr_group = {
-       .attrs = dev_attributes
-};
-
-static DEVICE_ATTR(hrtbt_enable, S_IWUSR | S_IRUGO, show_hrtbt_enb,
-                  store_hrtbt_enb);
-static DEVICE_ATTR(link_width_enable, S_IWUSR | S_IRUGO, show_lwid_enb,
-                  store_lwid_enb);
-static DEVICE_ATTR(link_width, S_IRUGO, show_lwid, NULL);
-static DEVICE_ATTR(link_speed_enable, S_IWUSR | S_IRUGO, show_spd_enb,
-                  store_spd_enb);
-static DEVICE_ATTR(link_speed, S_IRUGO, show_spd, NULL);
-static DEVICE_ATTR(rx_pol_inv_enable, S_IWUSR | S_IRUGO, show_rx_polinv_enb,
-                  store_rx_polinv_enb);
-static DEVICE_ATTR(rx_lane_rev_enable, S_IWUSR | S_IRUGO, show_lanerev_enb,
-                  store_lanerev_enb);
-
-static struct attribute *dev_ibcfg_attributes[] = {
-       &dev_attr_hrtbt_enable.attr,
-       &dev_attr_link_width_enable.attr,
-       &dev_attr_link_width.attr,
-       &dev_attr_link_speed_enable.attr,
-       &dev_attr_link_speed.attr,
-       &dev_attr_rx_pol_inv_enable.attr,
-       &dev_attr_rx_lane_rev_enable.attr,
-       NULL
-};
-
-static struct attribute_group dev_ibcfg_attr_group = {
-       .attrs = dev_ibcfg_attributes
-};
-
-/**
- * ipath_expose_reset - create a device reset file
- * @dev: the device structure
- *
- * Only expose a file that lets us reset the device after someone
- * enters diag mode.  A device reset is quite likely to crash the
- * machine entirely, so we don't want to normally make it
- * available.
- *
- * Called with ipath_mutex held.
- */
-int ipath_expose_reset(struct device *dev)
-{
-       static int exposed;
-       int ret;
-
-       if (!exposed) {
-               ret = device_create_file(dev, &dev_attr_reset);
-               exposed = 1;
-       } else {
-               ret = 0;
-       }
-
-       return ret;
-}
-
-int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd)
-{
-       int ret;
-
-       ret = sysfs_create_group(&dev->kobj, &dev_attr_group);
-       if (ret)
-               goto bail;
-
-       ret = sysfs_create_group(&dev->kobj, &dev_counter_attr_group);
-       if (ret)
-               goto bail_attrs;
-
-       if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) {
-               ret = device_create_file(dev, &dev_attr_jint_idle_ticks);
-               if (ret)
-                       goto bail_counter;
-               ret = device_create_file(dev, &dev_attr_jint_max_packets);
-               if (ret)
-                       goto bail_idle;
-
-               ret = sysfs_create_group(&dev->kobj, &dev_ibcfg_attr_group);
-               if (ret)
-                       goto bail_max;
-       }
-
-       return 0;
-
-bail_max:
-       device_remove_file(dev, &dev_attr_jint_max_packets);
-bail_idle:
-       device_remove_file(dev, &dev_attr_jint_idle_ticks);
-bail_counter:
-       sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
-bail_attrs:
-       sysfs_remove_group(&dev->kobj, &dev_attr_group);
-bail:
-       return ret;
-}
-
-void ipath_device_remove_group(struct device *dev, struct ipath_devdata *dd)
-{
-       sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
-
-       if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) {
-               sysfs_remove_group(&dev->kobj, &dev_ibcfg_attr_group);
-               device_remove_file(dev, &dev_attr_jint_idle_ticks);
-               device_remove_file(dev, &dev_attr_jint_max_packets);
-       }
-
-       sysfs_remove_group(&dev->kobj, &dev_attr_group);
-
-       device_remove_file(dev, &dev_attr_reset);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_uc.c b/drivers/staging/rdma/ipath/ipath_uc.c
deleted file mode 100644 (file)
index 0246b30..0000000
+++ /dev/null
@@ -1,547 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_UC_##x
-
-/**
- * ipath_make_uc_req - construct a request packet (SEND, RDMA write)
- * @qp: a pointer to the QP
- *
- * Return 1 if constructed; otherwise, return 0.
- */
-int ipath_make_uc_req(struct ipath_qp *qp)
-{
-       struct ipath_other_headers *ohdr;
-       struct ipath_swqe *wqe;
-       unsigned long flags;
-       u32 hwords;
-       u32 bth0;
-       u32 len;
-       u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-       int ret = 0;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
-               if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
-                       goto bail;
-               /* We are in the error state, flush the work request. */
-               if (qp->s_last == qp->s_head)
-                       goto bail;
-               /* If DMAs are in progress, we can't flush immediately. */
-               if (atomic_read(&qp->s_dma_busy)) {
-                       qp->s_flags |= IPATH_S_WAIT_DMA;
-                       goto bail;
-               }
-               wqe = get_swqe_ptr(qp, qp->s_last);
-               ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-               goto done;
-       }
-
-       ohdr = &qp->s_hdr.u.oth;
-       if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-               ohdr = &qp->s_hdr.u.l.oth;
-
-       /* header size in 32-bit words LRH+BTH = (8+12)/4. */
-       hwords = 5;
-       bth0 = 1 << 22; /* Set M bit */
-
-       /* Get the next send request. */
-       wqe = get_swqe_ptr(qp, qp->s_cur);
-       qp->s_wqe = NULL;
-       switch (qp->s_state) {
-       default:
-               if (!(ib_ipath_state_ops[qp->state] &
-                   IPATH_PROCESS_NEXT_SEND_OK))
-                       goto bail;
-               /* Check if send work queue is empty. */
-               if (qp->s_cur == qp->s_head)
-                       goto bail;
-               /*
-                * Start a new request.
-                */
-               qp->s_psn = wqe->psn = qp->s_next_psn;
-               qp->s_sge.sge = wqe->sg_list[0];
-               qp->s_sge.sg_list = wqe->sg_list + 1;
-               qp->s_sge.num_sge = wqe->wr.num_sge;
-               qp->s_len = len = wqe->length;
-               switch (wqe->wr.opcode) {
-               case IB_WR_SEND:
-               case IB_WR_SEND_WITH_IMM:
-                       if (len > pmtu) {
-                               qp->s_state = OP(SEND_FIRST);
-                               len = pmtu;
-                               break;
-                       }
-                       if (wqe->wr.opcode == IB_WR_SEND)
-                               qp->s_state = OP(SEND_ONLY);
-                       else {
-                               qp->s_state =
-                                       OP(SEND_ONLY_WITH_IMMEDIATE);
-                               /* Immediate data comes after the BTH */
-                               ohdr->u.imm_data = wqe->wr.ex.imm_data;
-                               hwords += 1;
-                       }
-                       if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                               bth0 |= 1 << 23;
-                       qp->s_wqe = wqe;
-                       if (++qp->s_cur >= qp->s_size)
-                               qp->s_cur = 0;
-                       break;
-
-               case IB_WR_RDMA_WRITE:
-               case IB_WR_RDMA_WRITE_WITH_IMM:
-                       ohdr->u.rc.reth.vaddr =
-                               cpu_to_be64(wqe->rdma_wr.remote_addr);
-                       ohdr->u.rc.reth.rkey =
-                               cpu_to_be32(wqe->rdma_wr.rkey);
-                       ohdr->u.rc.reth.length = cpu_to_be32(len);
-                       hwords += sizeof(struct ib_reth) / 4;
-                       if (len > pmtu) {
-                               qp->s_state = OP(RDMA_WRITE_FIRST);
-                               len = pmtu;
-                               break;
-                       }
-                       if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-                               qp->s_state = OP(RDMA_WRITE_ONLY);
-                       else {
-                               qp->s_state =
-                                       OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
-                               /* Immediate data comes after the RETH */
-                               ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
-                               hwords += 1;
-                               if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                                       bth0 |= 1 << 23;
-                       }
-                       qp->s_wqe = wqe;
-                       if (++qp->s_cur >= qp->s_size)
-                               qp->s_cur = 0;
-                       break;
-
-               default:
-                       goto bail;
-               }
-               break;
-
-       case OP(SEND_FIRST):
-               qp->s_state = OP(SEND_MIDDLE);
-               /* FALLTHROUGH */
-       case OP(SEND_MIDDLE):
-               len = qp->s_len;
-               if (len > pmtu) {
-                       len = pmtu;
-                       break;
-               }
-               if (wqe->wr.opcode == IB_WR_SEND)
-                       qp->s_state = OP(SEND_LAST);
-               else {
-                       qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
-                       /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
-                       hwords += 1;
-               }
-               if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                       bth0 |= 1 << 23;
-               qp->s_wqe = wqe;
-               if (++qp->s_cur >= qp->s_size)
-                       qp->s_cur = 0;
-               break;
-
-       case OP(RDMA_WRITE_FIRST):
-               qp->s_state = OP(RDMA_WRITE_MIDDLE);
-               /* FALLTHROUGH */
-       case OP(RDMA_WRITE_MIDDLE):
-               len = qp->s_len;
-               if (len > pmtu) {
-                       len = pmtu;
-                       break;
-               }
-               if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-                       qp->s_state = OP(RDMA_WRITE_LAST);
-               else {
-                       qp->s_state =
-                               OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
-                       /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
-                       hwords += 1;
-                       if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                               bth0 |= 1 << 23;
-               }
-               qp->s_wqe = wqe;
-               if (++qp->s_cur >= qp->s_size)
-                       qp->s_cur = 0;
-               break;
-       }
-       qp->s_len -= len;
-       qp->s_hdrwords = hwords;
-       qp->s_cur_sge = &qp->s_sge;
-       qp->s_cur_size = len;
-       ipath_make_ruc_header(to_idev(qp->ibqp.device),
-                             qp, ohdr, bth0 | (qp->s_state << 24),
-                             qp->s_next_psn++ & IPATH_PSN_MASK);
-done:
-       ret = 1;
-       goto unlock;
-
-bail:
-       qp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-       return ret;
-}
-
-/**
- * ipath_uc_rcv - handle an incoming UC packet
- * @dev: the device the packet came in on
- * @hdr: the header of the packet
- * @has_grh: true if the packet has a GRH
- * @data: the packet data
- * @tlen: the length of the packet
- * @qp: the QP for this packet.
- *
- * This is called from ipath_qp_rcv() to process an incoming UC packet
- * for the given QP.
- * Called at interrupt level.
- */
-void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-                 int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
-{
-       struct ipath_other_headers *ohdr;
-       int opcode;
-       u32 hdrsize;
-       u32 psn;
-       u32 pad;
-       struct ib_wc wc;
-       u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-       struct ib_reth *reth;
-       int header_in_data;
-
-       /* Validate the SLID. See Ch. 9.6.1.5 */
-       if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
-               goto done;
-
-       /* Check for GRH */
-       if (!has_grh) {
-               ohdr = &hdr->u.oth;
-               hdrsize = 8 + 12;       /* LRH + BTH */
-               psn = be32_to_cpu(ohdr->bth[2]);
-               header_in_data = 0;
-       } else {
-               ohdr = &hdr->u.l.oth;
-               hdrsize = 8 + 40 + 12;  /* LRH + GRH + BTH */
-               /*
-                * The header with GRH is 60 bytes and the
-                * core driver sets the eager header buffer
-                * size to 56 bytes so the last 4 bytes of
-                * the BTH header (PSN) is in the data buffer.
-                */
-               header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
-               if (header_in_data) {
-                       psn = be32_to_cpu(((__be32 *) data)[0]);
-                       data += sizeof(__be32);
-               } else
-                       psn = be32_to_cpu(ohdr->bth[2]);
-       }
-       /*
-        * The opcode is in the low byte when its in network order
-        * (top byte when in host order).
-        */
-       opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-
-       memset(&wc, 0, sizeof wc);
-
-       /* Compare the PSN verses the expected PSN. */
-       if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
-               /*
-                * Handle a sequence error.
-                * Silently drop any current message.
-                */
-               qp->r_psn = psn;
-       inv:
-               qp->r_state = OP(SEND_LAST);
-               switch (opcode) {
-               case OP(SEND_FIRST):
-               case OP(SEND_ONLY):
-               case OP(SEND_ONLY_WITH_IMMEDIATE):
-                       goto send_first;
-
-               case OP(RDMA_WRITE_FIRST):
-               case OP(RDMA_WRITE_ONLY):
-               case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
-                       goto rdma_first;
-
-               default:
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-       }
-
-       /* Check for opcode sequence errors. */
-       switch (qp->r_state) {
-       case OP(SEND_FIRST):
-       case OP(SEND_MIDDLE):
-               if (opcode == OP(SEND_MIDDLE) ||
-                   opcode == OP(SEND_LAST) ||
-                   opcode == OP(SEND_LAST_WITH_IMMEDIATE))
-                       break;
-               goto inv;
-
-       case OP(RDMA_WRITE_FIRST):
-       case OP(RDMA_WRITE_MIDDLE):
-               if (opcode == OP(RDMA_WRITE_MIDDLE) ||
-                   opcode == OP(RDMA_WRITE_LAST) ||
-                   opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
-                       break;
-               goto inv;
-
-       default:
-               if (opcode == OP(SEND_FIRST) ||
-                   opcode == OP(SEND_ONLY) ||
-                   opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
-                   opcode == OP(RDMA_WRITE_FIRST) ||
-                   opcode == OP(RDMA_WRITE_ONLY) ||
-                   opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
-                       break;
-               goto inv;
-       }
-
-       /* OK, process the packet. */
-       switch (opcode) {
-       case OP(SEND_FIRST):
-       case OP(SEND_ONLY):
-       case OP(SEND_ONLY_WITH_IMMEDIATE):
-       send_first:
-               if (qp->r_flags & IPATH_R_REUSE_SGE) {
-                       qp->r_flags &= ~IPATH_R_REUSE_SGE;
-                       qp->r_sge = qp->s_rdma_read_sge;
-               } else if (!ipath_get_rwqe(qp, 0)) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               /* Save the WQE so we can reuse it in case of an error. */
-               qp->s_rdma_read_sge = qp->r_sge;
-               qp->r_rcv_len = 0;
-               if (opcode == OP(SEND_ONLY))
-                       goto send_last;
-               else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
-                       goto send_last_imm;
-               /* FALLTHROUGH */
-       case OP(SEND_MIDDLE):
-               /* Check for invalid length PMTU or posted rwqe len. */
-               if (unlikely(tlen != (hdrsize + pmtu + 4))) {
-                       qp->r_flags |= IPATH_R_REUSE_SGE;
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               qp->r_rcv_len += pmtu;
-               if (unlikely(qp->r_rcv_len > qp->r_len)) {
-                       qp->r_flags |= IPATH_R_REUSE_SGE;
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               ipath_copy_sge(&qp->r_sge, data, pmtu);
-               break;
-
-       case OP(SEND_LAST_WITH_IMMEDIATE):
-       send_last_imm:
-               if (header_in_data) {
-                       wc.ex.imm_data = *(__be32 *) data;
-                       data += sizeof(__be32);
-               } else {
-                       /* Immediate data comes after BTH */
-                       wc.ex.imm_data = ohdr->u.imm_data;
-               }
-               hdrsize += 4;
-               wc.wc_flags = IB_WC_WITH_IMM;
-               /* FALLTHROUGH */
-       case OP(SEND_LAST):
-       send_last:
-               /* Get the number of bytes the message was padded by. */
-               pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-               /* Check for invalid length. */
-               /* XXX LAST len should be >= 1 */
-               if (unlikely(tlen < (hdrsize + pad + 4))) {
-                       qp->r_flags |= IPATH_R_REUSE_SGE;
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               /* Don't count the CRC. */
-               tlen -= (hdrsize + pad + 4);
-               wc.byte_len = tlen + qp->r_rcv_len;
-               if (unlikely(wc.byte_len > qp->r_len)) {
-                       qp->r_flags |= IPATH_R_REUSE_SGE;
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               wc.opcode = IB_WC_RECV;
-       last_imm:
-               ipath_copy_sge(&qp->r_sge, data, tlen);
-               wc.wr_id = qp->r_wr_id;
-               wc.status = IB_WC_SUCCESS;
-               wc.qp = &qp->ibqp;
-               wc.src_qp = qp->remote_qpn;
-               wc.slid = qp->remote_ah_attr.dlid;
-               wc.sl = qp->remote_ah_attr.sl;
-               /* Signal completion event if the solicited bit is set. */
-               ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-                              (ohdr->bth[0] &
-                               cpu_to_be32(1 << 23)) != 0);
-               break;
-
-       case OP(RDMA_WRITE_FIRST):
-       case OP(RDMA_WRITE_ONLY):
-       case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
-       rdma_first:
-               /* RETH comes after BTH */
-               if (!header_in_data)
-                       reth = &ohdr->u.rc.reth;
-               else {
-                       reth = (struct ib_reth *)data;
-                       data += sizeof(*reth);
-               }
-               hdrsize += sizeof(*reth);
-               qp->r_len = be32_to_cpu(reth->length);
-               qp->r_rcv_len = 0;
-               if (qp->r_len != 0) {
-                       u32 rkey = be32_to_cpu(reth->rkey);
-                       u64 vaddr = be64_to_cpu(reth->vaddr);
-                       int ok;
-
-                       /* Check rkey */
-                       ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len,
-                                          vaddr, rkey,
-                                          IB_ACCESS_REMOTE_WRITE);
-                       if (unlikely(!ok)) {
-                               dev->n_pkt_drops++;
-                               goto done;
-                       }
-               } else {
-                       qp->r_sge.sg_list = NULL;
-                       qp->r_sge.sge.mr = NULL;
-                       qp->r_sge.sge.vaddr = NULL;
-                       qp->r_sge.sge.length = 0;
-                       qp->r_sge.sge.sge_length = 0;
-               }
-               if (unlikely(!(qp->qp_access_flags &
-                              IB_ACCESS_REMOTE_WRITE))) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               if (opcode == OP(RDMA_WRITE_ONLY))
-                       goto rdma_last;
-               else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
-                       goto rdma_last_imm;
-               /* FALLTHROUGH */
-       case OP(RDMA_WRITE_MIDDLE):
-               /* Check for invalid length PMTU or posted rwqe len. */
-               if (unlikely(tlen != (hdrsize + pmtu + 4))) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               qp->r_rcv_len += pmtu;
-               if (unlikely(qp->r_rcv_len > qp->r_len)) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               ipath_copy_sge(&qp->r_sge, data, pmtu);
-               break;
-
-       case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
-       rdma_last_imm:
-               if (header_in_data) {
-                       wc.ex.imm_data = *(__be32 *) data;
-                       data += sizeof(__be32);
-               } else {
-                       /* Immediate data comes after BTH */
-                       wc.ex.imm_data = ohdr->u.imm_data;
-               }
-               hdrsize += 4;
-               wc.wc_flags = IB_WC_WITH_IMM;
-
-               /* Get the number of bytes the message was padded by. */
-               pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-               /* Check for invalid length. */
-               /* XXX LAST len should be >= 1 */
-               if (unlikely(tlen < (hdrsize + pad + 4))) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               /* Don't count the CRC. */
-               tlen -= (hdrsize + pad + 4);
-               if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               if (qp->r_flags & IPATH_R_REUSE_SGE)
-                       qp->r_flags &= ~IPATH_R_REUSE_SGE;
-               else if (!ipath_get_rwqe(qp, 1)) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               wc.byte_len = qp->r_len;
-               wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-               goto last_imm;
-
-       case OP(RDMA_WRITE_LAST):
-       rdma_last:
-               /* Get the number of bytes the message was padded by. */
-               pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-               /* Check for invalid length. */
-               /* XXX LAST len should be >= 1 */
-               if (unlikely(tlen < (hdrsize + pad + 4))) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               /* Don't count the CRC. */
-               tlen -= (hdrsize + pad + 4);
-               if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               ipath_copy_sge(&qp->r_sge, data, tlen);
-               break;
-
-       default:
-               /* Drop packet for unknown opcodes. */
-               dev->n_pkt_drops++;
-               goto done;
-       }
-       qp->r_psn++;
-       qp->r_state = opcode;
-done:
-       return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_ud.c b/drivers/staging/rdma/ipath/ipath_ud.c
deleted file mode 100644 (file)
index 385d941..0000000
+++ /dev/null
@@ -1,579 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_smi.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/**
- * ipath_ud_loopback - handle send on loopback QPs
- * @sqp: the sending QP
- * @swqe: the send work request
- *
- * This is called from ipath_make_ud_req() to forward a WQE addressed
- * to the same HCA.
- * Note that the receive interrupt handler may be calling ipath_ud_rcv()
- * while this is being called.
- */
-static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
-{
-       struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
-       struct ipath_qp *qp;
-       struct ib_ah_attr *ah_attr;
-       unsigned long flags;
-       struct ipath_rq *rq;
-       struct ipath_srq *srq;
-       struct ipath_sge_state rsge;
-       struct ipath_sge *sge;
-       struct ipath_rwq *wq;
-       struct ipath_rwqe *wqe;
-       void (*handler)(struct ib_event *, void *);
-       struct ib_wc wc;
-       u32 tail;
-       u32 rlen;
-       u32 length;
-
-       qp = ipath_lookup_qpn(&dev->qp_table, swqe->ud_wr.remote_qpn);
-       if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-               dev->n_pkt_drops++;
-               goto done;
-       }
-
-       /*
-        * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
-        * Qkeys with the high order bit set mean use the
-        * qkey from the QP context instead of the WR (see 10.2.5).
-        */
-       if (unlikely(qp->ibqp.qp_num &&
-                    ((int) swqe->ud_wr.remote_qkey < 0 ?
-                     sqp->qkey : swqe->ud_wr.remote_qkey) != qp->qkey)) {
-               /* XXX OK to lose a count once in a while. */
-               dev->qkey_violations++;
-               dev->n_pkt_drops++;
-               goto drop;
-       }
-
-       /*
-        * A GRH is expected to precede the data even if not
-        * present on the wire.
-        */
-       length = swqe->length;
-       memset(&wc, 0, sizeof wc);
-       wc.byte_len = length + sizeof(struct ib_grh);
-
-       if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
-               wc.wc_flags = IB_WC_WITH_IMM;
-               wc.ex.imm_data = swqe->wr.ex.imm_data;
-       }
-
-       /*
-        * This would be a lot simpler if we could call ipath_get_rwqe()
-        * but that uses state that the receive interrupt handler uses
-        * so we would need to lock out receive interrupts while doing
-        * local loopback.
-        */
-       if (qp->ibqp.srq) {
-               srq = to_isrq(qp->ibqp.srq);
-               handler = srq->ibsrq.event_handler;
-               rq = &srq->rq;
-       } else {
-               srq = NULL;
-               handler = NULL;
-               rq = &qp->r_rq;
-       }
-
-       /*
-        * Get the next work request entry to find where to put the data.
-        * Note that it is safe to drop the lock after changing rq->tail
-        * since ipath_post_receive() won't fill the empty slot.
-        */
-       spin_lock_irqsave(&rq->lock, flags);
-       wq = rq->wq;
-       tail = wq->tail;
-       /* Validate tail before using it since it is user writable. */
-       if (tail >= rq->size)
-               tail = 0;
-       if (unlikely(tail == wq->head)) {
-               spin_unlock_irqrestore(&rq->lock, flags);
-               dev->n_pkt_drops++;
-               goto drop;
-       }
-       wqe = get_rwqe_ptr(rq, tail);
-       rsge.sg_list = qp->r_ud_sg_list;
-       if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) {
-               spin_unlock_irqrestore(&rq->lock, flags);
-               dev->n_pkt_drops++;
-               goto drop;
-       }
-       /* Silently drop packets which are too big. */
-       if (wc.byte_len > rlen) {
-               spin_unlock_irqrestore(&rq->lock, flags);
-               dev->n_pkt_drops++;
-               goto drop;
-       }
-       if (++tail >= rq->size)
-               tail = 0;
-       wq->tail = tail;
-       wc.wr_id = wqe->wr_id;
-       if (handler) {
-               u32 n;
-
-               /*
-                * validate head pointer value and compute
-                * the number of remaining WQEs.
-                */
-               n = wq->head;
-               if (n >= rq->size)
-                       n = 0;
-               if (n < tail)
-                       n += rq->size - tail;
-               else
-                       n -= tail;
-               if (n < srq->limit) {
-                       struct ib_event ev;
-
-                       srq->limit = 0;
-                       spin_unlock_irqrestore(&rq->lock, flags);
-                       ev.device = qp->ibqp.device;
-                       ev.element.srq = qp->ibqp.srq;
-                       ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-                       handler(&ev, srq->ibsrq.srq_context);
-               } else
-                       spin_unlock_irqrestore(&rq->lock, flags);
-       } else
-               spin_unlock_irqrestore(&rq->lock, flags);
-
-       ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
-       if (ah_attr->ah_flags & IB_AH_GRH) {
-               ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
-               wc.wc_flags |= IB_WC_GRH;
-       } else
-               ipath_skip_sge(&rsge, sizeof(struct ib_grh));
-       sge = swqe->sg_list;
-       while (length) {
-               u32 len = sge->length;
-
-               if (len > length)
-                       len = length;
-               if (len > sge->sge_length)
-                       len = sge->sge_length;
-               BUG_ON(len == 0);
-               ipath_copy_sge(&rsge, sge->vaddr, len);
-               sge->vaddr += len;
-               sge->length -= len;
-               sge->sge_length -= len;
-               if (sge->sge_length == 0) {
-                       if (--swqe->wr.num_sge)
-                               sge++;
-               } else if (sge->length == 0 && sge->mr != NULL) {
-                       if (++sge->n >= IPATH_SEGSZ) {
-                               if (++sge->m >= sge->mr->mapsz)
-                                       break;
-                               sge->n = 0;
-                       }
-                       sge->vaddr =
-                               sge->mr->map[sge->m]->segs[sge->n].vaddr;
-                       sge->length =
-                               sge->mr->map[sge->m]->segs[sge->n].length;
-               }
-               length -= len;
-       }
-       wc.status = IB_WC_SUCCESS;
-       wc.opcode = IB_WC_RECV;
-       wc.qp = &qp->ibqp;
-       wc.src_qp = sqp->ibqp.qp_num;
-       /* XXX do we know which pkey matched? Only needed for GSI. */
-       wc.pkey_index = 0;
-       wc.slid = dev->dd->ipath_lid |
-               (ah_attr->src_path_bits &
-                ((1 << dev->dd->ipath_lmc) - 1));
-       wc.sl = ah_attr->sl;
-       wc.dlid_path_bits =
-               ah_attr->dlid & ((1 << dev->dd->ipath_lmc) - 1);
-       wc.port_num = 1;
-       /* Signal completion event if the solicited bit is set. */
-       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-                      swqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED);
-drop:
-       if (atomic_dec_and_test(&qp->refcount))
-               wake_up(&qp->wait);
-done:;
-}
-
-/**
- * ipath_make_ud_req - construct a UD request packet
- * @qp: the QP
- *
- * Return 1 if constructed; otherwise, return 0.
- */
-int ipath_make_ud_req(struct ipath_qp *qp)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ipath_other_headers *ohdr;
-       struct ib_ah_attr *ah_attr;
-       struct ipath_swqe *wqe;
-       unsigned long flags;
-       u32 nwords;
-       u32 extra_bytes;
-       u32 bth0;
-       u16 lrh0;
-       u16 lid;
-       int ret = 0;
-       int next_cur;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
-               if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
-                       goto bail;
-               /* We are in the error state, flush the work request. */
-               if (qp->s_last == qp->s_head)
-                       goto bail;
-               /* If DMAs are in progress, we can't flush immediately. */
-               if (atomic_read(&qp->s_dma_busy)) {
-                       qp->s_flags |= IPATH_S_WAIT_DMA;
-                       goto bail;
-               }
-               wqe = get_swqe_ptr(qp, qp->s_last);
-               ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-               goto done;
-       }
-
-       if (qp->s_cur == qp->s_head)
-               goto bail;
-
-       wqe = get_swqe_ptr(qp, qp->s_cur);
-       next_cur = qp->s_cur + 1;
-       if (next_cur >= qp->s_size)
-               next_cur = 0;
-
-       /* Construct the header. */
-       ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
-       if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) {
-               if (ah_attr->dlid != IPATH_PERMISSIVE_LID)
-                       dev->n_multicast_xmit++;
-               else
-                       dev->n_unicast_xmit++;
-       } else {
-               dev->n_unicast_xmit++;
-               lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1);
-               if (unlikely(lid == dev->dd->ipath_lid)) {
-                       /*
-                        * If DMAs are in progress, we can't generate
-                        * a completion for the loopback packet since
-                        * it would be out of order.
-                        * XXX Instead of waiting, we could queue a
-                        * zero length descriptor so we get a callback.
-                        */
-                       if (atomic_read(&qp->s_dma_busy)) {
-                               qp->s_flags |= IPATH_S_WAIT_DMA;
-                               goto bail;
-                       }
-                       qp->s_cur = next_cur;
-                       spin_unlock_irqrestore(&qp->s_lock, flags);
-                       ipath_ud_loopback(qp, wqe);
-                       spin_lock_irqsave(&qp->s_lock, flags);
-                       ipath_send_complete(qp, wqe, IB_WC_SUCCESS);
-                       goto done;
-               }
-       }
-
-       qp->s_cur = next_cur;
-       extra_bytes = -wqe->length & 3;
-       nwords = (wqe->length + extra_bytes) >> 2;
-
-       /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
-       qp->s_hdrwords = 7;
-       qp->s_cur_size = wqe->length;
-       qp->s_cur_sge = &qp->s_sge;
-       qp->s_dmult = ah_attr->static_rate;
-       qp->s_wqe = wqe;
-       qp->s_sge.sge = wqe->sg_list[0];
-       qp->s_sge.sg_list = wqe->sg_list + 1;
-       qp->s_sge.num_sge = wqe->ud_wr.wr.num_sge;
-
-       if (ah_attr->ah_flags & IB_AH_GRH) {
-               /* Header size in 32-bit words. */
-               qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
-                                                &ah_attr->grh,
-                                                qp->s_hdrwords, nwords);
-               lrh0 = IPATH_LRH_GRH;
-               ohdr = &qp->s_hdr.u.l.oth;
-               /*
-                * Don't worry about sending to locally attached multicast
-                * QPs.  It is unspecified by the spec. what happens.
-                */
-       } else {
-               /* Header size in 32-bit words. */
-               lrh0 = IPATH_LRH_BTH;
-               ohdr = &qp->s_hdr.u.oth;
-       }
-       if (wqe->ud_wr.wr.opcode == IB_WR_SEND_WITH_IMM) {
-               qp->s_hdrwords++;
-               ohdr->u.ud.imm_data = wqe->ud_wr.wr.ex.imm_data;
-               bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
-       } else
-               bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
-       lrh0 |= ah_attr->sl << 4;
-       if (qp->ibqp.qp_type == IB_QPT_SMI)
-               lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
-       qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
-       qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);  /* DEST LID */
-       qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
-                                          SIZE_OF_CRC);
-       lid = dev->dd->ipath_lid;
-       if (lid) {
-               lid |= ah_attr->src_path_bits &
-                       ((1 << dev->dd->ipath_lmc) - 1);
-               qp->s_hdr.lrh[3] = cpu_to_be16(lid);
-       } else
-               qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
-       if (wqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED)
-               bth0 |= 1 << 23;
-       bth0 |= extra_bytes << 20;
-       bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
-               ipath_get_pkey(dev->dd, qp->s_pkey_index);
-       ohdr->bth[0] = cpu_to_be32(bth0);
-       /*
-        * Use the multicast QP if the destination LID is a multicast LID.
-        */
-       ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
-               ah_attr->dlid != IPATH_PERMISSIVE_LID ?
-               cpu_to_be32(IPATH_MULTICAST_QPN) :
-               cpu_to_be32(wqe->ud_wr.remote_qpn);
-       ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK);
-       /*
-        * Qkeys with the high order bit set mean use the
-        * qkey from the QP context instead of the WR (see 10.2.5).
-        */
-       ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
-                                        qp->qkey : wqe->ud_wr.remote_qkey);
-       ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
-
-done:
-       ret = 1;
-       goto unlock;
-
-bail:
-       qp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-       return ret;
-}
-
-/**
- * ipath_ud_rcv - receive an incoming UD packet
- * @dev: the device the packet came in on
- * @hdr: the packet header
- * @has_grh: true if the packet has a GRH
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP the packet came on
- *
- * This is called from ipath_qp_rcv() to process an incoming UD packet
- * for the given QP.
- * Called at interrupt level.
- */
-void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-                 int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
-{
-       struct ipath_other_headers *ohdr;
-       int opcode;
-       u32 hdrsize;
-       u32 pad;
-       struct ib_wc wc;
-       u32 qkey;
-       u32 src_qp;
-       u16 dlid;
-       int header_in_data;
-
-       /* Check for GRH */
-       if (!has_grh) {
-               ohdr = &hdr->u.oth;
-               hdrsize = 8 + 12 + 8;   /* LRH + BTH + DETH */
-               qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
-               src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
-               header_in_data = 0;
-       } else {
-               ohdr = &hdr->u.l.oth;
-               hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */
-               /*
-                * The header with GRH is 68 bytes and the core driver sets
-                * the eager header buffer size to 56 bytes so the last 12
-                * bytes of the IB header is in the data buffer.
-                */
-               header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
-               if (header_in_data) {
-                       qkey = be32_to_cpu(((__be32 *) data)[1]);
-                       src_qp = be32_to_cpu(((__be32 *) data)[2]);
-                       data += 12;
-               } else {
-                       qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
-                       src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
-               }
-       }
-       src_qp &= IPATH_QPN_MASK;
-
-       /*
-        * Check that the permissive LID is only used on QP0
-        * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1).
-        */
-       if (qp->ibqp.qp_num) {
-               if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
-                            hdr->lrh[3] == IB_LID_PERMISSIVE)) {
-                       dev->n_pkt_drops++;
-                       goto bail;
-               }
-               if (unlikely(qkey != qp->qkey)) {
-                       /* XXX OK to lose a count once in a while. */
-                       dev->qkey_violations++;
-                       dev->n_pkt_drops++;
-                       goto bail;
-               }
-       } else if (hdr->lrh[1] == IB_LID_PERMISSIVE ||
-                  hdr->lrh[3] == IB_LID_PERMISSIVE) {
-               struct ib_smp *smp = (struct ib_smp *) data;
-
-               if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
-                       dev->n_pkt_drops++;
-                       goto bail;
-               }
-       }
-
-       /*
-        * The opcode is in the low byte when its in network order
-        * (top byte when in host order).
-        */
-       opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-       if (qp->ibqp.qp_num > 1 &&
-           opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
-               if (header_in_data) {
-                       wc.ex.imm_data = *(__be32 *) data;
-                       data += sizeof(__be32);
-               } else
-                       wc.ex.imm_data = ohdr->u.ud.imm_data;
-               wc.wc_flags = IB_WC_WITH_IMM;
-               hdrsize += sizeof(u32);
-       } else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
-               wc.ex.imm_data = 0;
-               wc.wc_flags = 0;
-       } else {
-               dev->n_pkt_drops++;
-               goto bail;
-       }
-
-       /* Get the number of bytes the message was padded by. */
-       pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-       if (unlikely(tlen < (hdrsize + pad + 4))) {
-               /* Drop incomplete packets. */
-               dev->n_pkt_drops++;
-               goto bail;
-       }
-       tlen -= hdrsize + pad + 4;
-
-       /* Drop invalid MAD packets (see 13.5.3.1). */
-       if (unlikely((qp->ibqp.qp_num == 0 &&
-                     (tlen != 256 ||
-                      (be16_to_cpu(hdr->lrh[0]) >> 12) != 15)) ||
-                    (qp->ibqp.qp_num == 1 &&
-                     (tlen != 256 ||
-                      (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))) {
-               dev->n_pkt_drops++;
-               goto bail;
-       }
-
-       /*
-        * A GRH is expected to precede the data even if not
-        * present on the wire.
-        */
-       wc.byte_len = tlen + sizeof(struct ib_grh);
-
-       /*
-        * Get the next work request entry to find where to put the data.
-        */
-       if (qp->r_flags & IPATH_R_REUSE_SGE)
-               qp->r_flags &= ~IPATH_R_REUSE_SGE;
-       else if (!ipath_get_rwqe(qp, 0)) {
-               /*
-                * Count VL15 packets dropped due to no receive buffer.
-                * Otherwise, count them as buffer overruns since usually,
-                * the HW will be able to receive packets even if there are
-                * no QPs with posted receive buffers.
-                */
-               if (qp->ibqp.qp_num == 0)
-                       dev->n_vl15_dropped++;
-               else
-                       dev->rcv_errors++;
-               goto bail;
-       }
-       /* Silently drop packets which are too big. */
-       if (wc.byte_len > qp->r_len) {
-               qp->r_flags |= IPATH_R_REUSE_SGE;
-               dev->n_pkt_drops++;
-               goto bail;
-       }
-       if (has_grh) {
-               ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh,
-                              sizeof(struct ib_grh));
-               wc.wc_flags |= IB_WC_GRH;
-       } else
-               ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
-       ipath_copy_sge(&qp->r_sge, data,
-                      wc.byte_len - sizeof(struct ib_grh));
-       if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
-               goto bail;
-       wc.wr_id = qp->r_wr_id;
-       wc.status = IB_WC_SUCCESS;
-       wc.opcode = IB_WC_RECV;
-       wc.vendor_err = 0;
-       wc.qp = &qp->ibqp;
-       wc.src_qp = src_qp;
-       /* XXX do we know which pkey matched? Only needed for GSI. */
-       wc.pkey_index = 0;
-       wc.slid = be16_to_cpu(hdr->lrh[3]);
-       wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF;
-       dlid = be16_to_cpu(hdr->lrh[1]);
-       /*
-        * Save the LMC lower bits if the destination LID is a unicast LID.
-        */
-       wc.dlid_path_bits = dlid >= IPATH_MULTICAST_LID_BASE ? 0 :
-               dlid & ((1 << dev->dd->ipath_lmc) - 1);
-       wc.port_num = 1;
-       /* Signal completion event if the solicited bit is set. */
-       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-                      (ohdr->bth[0] &
-                       cpu_to_be32(1 << 23)) != 0);
-
-bail:;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_user_pages.c b/drivers/staging/rdma/ipath/ipath_user_pages.c
deleted file mode 100644 (file)
index d29b4da..0000000
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/mm.h>
-#include <linux/device.h>
-#include <linux/slab.h>
-
-#include "ipath_kernel.h"
-
-static void __ipath_release_user_pages(struct page **p, size_t num_pages,
-                                  int dirty)
-{
-       size_t i;
-
-       for (i = 0; i < num_pages; i++) {
-               ipath_cdbg(MM, "%lu/%lu put_page %p\n", (unsigned long) i,
-                          (unsigned long) num_pages, p[i]);
-               if (dirty)
-                       set_page_dirty_lock(p[i]);
-               put_page(p[i]);
-       }
-}
-
-/* call with current->mm->mmap_sem held */
-static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages,
-                                 struct page **p)
-{
-       unsigned long lock_limit;
-       size_t got;
-       int ret;
-
-       lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-       if (num_pages > lock_limit) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       ipath_cdbg(VERBOSE, "pin %lx pages from vaddr %lx\n",
-                  (unsigned long) num_pages, start_page);
-
-       for (got = 0; got < num_pages; got += ret) {
-               ret = get_user_pages(current, current->mm,
-                                    start_page + got * PAGE_SIZE,
-                                    num_pages - got, 1, 1,
-                                    p + got, NULL);
-               if (ret < 0)
-                       goto bail_release;
-       }
-
-       current->mm->pinned_vm += num_pages;
-
-       ret = 0;
-       goto bail;
-
-bail_release:
-       __ipath_release_user_pages(p, got, 0);
-bail:
-       return ret;
-}
-
-/**
- * ipath_map_page - a safety wrapper around pci_map_page()
- *
- * A dma_addr of all 0's is interpreted by the chip as "disabled".
- * Unfortunately, it can also be a valid dma_addr returned on some
- * architectures.
- *
- * The powerpc iommu assigns dma_addrs in ascending order, so we don't
- * have to bother with retries or mapping a dummy page to insure we
- * don't just get the same mapping again.
- *
- * I'm sure we won't be so lucky with other iommu's, so FIXME.
- */
-dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page,
-       unsigned long offset, size_t size, int direction)
-{
-       dma_addr_t phys;
-
-       phys = pci_map_page(hwdev, page, offset, size, direction);
-
-       if (phys == 0) {
-               pci_unmap_page(hwdev, phys, size, direction);
-               phys = pci_map_page(hwdev, page, offset, size, direction);
-               /*
-                * FIXME: If we get 0 again, we should keep this page,
-                * map another, then free the 0 page.
-                */
-       }
-
-       return phys;
-}
-
-/**
- * ipath_map_single - a safety wrapper around pci_map_single()
- *
- * Same idea as ipath_map_page().
- */
-dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
-       int direction)
-{
-       dma_addr_t phys;
-
-       phys = pci_map_single(hwdev, ptr, size, direction);
-
-       if (phys == 0) {
-               pci_unmap_single(hwdev, phys, size, direction);
-               phys = pci_map_single(hwdev, ptr, size, direction);
-               /*
-                * FIXME: If we get 0 again, we should keep this page,
-                * map another, then free the 0 page.
-                */
-       }
-
-       return phys;
-}
-
-/**
- * ipath_get_user_pages - lock user pages into memory
- * @start_page: the start page
- * @num_pages: the number of pages
- * @p: the output page structures
- *
- * This function takes a given start page (page aligned user virtual
- * address) and pins it and the following specified number of pages.  For
- * now, num_pages is always 1, but that will probably change at some point
- * (because caller is doing expected sends on a single virtually contiguous
- * buffer, so we can do all pages at once).
- */
-int ipath_get_user_pages(unsigned long start_page, size_t num_pages,
-                        struct page **p)
-{
-       int ret;
-
-       down_write(&current->mm->mmap_sem);
-
-       ret = __ipath_get_user_pages(start_page, num_pages, p);
-
-       up_write(&current->mm->mmap_sem);
-
-       return ret;
-}
-
-void ipath_release_user_pages(struct page **p, size_t num_pages)
-{
-       down_write(&current->mm->mmap_sem);
-
-       __ipath_release_user_pages(p, num_pages, 1);
-
-       current->mm->pinned_vm -= num_pages;
-
-       up_write(&current->mm->mmap_sem);
-}
-
-struct ipath_user_pages_work {
-       struct work_struct work;
-       struct mm_struct *mm;
-       unsigned long num_pages;
-};
-
-static void user_pages_account(struct work_struct *_work)
-{
-       struct ipath_user_pages_work *work =
-               container_of(_work, struct ipath_user_pages_work, work);
-
-       down_write(&work->mm->mmap_sem);
-       work->mm->pinned_vm -= work->num_pages;
-       up_write(&work->mm->mmap_sem);
-       mmput(work->mm);
-       kfree(work);
-}
-
-void ipath_release_user_pages_on_close(struct page **p, size_t num_pages)
-{
-       struct ipath_user_pages_work *work;
-       struct mm_struct *mm;
-
-       __ipath_release_user_pages(p, num_pages, 1);
-
-       mm = get_task_mm(current);
-       if (!mm)
-               return;
-
-       work = kmalloc(sizeof(*work), GFP_KERNEL);
-       if (!work)
-               goto bail_mm;
-
-       INIT_WORK(&work->work, user_pages_account);
-       work->mm = mm;
-       work->num_pages = num_pages;
-
-       queue_work(ib_wq, &work->work);
-       return;
-
-bail_mm:
-       mmput(mm);
-       return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_user_sdma.c b/drivers/staging/rdma/ipath/ipath_user_sdma.c
deleted file mode 100644 (file)
index 8c12e3c..0000000
+++ /dev/null
@@ -1,874 +0,0 @@
-/*
- * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/mm.h>
-#include <linux/types.h>
-#include <linux/device.h>
-#include <linux/dmapool.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/highmem.h>
-#include <linux/io.h>
-#include <linux/uio.h>
-#include <linux/rbtree.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-
-#include "ipath_kernel.h"
-#include "ipath_user_sdma.h"
-
-/* minimum size of header */
-#define IPATH_USER_SDMA_MIN_HEADER_LENGTH      64
-/* expected size of headers (for dma_pool) */
-#define IPATH_USER_SDMA_EXP_HEADER_LENGTH      64
-/* length mask in PBC (lower 11 bits) */
-#define IPATH_PBC_LENGTH_MASK                  ((1 << 11) - 1)
-
-struct ipath_user_sdma_pkt {
-       u8 naddr;               /* dimension of addr (1..3) ... */
-       u32 counter;            /* sdma pkts queued counter for this entry */
-       u64 added;              /* global descq number of entries */
-
-       struct {
-               u32 offset;                     /* offset for kvaddr, addr */
-               u32 length;                     /* length in page */
-               u8  put_page;                   /* should we put_page? */
-               u8  dma_mapped;                 /* is page dma_mapped? */
-               struct page *page;              /* may be NULL (coherent mem) */
-               void *kvaddr;                   /* FIXME: only for pio hack */
-               dma_addr_t addr;
-       } addr[4];   /* max pages, any more and we coalesce */
-       struct list_head list;  /* list element */
-};
-
-struct ipath_user_sdma_queue {
-       /*
-        * pkts sent to dma engine are queued on this
-        * list head.  the type of the elements of this
-        * list are struct ipath_user_sdma_pkt...
-        */
-       struct list_head sent;
-
-       /* headers with expected length are allocated from here... */
-       char header_cache_name[64];
-       struct dma_pool *header_cache;
-
-       /* packets are allocated from the slab cache... */
-       char pkt_slab_name[64];
-       struct kmem_cache *pkt_slab;
-
-       /* as packets go on the queued queue, they are counted... */
-       u32 counter;
-       u32 sent_counter;
-
-       /* dma page table */
-       struct rb_root dma_pages_root;
-
-       /* protect everything above... */
-       struct mutex lock;
-};
-
-struct ipath_user_sdma_queue *
-ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport)
-{
-       struct ipath_user_sdma_queue *pq =
-               kmalloc(sizeof(struct ipath_user_sdma_queue), GFP_KERNEL);
-
-       if (!pq)
-               goto done;
-
-       pq->counter = 0;
-       pq->sent_counter = 0;
-       INIT_LIST_HEAD(&pq->sent);
-
-       mutex_init(&pq->lock);
-
-       snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
-                "ipath-user-sdma-pkts-%u-%02u.%02u", unit, port, sport);
-       pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name,
-                                        sizeof(struct ipath_user_sdma_pkt),
-                                        0, 0, NULL);
-
-       if (!pq->pkt_slab)
-               goto err_kfree;
-
-       snprintf(pq->header_cache_name, sizeof(pq->header_cache_name),
-                "ipath-user-sdma-headers-%u-%02u.%02u", unit, port, sport);
-       pq->header_cache = dma_pool_create(pq->header_cache_name,
-                                          dev,
-                                          IPATH_USER_SDMA_EXP_HEADER_LENGTH,
-                                          4, 0);
-       if (!pq->header_cache)
-               goto err_slab;
-
-       pq->dma_pages_root = RB_ROOT;
-
-       goto done;
-
-err_slab:
-       kmem_cache_destroy(pq->pkt_slab);
-err_kfree:
-       kfree(pq);
-       pq = NULL;
-
-done:
-       return pq;
-}
-
-static void ipath_user_sdma_init_frag(struct ipath_user_sdma_pkt *pkt,
-                                     int i, size_t offset, size_t len,
-                                     int put_page, int dma_mapped,
-                                     struct page *page,
-                                     void *kvaddr, dma_addr_t dma_addr)
-{
-       pkt->addr[i].offset = offset;
-       pkt->addr[i].length = len;
-       pkt->addr[i].put_page = put_page;
-       pkt->addr[i].dma_mapped = dma_mapped;
-       pkt->addr[i].page = page;
-       pkt->addr[i].kvaddr = kvaddr;
-       pkt->addr[i].addr = dma_addr;
-}
-
-static void ipath_user_sdma_init_header(struct ipath_user_sdma_pkt *pkt,
-                                       u32 counter, size_t offset,
-                                       size_t len, int dma_mapped,
-                                       struct page *page,
-                                       void *kvaddr, dma_addr_t dma_addr)
-{
-       pkt->naddr = 1;
-       pkt->counter = counter;
-       ipath_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page,
-                                 kvaddr, dma_addr);
-}
-
-/* we've too many pages in the iovec, coalesce to a single page */
-static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd,
-                                   struct ipath_user_sdma_pkt *pkt,
-                                   const struct iovec *iov,
-                                   unsigned long niov) {
-       int ret = 0;
-       struct page *page = alloc_page(GFP_KERNEL);
-       void *mpage_save;
-       char *mpage;
-       int i;
-       int len = 0;
-       dma_addr_t dma_addr;
-
-       if (!page) {
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       mpage = kmap(page);
-       mpage_save = mpage;
-       for (i = 0; i < niov; i++) {
-               int cfur;
-
-               cfur = copy_from_user(mpage,
-                                     iov[i].iov_base, iov[i].iov_len);
-               if (cfur) {
-                       ret = -EFAULT;
-                       goto free_unmap;
-               }
-
-               mpage += iov[i].iov_len;
-               len += iov[i].iov_len;
-       }
-
-       dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
-                               DMA_TO_DEVICE);
-       if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
-               ret = -ENOMEM;
-               goto free_unmap;
-       }
-
-       ipath_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save,
-                                 dma_addr);
-       pkt->naddr = 2;
-
-       goto done;
-
-free_unmap:
-       kunmap(page);
-       __free_page(page);
-done:
-       return ret;
-}
-
-/* how many pages in this iovec element? */
-static int ipath_user_sdma_num_pages(const struct iovec *iov)
-{
-       const unsigned long addr  = (unsigned long) iov->iov_base;
-       const unsigned long  len  = iov->iov_len;
-       const unsigned long spage = addr & PAGE_MASK;
-       const unsigned long epage = (addr + len - 1) & PAGE_MASK;
-
-       return 1 + ((epage - spage) >> PAGE_SHIFT);
-}
-
-/* truncate length to page boundary */
-static int ipath_user_sdma_page_length(unsigned long addr, unsigned long len)
-{
-       const unsigned long offset = offset_in_page(addr);
-
-       return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len;
-}
-
-static void ipath_user_sdma_free_pkt_frag(struct device *dev,
-                                         struct ipath_user_sdma_queue *pq,
-                                         struct ipath_user_sdma_pkt *pkt,
-                                         int frag)
-{
-       const int i = frag;
-
-       if (pkt->addr[i].page) {
-               if (pkt->addr[i].dma_mapped)
-                       dma_unmap_page(dev,
-                                      pkt->addr[i].addr,
-                                      pkt->addr[i].length,
-                                      DMA_TO_DEVICE);
-
-               if (pkt->addr[i].kvaddr)
-                       kunmap(pkt->addr[i].page);
-
-               if (pkt->addr[i].put_page)
-                       put_page(pkt->addr[i].page);
-               else
-                       __free_page(pkt->addr[i].page);
-       } else if (pkt->addr[i].kvaddr)
-               /* free coherent mem from cache... */
-               dma_pool_free(pq->header_cache,
-                             pkt->addr[i].kvaddr, pkt->addr[i].addr);
-}
-
-/* return number of pages pinned... */
-static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
-                                    struct ipath_user_sdma_pkt *pkt,
-                                    unsigned long addr, int tlen, int npages)
-{
-       struct page *pages[2];
-       int j;
-       int ret;
-
-       ret = get_user_pages_fast(addr, npages, 0, pages);
-       if (ret != npages) {
-               int i;
-
-               for (i = 0; i < ret; i++)
-                       put_page(pages[i]);
-
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       for (j = 0; j < npages; j++) {
-               /* map the pages... */
-               const int flen =
-                       ipath_user_sdma_page_length(addr, tlen);
-               dma_addr_t dma_addr =
-                       dma_map_page(&dd->pcidev->dev,
-                                    pages[j], 0, flen, DMA_TO_DEVICE);
-               unsigned long fofs = offset_in_page(addr);
-
-               if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
-                       ret = -ENOMEM;
-                       goto done;
-               }
-
-               ipath_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1,
-                                         pages[j], kmap(pages[j]),
-                                         dma_addr);
-
-               pkt->naddr++;
-               addr += flen;
-               tlen -= flen;
-       }
-
-done:
-       return ret;
-}
-
-static int ipath_user_sdma_pin_pkt(const struct ipath_devdata *dd,
-                                  struct ipath_user_sdma_queue *pq,
-                                  struct ipath_user_sdma_pkt *pkt,
-                                  const struct iovec *iov,
-                                  unsigned long niov)
-{
-       int ret = 0;
-       unsigned long idx;
-
-       for (idx = 0; idx < niov; idx++) {
-               const int npages = ipath_user_sdma_num_pages(iov + idx);
-               const unsigned long addr = (unsigned long) iov[idx].iov_base;
-
-               ret = ipath_user_sdma_pin_pages(dd, pkt,
-                                               addr, iov[idx].iov_len,
-                                               npages);
-               if (ret < 0)
-                       goto free_pkt;
-       }
-
-       goto done;
-
-free_pkt:
-       for (idx = 0; idx < pkt->naddr; idx++)
-               ipath_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);
-
-done:
-       return ret;
-}
-
-static int ipath_user_sdma_init_payload(const struct ipath_devdata *dd,
-                                       struct ipath_user_sdma_queue *pq,
-                                       struct ipath_user_sdma_pkt *pkt,
-                                       const struct iovec *iov,
-                                       unsigned long niov, int npages)
-{
-       int ret = 0;
-
-       if (npages >= ARRAY_SIZE(pkt->addr))
-               ret = ipath_user_sdma_coalesce(dd, pkt, iov, niov);
-       else
-               ret = ipath_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);
-
-       return ret;
-}
-
-/* free a packet list -- return counter value of last packet */
-static void ipath_user_sdma_free_pkt_list(struct device *dev,
-                                         struct ipath_user_sdma_queue *pq,
-                                         struct list_head *list)
-{
-       struct ipath_user_sdma_pkt *pkt, *pkt_next;
-
-       list_for_each_entry_safe(pkt, pkt_next, list, list) {
-               int i;
-
-               for (i = 0; i < pkt->naddr; i++)
-                       ipath_user_sdma_free_pkt_frag(dev, pq, pkt, i);
-
-               kmem_cache_free(pq->pkt_slab, pkt);
-       }
-}
-
-/*
- * copy headers, coalesce etc -- pq->lock must be held
- *
- * we queue all the packets to list, returning the
- * number of bytes total.  list must be empty initially,
- * as, if there is an error we clean it...
- */
-static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd,
-                                     struct ipath_user_sdma_queue *pq,
-                                     struct list_head *list,
-                                     const struct iovec *iov,
-                                     unsigned long niov,
-                                     int maxpkts)
-{
-       unsigned long idx = 0;
-       int ret = 0;
-       int npkts = 0;
-       struct page *page = NULL;
-       __le32 *pbc;
-       dma_addr_t dma_addr;
-       struct ipath_user_sdma_pkt *pkt = NULL;
-       size_t len;
-       size_t nw;
-       u32 counter = pq->counter;
-       int dma_mapped = 0;
-
-       while (idx < niov && npkts < maxpkts) {
-               const unsigned long addr = (unsigned long) iov[idx].iov_base;
-               const unsigned long idx_save = idx;
-               unsigned pktnw;
-               unsigned pktnwc;
-               int nfrags = 0;
-               int npages = 0;
-               int cfur;
-
-               dma_mapped = 0;
-               len = iov[idx].iov_len;
-               nw = len >> 2;
-               page = NULL;
-
-               pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
-               if (!pkt) {
-                       ret = -ENOMEM;
-                       goto free_list;
-               }
-
-               if (len < IPATH_USER_SDMA_MIN_HEADER_LENGTH ||
-                   len > PAGE_SIZE || len & 3 || addr & 3) {
-                       ret = -EINVAL;
-                       goto free_pkt;
-               }
-
-               if (len == IPATH_USER_SDMA_EXP_HEADER_LENGTH)
-                       pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
-                                            &dma_addr);
-               else
-                       pbc = NULL;
-
-               if (!pbc) {
-                       page = alloc_page(GFP_KERNEL);
-                       if (!page) {
-                               ret = -ENOMEM;
-                               goto free_pkt;
-                       }
-                       pbc = kmap(page);
-               }
-
-               cfur = copy_from_user(pbc, iov[idx].iov_base, len);
-               if (cfur) {
-                       ret = -EFAULT;
-                       goto free_pbc;
-               }
-
-               /*
-                * this assignment is a bit strange.  it's because the
-                * the pbc counts the number of 32 bit words in the full
-                * packet _except_ the first word of the pbc itself...
-                */
-               pktnwc = nw - 1;
-
-               /*
-                * pktnw computation yields the number of 32 bit words
-                * that the caller has indicated in the PBC.  note that
-                * this is one less than the total number of words that
-                * goes to the send DMA engine as the first 32 bit word
-                * of the PBC itself is not counted.  Armed with this count,
-                * we can verify that the packet is consistent with the
-                * iovec lengths.
-                */
-               pktnw = le32_to_cpu(*pbc) & IPATH_PBC_LENGTH_MASK;
-               if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) {
-                       ret = -EINVAL;
-                       goto free_pbc;
-               }
-
-
-               idx++;
-               while (pktnwc < pktnw && idx < niov) {
-                       const size_t slen = iov[idx].iov_len;
-                       const unsigned long faddr =
-                               (unsigned long) iov[idx].iov_base;
-
-                       if (slen & 3 || faddr & 3 || !slen ||
-                           slen > PAGE_SIZE) {
-                               ret = -EINVAL;
-                               goto free_pbc;
-                       }
-
-                       npages++;
-                       if ((faddr & PAGE_MASK) !=
-                           ((faddr + slen - 1) & PAGE_MASK))
-                               npages++;
-
-                       pktnwc += slen >> 2;
-                       idx++;
-                       nfrags++;
-               }
-
-               if (pktnwc != pktnw) {
-                       ret = -EINVAL;
-                       goto free_pbc;
-               }
-
-               if (page) {
-                       dma_addr = dma_map_page(&dd->pcidev->dev,
-                                               page, 0, len, DMA_TO_DEVICE);
-                       if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
-                               ret = -ENOMEM;
-                               goto free_pbc;
-                       }
-
-                       dma_mapped = 1;
-               }
-
-               ipath_user_sdma_init_header(pkt, counter, 0, len, dma_mapped,
-                                           page, pbc, dma_addr);
-
-               if (nfrags) {
-                       ret = ipath_user_sdma_init_payload(dd, pq, pkt,
-                                                          iov + idx_save + 1,
-                                                          nfrags, npages);
-                       if (ret < 0)
-                               goto free_pbc_dma;
-               }
-
-               counter++;
-               npkts++;
-
-               list_add_tail(&pkt->list, list);
-       }
-
-       ret = idx;
-       goto done;
-
-free_pbc_dma:
-       if (dma_mapped)
-               dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE);
-free_pbc:
-       if (page) {
-               kunmap(page);
-               __free_page(page);
-       } else
-               dma_pool_free(pq->header_cache, pbc, dma_addr);
-free_pkt:
-       kmem_cache_free(pq->pkt_slab, pkt);
-free_list:
-       ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
-done:
-       return ret;
-}
-
-static void ipath_user_sdma_set_complete_counter(struct ipath_user_sdma_queue *pq,
-                                                u32 c)
-{
-       pq->sent_counter = c;
-}
-
-/* try to clean out queue -- needs pq->lock */
-static int ipath_user_sdma_queue_clean(const struct ipath_devdata *dd,
-                                      struct ipath_user_sdma_queue *pq)
-{
-       struct list_head free_list;
-       struct ipath_user_sdma_pkt *pkt;
-       struct ipath_user_sdma_pkt *pkt_prev;
-       int ret = 0;
-
-       INIT_LIST_HEAD(&free_list);
-
-       list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
-               s64 descd = dd->ipath_sdma_descq_removed - pkt->added;
-
-               if (descd < 0)
-                       break;
-
-               list_move_tail(&pkt->list, &free_list);
-
-               /* one more packet cleaned */
-               ret++;
-       }
-
-       if (!list_empty(&free_list)) {
-               u32 counter;
-
-               pkt = list_entry(free_list.prev,
-                                struct ipath_user_sdma_pkt, list);
-               counter = pkt->counter;
-
-               ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
-               ipath_user_sdma_set_complete_counter(pq, counter);
-       }
-
-       return ret;
-}
-
-void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq)
-{
-       if (!pq)
-               return;
-
-       kmem_cache_destroy(pq->pkt_slab);
-       dma_pool_destroy(pq->header_cache);
-       kfree(pq);
-}
-
-/* clean descriptor queue, returns > 0 if some elements cleaned */
-static int ipath_user_sdma_hwqueue_clean(struct ipath_devdata *dd)
-{
-       int ret;
-       unsigned long flags;
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-       ret = ipath_sdma_make_progress(dd);
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-       return ret;
-}
-
-/* we're in close, drain packets so that we can cleanup successfully... */
-void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
-                                struct ipath_user_sdma_queue *pq)
-{
-       int i;
-
-       if (!pq)
-               return;
-
-       for (i = 0; i < 100; i++) {
-               mutex_lock(&pq->lock);
-               if (list_empty(&pq->sent)) {
-                       mutex_unlock(&pq->lock);
-                       break;
-               }
-               ipath_user_sdma_hwqueue_clean(dd);
-               ipath_user_sdma_queue_clean(dd, pq);
-               mutex_unlock(&pq->lock);
-               msleep(10);
-       }
-
-       if (!list_empty(&pq->sent)) {
-               struct list_head free_list;
-
-               printk(KERN_INFO "drain: lists not empty: forcing!\n");
-               INIT_LIST_HEAD(&free_list);
-               mutex_lock(&pq->lock);
-               list_splice_init(&pq->sent, &free_list);
-               ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
-               mutex_unlock(&pq->lock);
-       }
-}
-
-static inline __le64 ipath_sdma_make_desc0(struct ipath_devdata *dd,
-                                          u64 addr, u64 dwlen, u64 dwoffset)
-{
-       return cpu_to_le64(/* SDmaPhyAddr[31:0] */
-                          ((addr & 0xfffffffcULL) << 32) |
-                          /* SDmaGeneration[1:0] */
-                          ((dd->ipath_sdma_generation & 3ULL) << 30) |
-                          /* SDmaDwordCount[10:0] */
-                          ((dwlen & 0x7ffULL) << 16) |
-                          /* SDmaBufOffset[12:2] */
-                          (dwoffset & 0x7ffULL));
-}
-
-static inline __le64 ipath_sdma_make_first_desc0(__le64 descq)
-{
-       return descq | cpu_to_le64(1ULL << 12);
-}
-
-static inline __le64 ipath_sdma_make_last_desc0(__le64 descq)
-{
-                                             /* last */  /* dma head */
-       return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13);
-}
-
-static inline __le64 ipath_sdma_make_desc1(u64 addr)
-{
-       /* SDmaPhyAddr[47:32] */
-       return cpu_to_le64(addr >> 32);
-}
-
-static void ipath_user_sdma_send_frag(struct ipath_devdata *dd,
-                                     struct ipath_user_sdma_pkt *pkt, int idx,
-                                     unsigned ofs, u16 tail)
-{
-       const u64 addr = (u64) pkt->addr[idx].addr +
-               (u64) pkt->addr[idx].offset;
-       const u64 dwlen = (u64) pkt->addr[idx].length / 4;
-       __le64 *descqp;
-       __le64 descq0;
-
-       descqp = &dd->ipath_sdma_descq[tail].qw[0];
-
-       descq0 = ipath_sdma_make_desc0(dd, addr, dwlen, ofs);
-       if (idx == 0)
-               descq0 = ipath_sdma_make_first_desc0(descq0);
-       if (idx == pkt->naddr - 1)
-               descq0 = ipath_sdma_make_last_desc0(descq0);
-
-       descqp[0] = descq0;
-       descqp[1] = ipath_sdma_make_desc1(addr);
-}
-
-/* pq->lock must be held, get packets on the wire... */
-static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd,
-                                    struct ipath_user_sdma_queue *pq,
-                                    struct list_head *pktlist)
-{
-       int ret = 0;
-       unsigned long flags;
-       u16 tail;
-
-       if (list_empty(pktlist))
-               return 0;
-
-       if (unlikely(!(dd->ipath_flags & IPATH_LINKACTIVE)))
-               return -ECOMM;
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-       if (unlikely(dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK)) {
-               ret = -ECOMM;
-               goto unlock;
-       }
-
-       tail = dd->ipath_sdma_descq_tail;
-       while (!list_empty(pktlist)) {
-               struct ipath_user_sdma_pkt *pkt =
-                       list_entry(pktlist->next, struct ipath_user_sdma_pkt,
-                                  list);
-               int i;
-               unsigned ofs = 0;
-               u16 dtail = tail;
-
-               if (pkt->naddr > ipath_sdma_descq_freecnt(dd))
-                       goto unlock_check_tail;
-
-               for (i = 0; i < pkt->naddr; i++) {
-                       ipath_user_sdma_send_frag(dd, pkt, i, ofs, tail);
-                       ofs += pkt->addr[i].length >> 2;
-
-                       if (++tail == dd->ipath_sdma_descq_cnt) {
-                               tail = 0;
-                               ++dd->ipath_sdma_generation;
-                       }
-               }
-
-               if ((ofs<<2) > dd->ipath_ibmaxlen) {
-                       ipath_dbg("packet size %X > ibmax %X, fail\n",
-                               ofs<<2, dd->ipath_ibmaxlen);
-                       ret = -EMSGSIZE;
-                       goto unlock;
-               }
-
-               /*
-                * if the packet is >= 2KB mtu equivalent, we have to use
-                * the large buffers, and have to mark each descriptor as
-                * part of a large buffer packet.
-                */
-               if (ofs >= IPATH_SMALLBUF_DWORDS) {
-                       for (i = 0; i < pkt->naddr; i++) {
-                               dd->ipath_sdma_descq[dtail].qw[0] |=
-                                       cpu_to_le64(1ULL << 14);
-                               if (++dtail == dd->ipath_sdma_descq_cnt)
-                                       dtail = 0;
-                       }
-               }
-
-               dd->ipath_sdma_descq_added += pkt->naddr;
-               pkt->added = dd->ipath_sdma_descq_added;
-               list_move_tail(&pkt->list, &pq->sent);
-               ret++;
-       }
-
-unlock_check_tail:
-       /* advance the tail on the chip if necessary */
-       if (dd->ipath_sdma_descq_tail != tail) {
-               wmb();
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
-               dd->ipath_sdma_descq_tail = tail;
-       }
-
-unlock:
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-       return ret;
-}
-
-int ipath_user_sdma_writev(struct ipath_devdata *dd,
-                          struct ipath_user_sdma_queue *pq,
-                          const struct iovec *iov,
-                          unsigned long dim)
-{
-       int ret = 0;
-       struct list_head list;
-       int npkts = 0;
-
-       INIT_LIST_HEAD(&list);
-
-       mutex_lock(&pq->lock);
-
-       if (dd->ipath_sdma_descq_added != dd->ipath_sdma_descq_removed) {
-               ipath_user_sdma_hwqueue_clean(dd);
-               ipath_user_sdma_queue_clean(dd, pq);
-       }
-
-       while (dim) {
-               const int mxp = 8;
-
-               ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
-               if (ret <= 0)
-                       goto done_unlock;
-               else {
-                       dim -= ret;
-                       iov += ret;
-               }
-
-               /* force packets onto the sdma hw queue... */
-               if (!list_empty(&list)) {
-                       /*
-                        * lazily clean hw queue.  the 4 is a guess of about
-                        * how many sdma descriptors a packet will take (it
-                        * doesn't have to be perfect).
-                        */
-                       if (ipath_sdma_descq_freecnt(dd) < ret * 4) {
-                               ipath_user_sdma_hwqueue_clean(dd);
-                               ipath_user_sdma_queue_clean(dd, pq);
-                       }
-
-                       ret = ipath_user_sdma_push_pkts(dd, pq, &list);
-                       if (ret < 0)
-                               goto done_unlock;
-                       else {
-                               npkts += ret;
-                               pq->counter += ret;
-
-                               if (!list_empty(&list))
-                                       goto done_unlock;
-                       }
-               }
-       }
-
-done_unlock:
-       if (!list_empty(&list))
-               ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list);
-       mutex_unlock(&pq->lock);
-
-       return (ret < 0) ? ret : npkts;
-}
-
-int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
-                                 struct ipath_user_sdma_queue *pq)
-{
-       int ret = 0;
-
-       mutex_lock(&pq->lock);
-       ipath_user_sdma_hwqueue_clean(dd);
-       ret = ipath_user_sdma_queue_clean(dd, pq);
-       mutex_unlock(&pq->lock);
-
-       return ret;
-}
-
-u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq)
-{
-       return pq->sent_counter;
-}
-
-u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq)
-{
-       return pq->counter;
-}
-
diff --git a/drivers/staging/rdma/ipath/ipath_user_sdma.h b/drivers/staging/rdma/ipath/ipath_user_sdma.h
deleted file mode 100644 (file)
index fc76316..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/device.h>
-
-struct ipath_user_sdma_queue;
-
-struct ipath_user_sdma_queue *
-ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport);
-void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq);
-
-int ipath_user_sdma_writev(struct ipath_devdata *dd,
-                          struct ipath_user_sdma_queue *pq,
-                          const struct iovec *iov,
-                          unsigned long dim);
-
-int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
-                                 struct ipath_user_sdma_queue *pq);
-
-void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
-                                struct ipath_user_sdma_queue *pq);
-
-u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq);
-u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq);
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.c b/drivers/staging/rdma/ipath/ipath_verbs.c
deleted file mode 100644 (file)
index 1778dee..0000000
+++ /dev/null
@@ -1,2377 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_mad.h>
-#include <rdma/ib_user_verbs.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/utsname.h>
-#include <linux/rculist.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-static unsigned int ib_ipath_qp_table_size = 251;
-module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
-MODULE_PARM_DESC(qp_table_size, "QP table size");
-
-unsigned int ib_ipath_lkey_table_size = 12;
-module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
-                  S_IRUGO);
-MODULE_PARM_DESC(lkey_table_size,
-                "LKEY table size in bits (2^n, 1 <= n <= 23)");
-
-static unsigned int ib_ipath_max_pds = 0xFFFF;
-module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_pds,
-                "Maximum number of protection domains to support");
-
-static unsigned int ib_ipath_max_ahs = 0xFFFF;
-module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
-
-unsigned int ib_ipath_max_cqes = 0x2FFFF;
-module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_cqes,
-                "Maximum number of completion queue entries to support");
-
-unsigned int ib_ipath_max_cqs = 0x1FFFF;
-module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
-
-unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
-module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
-                  S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
-
-unsigned int ib_ipath_max_qps = 16384;
-module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
-
-unsigned int ib_ipath_max_sges = 0x60;
-module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
-
-unsigned int ib_ipath_max_mcast_grps = 16384;
-module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint,
-                  S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_mcast_grps,
-                "Maximum number of multicast groups to support");
-
-unsigned int ib_ipath_max_mcast_qp_attached = 16;
-module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached,
-                  uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_mcast_qp_attached,
-                "Maximum number of attached QPs to support");
-
-unsigned int ib_ipath_max_srqs = 1024;
-module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
-
-unsigned int ib_ipath_max_srq_sges = 128;
-module_param_named(max_srq_sges, ib_ipath_max_srq_sges,
-                  uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
-
-unsigned int ib_ipath_max_srq_wrs = 0x1FFFF;
-module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
-                  uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");
-
-static unsigned int ib_ipath_disable_sma;
-module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(disable_sma, "Disable the SMA");
-
-/*
- * Note that it is OK to post send work requests in the SQE and ERR
- * states; ipath_do_send() will process them and generate error
- * completions as per IB 1.2 C10-96.
- */
-const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
-       [IB_QPS_RESET] = 0,
-       [IB_QPS_INIT] = IPATH_POST_RECV_OK,
-       [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
-       [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
-           IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
-           IPATH_PROCESS_NEXT_SEND_OK,
-       [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
-           IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
-       [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
-           IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
-       [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
-           IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
-};
-
-struct ipath_ucontext {
-       struct ib_ucontext ibucontext;
-};
-
-static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
-                                                 *ibucontext)
-{
-       return container_of(ibucontext, struct ipath_ucontext, ibucontext);
-}
-
-/*
- * Translate ib_wr_opcode into ib_wc_opcode.
- */
-const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
-       [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
-       [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
-       [IB_WR_SEND] = IB_WC_SEND,
-       [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
-       [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
-       [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
-       [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
-};
-
-/*
- * System image GUID.
- */
-static __be64 sys_image_guid;
-
-/**
- * ipath_copy_sge - copy data to SGE memory
- * @ss: the SGE state
- * @data: the data to copy
- * @length: the length of the data
- */
-void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
-{
-       struct ipath_sge *sge = &ss->sge;
-
-       while (length) {
-               u32 len = sge->length;
-
-               if (len > length)
-                       len = length;
-               if (len > sge->sge_length)
-                       len = sge->sge_length;
-               BUG_ON(len == 0);
-               memcpy(sge->vaddr, data, len);
-               sge->vaddr += len;
-               sge->length -= len;
-               sge->sge_length -= len;
-               if (sge->sge_length == 0) {
-                       if (--ss->num_sge)
-                               *sge = *ss->sg_list++;
-               } else if (sge->length == 0 && sge->mr != NULL) {
-                       if (++sge->n >= IPATH_SEGSZ) {
-                               if (++sge->m >= sge->mr->mapsz)
-                                       break;
-                               sge->n = 0;
-                       }
-                       sge->vaddr =
-                               sge->mr->map[sge->m]->segs[sge->n].vaddr;
-                       sge->length =
-                               sge->mr->map[sge->m]->segs[sge->n].length;
-               }
-               data += len;
-               length -= len;
-       }
-}
-
-/**
- * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func
- * @ss: the SGE state
- * @length: the number of bytes to skip
- */
-void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
-{
-       struct ipath_sge *sge = &ss->sge;
-
-       while (length) {
-               u32 len = sge->length;
-
-               if (len > length)
-                       len = length;
-               if (len > sge->sge_length)
-                       len = sge->sge_length;
-               BUG_ON(len == 0);
-               sge->vaddr += len;
-               sge->length -= len;
-               sge->sge_length -= len;
-               if (sge->sge_length == 0) {
-                       if (--ss->num_sge)
-                               *sge = *ss->sg_list++;
-               } else if (sge->length == 0 && sge->mr != NULL) {
-                       if (++sge->n >= IPATH_SEGSZ) {
-                               if (++sge->m >= sge->mr->mapsz)
-                                       break;
-                               sge->n = 0;
-                       }
-                       sge->vaddr =
-                               sge->mr->map[sge->m]->segs[sge->n].vaddr;
-                       sge->length =
-                               sge->mr->map[sge->m]->segs[sge->n].length;
-               }
-               length -= len;
-       }
-}
-
-/*
- * Count the number of DMA descriptors needed to send length bytes of data.
- * Don't modify the ipath_sge_state to get the count.
- * Return zero if any of the segments is not aligned.
- */
-static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
-{
-       struct ipath_sge *sg_list = ss->sg_list;
-       struct ipath_sge sge = ss->sge;
-       u8 num_sge = ss->num_sge;
-       u32 ndesc = 1;  /* count the header */
-
-       while (length) {
-               u32 len = sge.length;
-
-               if (len > length)
-                       len = length;
-               if (len > sge.sge_length)
-                       len = sge.sge_length;
-               BUG_ON(len == 0);
-               if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
-                   (len != length && (len & (sizeof(u32) - 1)))) {
-                       ndesc = 0;
-                       break;
-               }
-               ndesc++;
-               sge.vaddr += len;
-               sge.length -= len;
-               sge.sge_length -= len;
-               if (sge.sge_length == 0) {
-                       if (--num_sge)
-                               sge = *sg_list++;
-               } else if (sge.length == 0 && sge.mr != NULL) {
-                       if (++sge.n >= IPATH_SEGSZ) {
-                               if (++sge.m >= sge.mr->mapsz)
-                                       break;
-                               sge.n = 0;
-                       }
-                       sge.vaddr =
-                               sge.mr->map[sge.m]->segs[sge.n].vaddr;
-                       sge.length =
-                               sge.mr->map[sge.m]->segs[sge.n].length;
-               }
-               length -= len;
-       }
-       return ndesc;
-}
-
-/*
- * Copy from the SGEs to the data buffer.
- */
-static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
-                               u32 length)
-{
-       struct ipath_sge *sge = &ss->sge;
-
-       while (length) {
-               u32 len = sge->length;
-
-               if (len > length)
-                       len = length;
-               if (len > sge->sge_length)
-                       len = sge->sge_length;
-               BUG_ON(len == 0);
-               memcpy(data, sge->vaddr, len);
-               sge->vaddr += len;
-               sge->length -= len;
-               sge->sge_length -= len;
-               if (sge->sge_length == 0) {
-                       if (--ss->num_sge)
-                               *sge = *ss->sg_list++;
-               } else if (sge->length == 0 && sge->mr != NULL) {
-                       if (++sge->n >= IPATH_SEGSZ) {
-                               if (++sge->m >= sge->mr->mapsz)
-                                       break;
-                               sge->n = 0;
-                       }
-                       sge->vaddr =
-                               sge->mr->map[sge->m]->segs[sge->n].vaddr;
-                       sge->length =
-                               sge->mr->map[sge->m]->segs[sge->n].length;
-               }
-               data += len;
-               length -= len;
-       }
-}
-
-/**
- * ipath_post_one_send - post one RC, UC, or UD send work request
- * @qp: the QP to post on
- * @wr: the work request to send
- */
-static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
-{
-       struct ipath_swqe *wqe;
-       u32 next;
-       int i;
-       int j;
-       int acc;
-       int ret;
-       unsigned long flags;
-       struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       if (qp->ibqp.qp_type != IB_QPT_SMI &&
-           !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-               ret = -ENETDOWN;
-               goto bail;
-       }
-
-       /* Check that state is OK to post send. */
-       if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
-               goto bail_inval;
-
-       /* IB spec says that num_sge == 0 is OK. */
-       if (wr->num_sge > qp->s_max_sge)
-               goto bail_inval;
-
-       /*
-        * Don't allow RDMA reads or atomic operations on UC or
-        * undefined operations.
-        * Make sure buffer is large enough to hold the result for atomics.
-        */
-       if (qp->ibqp.qp_type == IB_QPT_UC) {
-               if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
-                       goto bail_inval;
-       } else if (qp->ibqp.qp_type == IB_QPT_UD) {
-               /* Check UD opcode */
-               if (wr->opcode != IB_WR_SEND &&
-                   wr->opcode != IB_WR_SEND_WITH_IMM)
-                       goto bail_inval;
-               /* Check UD destination address PD */
-               if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
-                       goto bail_inval;
-       } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
-               goto bail_inval;
-       else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
-                  (wr->num_sge == 0 ||
-                   wr->sg_list[0].length < sizeof(u64) ||
-                   wr->sg_list[0].addr & (sizeof(u64) - 1)))
-               goto bail_inval;
-       else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
-               goto bail_inval;
-
-       next = qp->s_head + 1;
-       if (next >= qp->s_size)
-               next = 0;
-       if (next == qp->s_last) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       wqe = get_swqe_ptr(qp, qp->s_head);
-
-       if (qp->ibqp.qp_type != IB_QPT_UC &&
-           qp->ibqp.qp_type != IB_QPT_RC)
-               memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
-       else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
-                wr->opcode == IB_WR_RDMA_WRITE ||
-                wr->opcode == IB_WR_RDMA_READ)
-               memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
-       else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-                wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-               memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
-       else
-               memcpy(&wqe->wr, wr, sizeof(wqe->wr));
-
-       wqe->length = 0;
-       if (wr->num_sge) {
-               acc = wr->opcode >= IB_WR_RDMA_READ ?
-                       IB_ACCESS_LOCAL_WRITE : 0;
-               for (i = 0, j = 0; i < wr->num_sge; i++) {
-                       u32 length = wr->sg_list[i].length;
-                       int ok;
-
-                       if (length == 0)
-                               continue;
-                       ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
-                                          &wr->sg_list[i], acc);
-                       if (!ok)
-                               goto bail_inval;
-                       wqe->length += length;
-                       j++;
-               }
-               wqe->wr.num_sge = j;
-       }
-       if (qp->ibqp.qp_type == IB_QPT_UC ||
-           qp->ibqp.qp_type == IB_QPT_RC) {
-               if (wqe->length > 0x80000000U)
-                       goto bail_inval;
-       } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
-               goto bail_inval;
-       wqe->ssn = qp->s_ssn++;
-       qp->s_head = next;
-
-       ret = 0;
-       goto bail;
-
-bail_inval:
-       ret = -EINVAL;
-bail:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-       return ret;
-}
-
-/**
- * ipath_post_send - post a send on a QP
- * @ibqp: the QP to post the send on
- * @wr: the list of work requests to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-                          struct ib_send_wr **bad_wr)
-{
-       struct ipath_qp *qp = to_iqp(ibqp);
-       int err = 0;
-
-       for (; wr; wr = wr->next) {
-               err = ipath_post_one_send(qp, wr);
-               if (err) {
-                       *bad_wr = wr;
-                       goto bail;
-               }
-       }
-
-       /* Try to do the send work in the caller's context. */
-       ipath_do_send((unsigned long) qp);
-
-bail:
-       return err;
-}
-
-/**
- * ipath_post_receive - post a receive on a QP
- * @ibqp: the QP to post the receive on
- * @wr: the WR to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-                             struct ib_recv_wr **bad_wr)
-{
-       struct ipath_qp *qp = to_iqp(ibqp);
-       struct ipath_rwq *wq = qp->r_rq.wq;
-       unsigned long flags;
-       int ret;
-
-       /* Check that state is OK to post receive. */
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
-               *bad_wr = wr;
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       for (; wr; wr = wr->next) {
-               struct ipath_rwqe *wqe;
-               u32 next;
-               int i;
-
-               if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
-                       *bad_wr = wr;
-                       ret = -EINVAL;
-                       goto bail;
-               }
-
-               spin_lock_irqsave(&qp->r_rq.lock, flags);
-               next = wq->head + 1;
-               if (next >= qp->r_rq.size)
-                       next = 0;
-               if (next == wq->tail) {
-                       spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-                       *bad_wr = wr;
-                       ret = -ENOMEM;
-                       goto bail;
-               }
-
-               wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
-               wqe->wr_id = wr->wr_id;
-               wqe->num_sge = wr->num_sge;
-               for (i = 0; i < wr->num_sge; i++)
-                       wqe->sg_list[i] = wr->sg_list[i];
-               /* Make sure queue entry is written before the head index. */
-               smp_wmb();
-               wq->head = next;
-               spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-       }
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_qp_rcv - processing an incoming packet on a QP
- * @dev: the device the packet came on
- * @hdr: the packet header
- * @has_grh: true if the packet has a GRH
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP the packet came on
- *
- * This is called from ipath_ib_rcv() to process an incoming packet
- * for the given QP.
- * Called at interrupt level.
- */
-static void ipath_qp_rcv(struct ipath_ibdev *dev,
-                        struct ipath_ib_header *hdr, int has_grh,
-                        void *data, u32 tlen, struct ipath_qp *qp)
-{
-       /* Check for valid receive state. */
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-               dev->n_pkt_drops++;
-               return;
-       }
-
-       switch (qp->ibqp.qp_type) {
-       case IB_QPT_SMI:
-       case IB_QPT_GSI:
-               if (ib_ipath_disable_sma)
-                       break;
-               /* FALLTHROUGH */
-       case IB_QPT_UD:
-               ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
-               break;
-
-       case IB_QPT_RC:
-               ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
-               break;
-
-       case IB_QPT_UC:
-               ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
-               break;
-
-       default:
-               break;
-       }
-}
-
-/**
- * ipath_ib_rcv - process an incoming packet
- * @arg: the device pointer
- * @rhdr: the header of the packet
- * @data: the packet data
- * @tlen: the packet length
- *
- * This is called from ipath_kreceive() to process an incoming packet at
- * interrupt level. Tlen is the length of the header + data + CRC in bytes.
- */
-void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
-                 u32 tlen)
-{
-       struct ipath_ib_header *hdr = rhdr;
-       struct ipath_other_headers *ohdr;
-       struct ipath_qp *qp;
-       u32 qp_num;
-       int lnh;
-       u8 opcode;
-       u16 lid;
-
-       if (unlikely(dev == NULL))
-               goto bail;
-
-       if (unlikely(tlen < 24)) {      /* LRH+BTH+CRC */
-               dev->rcv_errors++;
-               goto bail;
-       }
-
-       /* Check for a valid destination LID (see ch. 7.11.1). */
-       lid = be16_to_cpu(hdr->lrh[1]);
-       if (lid < IPATH_MULTICAST_LID_BASE) {
-               lid &= ~((1 << dev->dd->ipath_lmc) - 1);
-               if (unlikely(lid != dev->dd->ipath_lid)) {
-                       dev->rcv_errors++;
-                       goto bail;
-               }
-       }
-
-       /* Check for GRH */
-       lnh = be16_to_cpu(hdr->lrh[0]) & 3;
-       if (lnh == IPATH_LRH_BTH)
-               ohdr = &hdr->u.oth;
-       else if (lnh == IPATH_LRH_GRH)
-               ohdr = &hdr->u.l.oth;
-       else {
-               dev->rcv_errors++;
-               goto bail;
-       }
-
-       opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
-       dev->opstats[opcode].n_bytes += tlen;
-       dev->opstats[opcode].n_packets++;
-
-       /* Get the destination QP number. */
-       qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK;
-       if (qp_num == IPATH_MULTICAST_QPN) {
-               struct ipath_mcast *mcast;
-               struct ipath_mcast_qp *p;
-
-               if (lnh != IPATH_LRH_GRH) {
-                       dev->n_pkt_drops++;
-                       goto bail;
-               }
-               mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
-               if (mcast == NULL) {
-                       dev->n_pkt_drops++;
-                       goto bail;
-               }
-               dev->n_multicast_rcv++;
-               list_for_each_entry_rcu(p, &mcast->qp_list, list)
-                       ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
-               /*
-                * Notify ipath_multicast_detach() if it is waiting for us
-                * to finish.
-                */
-               if (atomic_dec_return(&mcast->refcount) <= 1)
-                       wake_up(&mcast->wait);
-       } else {
-               qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
-               if (qp) {
-                       dev->n_unicast_rcv++;
-                       ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
-                                    tlen, qp);
-                       /*
-                        * Notify ipath_destroy_qp() if it is waiting
-                        * for us to finish.
-                        */
-                       if (atomic_dec_and_test(&qp->refcount))
-                               wake_up(&qp->wait);
-               } else
-                       dev->n_pkt_drops++;
-       }
-
-bail:;
-}
-
-/**
- * ipath_ib_timer - verbs timer
- * @arg: the device pointer
- *
- * This is called from ipath_do_rcv_timer() at interrupt level to check for
- * QPs which need retransmits and to collect performance numbers.
- */
-static void ipath_ib_timer(struct ipath_ibdev *dev)
-{
-       struct ipath_qp *resend = NULL;
-       struct ipath_qp *rnr = NULL;
-       struct list_head *last;
-       struct ipath_qp *qp;
-       unsigned long flags;
-
-       if (dev == NULL)
-               return;
-
-       spin_lock_irqsave(&dev->pending_lock, flags);
-       /* Start filling the next pending queue. */
-       if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
-               dev->pending_index = 0;
-       /* Save any requests still in the new queue, they have timed out. */
-       last = &dev->pending[dev->pending_index];
-       while (!list_empty(last)) {
-               qp = list_entry(last->next, struct ipath_qp, timerwait);
-               list_del_init(&qp->timerwait);
-               qp->timer_next = resend;
-               resend = qp;
-               atomic_inc(&qp->refcount);
-       }
-       last = &dev->rnrwait;
-       if (!list_empty(last)) {
-               qp = list_entry(last->next, struct ipath_qp, timerwait);
-               if (--qp->s_rnr_timeout == 0) {
-                       do {
-                               list_del_init(&qp->timerwait);
-                               qp->timer_next = rnr;
-                               rnr = qp;
-                               atomic_inc(&qp->refcount);
-                               if (list_empty(last))
-                                       break;
-                               qp = list_entry(last->next, struct ipath_qp,
-                                               timerwait);
-                       } while (qp->s_rnr_timeout == 0);
-               }
-       }
-       /*
-        * We should only be in the started state if pma_sample_start != 0
-        */
-       if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
-           --dev->pma_sample_start == 0) {
-               dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
-               ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
-                                       &dev->ipath_rword,
-                                       &dev->ipath_spkts,
-                                       &dev->ipath_rpkts,
-                                       &dev->ipath_xmit_wait);
-       }
-       if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
-               if (dev->pma_sample_interval == 0) {
-                       u64 ta, tb, tc, td, te;
-
-                       dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
-                       ipath_snapshot_counters(dev->dd, &ta, &tb,
-                                               &tc, &td, &te);
-
-                       dev->ipath_sword = ta - dev->ipath_sword;
-                       dev->ipath_rword = tb - dev->ipath_rword;
-                       dev->ipath_spkts = tc - dev->ipath_spkts;
-                       dev->ipath_rpkts = td - dev->ipath_rpkts;
-                       dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
-               } else {
-                       dev->pma_sample_interval--;
-               }
-       }
-       spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-       /* XXX What if timer fires again while this is running? */
-       while (resend != NULL) {
-               qp = resend;
-               resend = qp->timer_next;
-
-               spin_lock_irqsave(&qp->s_lock, flags);
-               if (qp->s_last != qp->s_tail &&
-                   ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
-                       dev->n_timeouts++;
-                       ipath_restart_rc(qp, qp->s_last_psn + 1);
-               }
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-
-               /* Notify ipath_destroy_qp() if it is waiting. */
-               if (atomic_dec_and_test(&qp->refcount))
-                       wake_up(&qp->wait);
-       }
-       while (rnr != NULL) {
-               qp = rnr;
-               rnr = qp->timer_next;
-
-               spin_lock_irqsave(&qp->s_lock, flags);
-               if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
-                       ipath_schedule_send(qp);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-
-               /* Notify ipath_destroy_qp() if it is waiting. */
-               if (atomic_dec_and_test(&qp->refcount))
-                       wake_up(&qp->wait);
-       }
-}
-
-static void update_sge(struct ipath_sge_state *ss, u32 length)
-{
-       struct ipath_sge *sge = &ss->sge;
-
-       sge->vaddr += length;
-       sge->length -= length;
-       sge->sge_length -= length;
-       if (sge->sge_length == 0) {
-               if (--ss->num_sge)
-                       *sge = *ss->sg_list++;
-       } else if (sge->length == 0 && sge->mr != NULL) {
-               if (++sge->n >= IPATH_SEGSZ) {
-                       if (++sge->m >= sge->mr->mapsz)
-                               return;
-                       sge->n = 0;
-               }
-               sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
-               sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
-       }
-}
-
-#ifdef __LITTLE_ENDIAN
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
-       return data >> shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
-       return data << shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
-       data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
-       data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
-       return data;
-}
-#else
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
-       return data << shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
-       return data >> shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
-       data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
-       data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
-       return data;
-}
-#endif
-
-static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
-                   u32 length, unsigned flush_wc)
-{
-       u32 extra = 0;
-       u32 data = 0;
-       u32 last;
-
-       while (1) {
-               u32 len = ss->sge.length;
-               u32 off;
-
-               if (len > length)
-                       len = length;
-               if (len > ss->sge.sge_length)
-                       len = ss->sge.sge_length;
-               BUG_ON(len == 0);
-               /* If the source address is not aligned, try to align it. */
-               off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
-               if (off) {
-                       u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
-                                           ~(sizeof(u32) - 1));
-                       u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
-                       u32 y;
-
-                       y = sizeof(u32) - off;
-                       if (len > y)
-                               len = y;
-                       if (len + extra >= sizeof(u32)) {
-                               data |= set_upper_bits(v, extra *
-                                                      BITS_PER_BYTE);
-                               len = sizeof(u32) - extra;
-                               if (len == length) {
-                                       last = data;
-                                       break;
-                               }
-                               __raw_writel(data, piobuf);
-                               piobuf++;
-                               extra = 0;
-                               data = 0;
-                       } else {
-                               /* Clear unused upper bytes */
-                               data |= clear_upper_bytes(v, len, extra);
-                               if (len == length) {
-                                       last = data;
-                                       break;
-                               }
-                               extra += len;
-                       }
-               } else if (extra) {
-                       /* Source address is aligned. */
-                       u32 *addr = (u32 *) ss->sge.vaddr;
-                       int shift = extra * BITS_PER_BYTE;
-                       int ushift = 32 - shift;
-                       u32 l = len;
-
-                       while (l >= sizeof(u32)) {
-                               u32 v = *addr;
-
-                               data |= set_upper_bits(v, shift);
-                               __raw_writel(data, piobuf);
-                               data = get_upper_bits(v, ushift);
-                               piobuf++;
-                               addr++;
-                               l -= sizeof(u32);
-                       }
-                       /*
-                        * We still have 'extra' number of bytes leftover.
-                        */
-                       if (l) {
-                               u32 v = *addr;
-
-                               if (l + extra >= sizeof(u32)) {
-                                       data |= set_upper_bits(v, shift);
-                                       len -= l + extra - sizeof(u32);
-                                       if (len == length) {
-                                               last = data;
-                                               break;
-                                       }
-                                       __raw_writel(data, piobuf);
-                                       piobuf++;
-                                       extra = 0;
-                                       data = 0;
-                               } else {
-                                       /* Clear unused upper bytes */
-                                       data |= clear_upper_bytes(v, l,
-                                                                 extra);
-                                       if (len == length) {
-                                               last = data;
-                                               break;
-                                       }
-                                       extra += l;
-                               }
-                       } else if (len == length) {
-                               last = data;
-                               break;
-                       }
-               } else if (len == length) {
-                       u32 w;
-
-                       /*
-                        * Need to round up for the last dword in the
-                        * packet.
-                        */
-                       w = (len + 3) >> 2;
-                       __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
-                       piobuf += w - 1;
-                       last = ((u32 *) ss->sge.vaddr)[w - 1];
-                       break;
-               } else {
-                       u32 w = len >> 2;
-
-                       __iowrite32_copy(piobuf, ss->sge.vaddr, w);
-                       piobuf += w;
-
-                       extra = len & (sizeof(u32) - 1);
-                       if (extra) {
-                               u32 v = ((u32 *) ss->sge.vaddr)[w];
-
-                               /* Clear unused upper bytes */
-                               data = clear_upper_bytes(v, extra, 0);
-                       }
-               }
-               update_sge(ss, len);
-               length -= len;
-       }
-       /* Update address before sending packet. */
-       update_sge(ss, length);
-       if (flush_wc) {
-               /* must flush early everything before trigger word */
-               ipath_flush_wc();
-               __raw_writel(last, piobuf);
-               /* be sure trigger word is written */
-               ipath_flush_wc();
-       } else
-               __raw_writel(last, piobuf);
-}
-
-/*
- * Convert IB rate to delay multiplier.
- */
-unsigned ipath_ib_rate_to_mult(enum ib_rate rate)
-{
-       switch (rate) {
-       case IB_RATE_2_5_GBPS: return 8;
-       case IB_RATE_5_GBPS:   return 4;
-       case IB_RATE_10_GBPS:  return 2;
-       case IB_RATE_20_GBPS:  return 1;
-       default:               return 0;
-       }
-}
-
-/*
- * Convert delay multiplier to IB rate
- */
-static enum ib_rate ipath_mult_to_ib_rate(unsigned mult)
-{
-       switch (mult) {
-       case 8:  return IB_RATE_2_5_GBPS;
-       case 4:  return IB_RATE_5_GBPS;
-       case 2:  return IB_RATE_10_GBPS;
-       case 1:  return IB_RATE_20_GBPS;
-       default: return IB_RATE_PORT_CURRENT;
-       }
-}
-
-static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)
-{
-       struct ipath_verbs_txreq *tx = NULL;
-       unsigned long flags;
-
-       spin_lock_irqsave(&dev->pending_lock, flags);
-       if (!list_empty(&dev->txreq_free)) {
-               struct list_head *l = dev->txreq_free.next;
-
-               list_del(l);
-               tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);
-       }
-       spin_unlock_irqrestore(&dev->pending_lock, flags);
-       return tx;
-}
-
-static inline void put_txreq(struct ipath_ibdev *dev,
-                            struct ipath_verbs_txreq *tx)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&dev->pending_lock, flags);
-       list_add(&tx->txreq.list, &dev->txreq_free);
-       spin_unlock_irqrestore(&dev->pending_lock, flags);
-}
-
-static void sdma_complete(void *cookie, int status)
-{
-       struct ipath_verbs_txreq *tx = cookie;
-       struct ipath_qp *qp = tx->qp;
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       unsigned long flags;
-       enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
-               IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
-
-       if (atomic_dec_and_test(&qp->s_dma_busy)) {
-               spin_lock_irqsave(&qp->s_lock, flags);
-               if (tx->wqe)
-                       ipath_send_complete(qp, tx->wqe, ibs);
-               if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
-                    qp->s_last != qp->s_head) ||
-                   (qp->s_flags & IPATH_S_WAIT_DMA))
-                       ipath_schedule_send(qp);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-               wake_up(&qp->wait_dma);
-       } else if (tx->wqe) {
-               spin_lock_irqsave(&qp->s_lock, flags);
-               ipath_send_complete(qp, tx->wqe, ibs);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-       }
-
-       if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
-               kfree(tx->txreq.map_addr);
-       put_txreq(dev, tx);
-
-       if (atomic_dec_and_test(&qp->refcount))
-               wake_up(&qp->wait);
-}
-
-static void decrement_dma_busy(struct ipath_qp *qp)
-{
-       unsigned long flags;
-
-       if (atomic_dec_and_test(&qp->s_dma_busy)) {
-               spin_lock_irqsave(&qp->s_lock, flags);
-               if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
-                    qp->s_last != qp->s_head) ||
-                   (qp->s_flags & IPATH_S_WAIT_DMA))
-                       ipath_schedule_send(qp);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-               wake_up(&qp->wait_dma);
-       }
-}
-
-/*
- * Compute the number of clock cycles of delay before sending the next packet.
- * The multipliers reflect the number of clocks for the fastest rate so
- * one tick at 4xDDR is 8 ticks at 1xSDR.
- * If the destination port will take longer to receive a packet than
- * the outgoing link can send it, we need to delay sending the next packet
- * by the difference in time it takes the receiver to receive and the sender
- * to send this packet.
- * Note that this delay is always correct for UC and RC but not always
- * optimal for UD. For UD, the destination HCA can be different for each
- * packet, in which case, we could send packets to a different destination
- * while "waiting" for the delay. The overhead for doing this without
- * HW support is more than just paying the cost of delaying some packets
- * unnecessarily.
- */
-static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)
-{
-       return (rcv_mult > snd_mult) ?
-               (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
-}
-
-static int ipath_verbs_send_dma(struct ipath_qp *qp,
-                               struct ipath_ib_header *hdr, u32 hdrwords,
-                               struct ipath_sge_state *ss, u32 len,
-                               u32 plen, u32 dwords)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ipath_devdata *dd = dev->dd;
-       struct ipath_verbs_txreq *tx;
-       u32 *piobuf;
-       u32 control;
-       u32 ndesc;
-       int ret;
-
-       tx = qp->s_tx;
-       if (tx) {
-               qp->s_tx = NULL;
-               /* resend previously constructed packet */
-               atomic_inc(&qp->s_dma_busy);
-               ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
-               if (ret) {
-                       qp->s_tx = tx;
-                       decrement_dma_busy(qp);
-               }
-               goto bail;
-       }
-
-       tx = get_txreq(dev);
-       if (!tx) {
-               ret = -EBUSY;
-               goto bail;
-       }
-
-       /*
-        * Get the saved delay count we computed for the previous packet
-        * and save the delay count for this packet to be used next time
-        * we get here.
-        */
-       control = qp->s_pkt_delay;
-       qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
-
-       tx->qp = qp;
-       atomic_inc(&qp->refcount);
-       tx->wqe = qp->s_wqe;
-       tx->txreq.callback = sdma_complete;
-       tx->txreq.callback_cookie = tx;
-       tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |
-               IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;
-       if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
-               tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;
-
-       /* VL15 packets bypass credit check */
-       if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {
-               control |= 1ULL << 31;
-               tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;
-       }
-
-       if (len) {
-               /*
-                * Don't try to DMA if it takes more descriptors than
-                * the queue holds.
-                */
-               ndesc = ipath_count_sge(ss, len);
-               if (ndesc >= dd->ipath_sdma_descq_cnt)
-                       ndesc = 0;
-       } else
-               ndesc = 1;
-       if (ndesc) {
-               tx->hdr.pbc[0] = cpu_to_le32(plen);
-               tx->hdr.pbc[1] = cpu_to_le32(control);
-               memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);
-               tx->txreq.sg_count = ndesc;
-               tx->map_len = (hdrwords + 2) << 2;
-               tx->txreq.map_addr = &tx->hdr;
-               atomic_inc(&qp->s_dma_busy);
-               ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
-               if (ret) {
-                       /* save ss and length in dwords */
-                       tx->ss = ss;
-                       tx->len = dwords;
-                       qp->s_tx = tx;
-                       decrement_dma_busy(qp);
-               }
-               goto bail;
-       }
-
-       /* Allocate a buffer and copy the header and payload to it. */
-       tx->map_len = (plen + 1) << 2;
-       piobuf = kmalloc(tx->map_len, GFP_ATOMIC);
-       if (unlikely(piobuf == NULL)) {
-               ret = -EBUSY;
-               goto err_tx;
-       }
-       tx->txreq.map_addr = piobuf;
-       tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;
-       tx->txreq.sg_count = 1;
-
-       *piobuf++ = (__force u32) cpu_to_le32(plen);
-       *piobuf++ = (__force u32) cpu_to_le32(control);
-       memcpy(piobuf, hdr, hdrwords << 2);
-       ipath_copy_from_sge(piobuf + hdrwords, ss, len);
-
-       atomic_inc(&qp->s_dma_busy);
-       ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
-       /*
-        * If we couldn't queue the DMA request, save the info
-        * and try again later rather than destroying the
-        * buffer and undoing the side effects of the copy.
-        */
-       if (ret) {
-               tx->ss = NULL;
-               tx->len = 0;
-               qp->s_tx = tx;
-               decrement_dma_busy(qp);
-       }
-       dev->n_unaligned++;
-       goto bail;
-
-err_tx:
-       if (atomic_dec_and_test(&qp->refcount))
-               wake_up(&qp->wait);
-       put_txreq(dev, tx);
-bail:
-       return ret;
-}
-
-static int ipath_verbs_send_pio(struct ipath_qp *qp,
-                               struct ipath_ib_header *ibhdr, u32 hdrwords,
-                               struct ipath_sge_state *ss, u32 len,
-                               u32 plen, u32 dwords)
-{
-       struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
-       u32 *hdr = (u32 *) ibhdr;
-       u32 __iomem *piobuf;
-       unsigned flush_wc;
-       u32 control;
-       int ret;
-       unsigned long flags;
-
-       piobuf = ipath_getpiobuf(dd, plen, NULL);
-       if (unlikely(piobuf == NULL)) {
-               ret = -EBUSY;
-               goto bail;
-       }
-
-       /*
-        * Get the saved delay count we computed for the previous packet
-        * and save the delay count for this packet to be used next time
-        * we get here.
-        */
-       control = qp->s_pkt_delay;
-       qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
-
-       /* VL15 packets bypass credit check */
-       if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)
-               control |= 1ULL << 31;
-
-       /*
-        * Write the length to the control qword plus any needed flags.
-        * We have to flush after the PBC for correctness on some cpus
-        * or WC buffer can be written out of order.
-        */
-       writeq(((u64) control << 32) | plen, piobuf);
-       piobuf += 2;
-
-       flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
-       if (len == 0) {
-               /*
-                * If there is just the header portion, must flush before
-                * writing last word of header for correctness, and after
-                * the last header word (trigger word).
-                */
-               if (flush_wc) {
-                       ipath_flush_wc();
-                       __iowrite32_copy(piobuf, hdr, hdrwords - 1);
-                       ipath_flush_wc();
-                       __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
-                       ipath_flush_wc();
-               } else
-                       __iowrite32_copy(piobuf, hdr, hdrwords);
-               goto done;
-       }
-
-       if (flush_wc)
-               ipath_flush_wc();
-       __iowrite32_copy(piobuf, hdr, hdrwords);
-       piobuf += hdrwords;
-
-       /* The common case is aligned and contained in one segment. */
-       if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
-                  !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
-               u32 *addr = (u32 *) ss->sge.vaddr;
-
-               /* Update address before sending packet. */
-               update_sge(ss, len);
-               if (flush_wc) {
-                       __iowrite32_copy(piobuf, addr, dwords - 1);
-                       /* must flush early everything before trigger word */
-                       ipath_flush_wc();
-                       __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
-                       /* be sure trigger word is written */
-                       ipath_flush_wc();
-               } else
-                       __iowrite32_copy(piobuf, addr, dwords);
-               goto done;
-       }
-       copy_io(piobuf, ss, len, flush_wc);
-done:
-       if (qp->s_wqe) {
-               spin_lock_irqsave(&qp->s_lock, flags);
-               ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-       }
-       ret = 0;
-bail:
-       return ret;
-}
-
-/**
- * ipath_verbs_send - send a packet
- * @qp: the QP to send on
- * @hdr: the packet header
- * @hdrwords: the number of 32-bit words in the header
- * @ss: the SGE to send
- * @len: the length of the packet in bytes
- */
-int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
-                    u32 hdrwords, struct ipath_sge_state *ss, u32 len)
-{
-       struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
-       u32 plen;
-       int ret;
-       u32 dwords = (len + 3) >> 2;
-
-       /*
-        * Calculate the send buffer trigger address.
-        * The +1 counts for the pbc control dword following the pbc length.
-        */
-       plen = hdrwords + dwords + 1;
-
-       /*
-        * VL15 packets (IB_QPT_SMI) will always use PIO, so we
-        * can defer SDMA restart until link goes ACTIVE without
-        * worrying about just how we got there.
-        */
-       if (qp->ibqp.qp_type == IB_QPT_SMI ||
-           !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
-               ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
-                                          plen, dwords);
-       else
-               ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
-                                          plen, dwords);
-
-       return ret;
-}
-
-int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-                           u64 *rwords, u64 *spkts, u64 *rpkts,
-                           u64 *xmit_wait)
-{
-       int ret;
-
-       if (!(dd->ipath_flags & IPATH_INITTED)) {
-               /* no hardware, freeze, etc. */
-               ret = -EINVAL;
-               goto bail;
-       }
-       *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
-       *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
-       *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
-       *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-       *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_get_counters - get various chip counters
- * @dd: the infinipath device
- * @cntrs: counters are placed here
- *
- * Return the counters needed by recv_pma_get_portcounters().
- */
-int ipath_get_counters(struct ipath_devdata *dd,
-                      struct ipath_verbs_counters *cntrs)
-{
-       struct ipath_cregs const *crp = dd->ipath_cregs;
-       int ret;
-
-       if (!(dd->ipath_flags & IPATH_INITTED)) {
-               /* no hardware, freeze, etc. */
-               ret = -EINVAL;
-               goto bail;
-       }
-       cntrs->symbol_error_counter =
-               ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
-       cntrs->link_error_recovery_counter =
-               ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
-       /*
-        * The link downed counter counts when the other side downs the
-        * connection.  We add in the number of times we downed the link
-        * due to local link integrity errors to compensate.
-        */
-       cntrs->link_downed_counter =
-               ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
-       cntrs->port_rcv_errors =
-               ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
-               ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
-               ipath_snap_cntr(dd, crp->cr_portovflcnt) +
-               ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
-               ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
-               ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
-               ipath_snap_cntr(dd, crp->cr_erricrccnt) +
-               ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
-               ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
-               ipath_snap_cntr(dd, crp->cr_badformatcnt) +
-               dd->ipath_rxfc_unsupvl_errs;
-       if (crp->cr_rxotherlocalphyerrcnt)
-               cntrs->port_rcv_errors +=
-                       ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);
-       if (crp->cr_rxvlerrcnt)
-               cntrs->port_rcv_errors +=
-                       ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);
-       cntrs->port_rcv_remphys_errors =
-               ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
-       cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
-       cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
-       cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
-       cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
-       cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
-       cntrs->local_link_integrity_errors =
-               crp->cr_locallinkintegrityerrcnt ?
-               ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :
-               ((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
-                dd->ipath_lli_errs : dd->ipath_lli_errors);
-       cntrs->excessive_buffer_overrun_errors =
-               crp->cr_excessbufferovflcnt ?
-               ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :
-               dd->ipath_overrun_thresh_errs;
-       cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?
-               ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_ib_piobufavail - callback when a PIO buffer is available
- * @arg: the device pointer
- *
- * This is called from ipath_intr() at interrupt level when a PIO buffer is
- * available after ipath_verbs_send() returned an error that no buffers were
- * available.  Return 1 if we consumed all the PIO buffers and we still have
- * QPs waiting for buffers (for now, just restart the send tasklet and
- * return zero).
- */
-int ipath_ib_piobufavail(struct ipath_ibdev *dev)
-{
-       struct list_head *list;
-       struct ipath_qp *qplist;
-       struct ipath_qp *qp;
-       unsigned long flags;
-
-       if (dev == NULL)
-               goto bail;
-
-       list = &dev->piowait;
-       qplist = NULL;
-
-       spin_lock_irqsave(&dev->pending_lock, flags);
-       while (!list_empty(list)) {
-               qp = list_entry(list->next, struct ipath_qp, piowait);
-               list_del_init(&qp->piowait);
-               qp->pio_next = qplist;
-               qplist = qp;
-               atomic_inc(&qp->refcount);
-       }
-       spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-       while (qplist != NULL) {
-               qp = qplist;
-               qplist = qp->pio_next;
-
-               spin_lock_irqsave(&qp->s_lock, flags);
-               if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
-                       ipath_schedule_send(qp);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-
-               /* Notify ipath_destroy_qp() if it is waiting. */
-               if (atomic_dec_and_test(&qp->refcount))
-                       wake_up(&qp->wait);
-       }
-
-bail:
-       return 0;
-}
-
-static int ipath_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-                             struct ib_udata *uhw)
-{
-       struct ipath_ibdev *dev = to_idev(ibdev);
-
-       if (uhw->inlen || uhw->outlen)
-               return -EINVAL;
-
-       memset(props, 0, sizeof(*props));
-
-       props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
-               IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
-               IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
-               IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
-       props->page_size_cap = PAGE_SIZE;
-       props->vendor_id =
-               IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
-       props->vendor_part_id = dev->dd->ipath_deviceid;
-       props->hw_ver = dev->dd->ipath_pcirev;
-
-       props->sys_image_guid = dev->sys_image_guid;
-
-       props->max_mr_size = ~0ull;
-       props->max_qp = ib_ipath_max_qps;
-       props->max_qp_wr = ib_ipath_max_qp_wrs;
-       props->max_sge = ib_ipath_max_sges;
-       props->max_sge_rd = ib_ipath_max_sges;
-       props->max_cq = ib_ipath_max_cqs;
-       props->max_ah = ib_ipath_max_ahs;
-       props->max_cqe = ib_ipath_max_cqes;
-       props->max_mr = dev->lk_table.max;
-       props->max_fmr = dev->lk_table.max;
-       props->max_map_per_fmr = 32767;
-       props->max_pd = ib_ipath_max_pds;
-       props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
-       props->max_qp_init_rd_atom = 255;
-       /* props->max_res_rd_atom */
-       props->max_srq = ib_ipath_max_srqs;
-       props->max_srq_wr = ib_ipath_max_srq_wrs;
-       props->max_srq_sge = ib_ipath_max_srq_sges;
-       /* props->local_ca_ack_delay */
-       props->atomic_cap = IB_ATOMIC_GLOB;
-       props->max_pkeys = ipath_get_npkeys(dev->dd);
-       props->max_mcast_grp = ib_ipath_max_mcast_grps;
-       props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
-       props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
-               props->max_mcast_grp;
-
-       return 0;
-}
-
-const u8 ipath_cvt_physportstate[32] = {
-       [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
-       [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
-       [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
-       [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
-       [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
-       [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
-       [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =
-               IB_PHYSPORTSTATE_CFG_TRAIN,
-       [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =
-               IB_PHYSPORTSTATE_CFG_TRAIN,
-       [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =
-               IB_PHYSPORTSTATE_CFG_TRAIN,
-       [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =
-               IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
-       [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =
-               IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
-       [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =
-               IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
-       [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
-};
-
-u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
-{
-       return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
-}
-
-static int ipath_query_port(struct ib_device *ibdev,
-                           u8 port, struct ib_port_attr *props)
-{
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_devdata *dd = dev->dd;
-       enum ib_mtu mtu;
-       u16 lid = dd->ipath_lid;
-       u64 ibcstat;
-
-       memset(props, 0, sizeof(*props));
-       props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
-       props->lmc = dd->ipath_lmc;
-       props->sm_lid = dev->sm_lid;
-       props->sm_sl = dev->sm_sl;
-       ibcstat = dd->ipath_lastibcstat;
-       /* map LinkState to IB portinfo values.  */
-       props->state = ipath_ib_linkstate(dd, ibcstat) + 1;
-
-       /* See phys_state_show() */
-       props->phys_state = /* MEA: assumes shift == 0 */
-               ipath_cvt_physportstate[dd->ipath_lastibcstat &
-               dd->ibcs_lts_mask];
-       props->port_cap_flags = dev->port_cap_flags;
-       props->gid_tbl_len = 1;
-       props->max_msg_sz = 0x80000000;
-       props->pkey_tbl_len = ipath_get_npkeys(dd);
-       props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
-               dev->z_pkey_violations;
-       props->qkey_viol_cntr = dev->qkey_violations;
-       props->active_width = dd->ipath_link_width_active;
-       /* See rate_show() */
-       props->active_speed = dd->ipath_link_speed_active;
-       props->max_vl_num = 1;          /* VLCap = VL0 */
-       props->init_type_reply = 0;
-
-       props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
-       switch (dd->ipath_ibmtu) {
-       case 4096:
-               mtu = IB_MTU_4096;
-               break;
-       case 2048:
-               mtu = IB_MTU_2048;
-               break;
-       case 1024:
-               mtu = IB_MTU_1024;
-               break;
-       case 512:
-               mtu = IB_MTU_512;
-               break;
-       case 256:
-               mtu = IB_MTU_256;
-               break;
-       default:
-               mtu = IB_MTU_2048;
-       }
-       props->active_mtu = mtu;
-       props->subnet_timeout = dev->subnet_timeout;
-
-       return 0;
-}
-
-static int ipath_modify_device(struct ib_device *device,
-                              int device_modify_mask,
-                              struct ib_device_modify *device_modify)
-{
-       int ret;
-
-       if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
-                                  IB_DEVICE_MODIFY_NODE_DESC)) {
-               ret = -EOPNOTSUPP;
-               goto bail;
-       }
-
-       if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
-               memcpy(device->node_desc, device_modify->node_desc, 64);
-
-       if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
-               to_idev(device)->sys_image_guid =
-                       cpu_to_be64(device_modify->sys_image_guid);
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-static int ipath_modify_port(struct ib_device *ibdev,
-                            u8 port, int port_modify_mask,
-                            struct ib_port_modify *props)
-{
-       struct ipath_ibdev *dev = to_idev(ibdev);
-
-       dev->port_cap_flags |= props->set_port_cap_mask;
-       dev->port_cap_flags &= ~props->clr_port_cap_mask;
-       if (port_modify_mask & IB_PORT_SHUTDOWN)
-               ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
-       if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
-               dev->qkey_violations = 0;
-       return 0;
-}
-
-static int ipath_query_gid(struct ib_device *ibdev, u8 port,
-                          int index, union ib_gid *gid)
-{
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       int ret;
-
-       if (index >= 1) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       gid->global.subnet_prefix = dev->gid_prefix;
-       gid->global.interface_id = dev->dd->ipath_guid;
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
-                                   struct ib_ucontext *context,
-                                   struct ib_udata *udata)
-{
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_pd *pd;
-       struct ib_pd *ret;
-
-       /*
-        * This is actually totally arbitrary.  Some correctness tests
-        * assume there's a maximum number of PDs that can be allocated.
-        * We don't actually have this limit, but we fail the test if
-        * we allow allocations of more than we report for this value.
-        */
-
-       pd = kmalloc(sizeof *pd, GFP_KERNEL);
-       if (!pd) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       spin_lock(&dev->n_pds_lock);
-       if (dev->n_pds_allocated == ib_ipath_max_pds) {
-               spin_unlock(&dev->n_pds_lock);
-               kfree(pd);
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       dev->n_pds_allocated++;
-       spin_unlock(&dev->n_pds_lock);
-
-       /* ib_alloc_pd() will initialize pd->ibpd. */
-       pd->user = udata != NULL;
-
-       ret = &pd->ibpd;
-
-bail:
-       return ret;
-}
-
-static int ipath_dealloc_pd(struct ib_pd *ibpd)
-{
-       struct ipath_pd *pd = to_ipd(ibpd);
-       struct ipath_ibdev *dev = to_idev(ibpd->device);
-
-       spin_lock(&dev->n_pds_lock);
-       dev->n_pds_allocated--;
-       spin_unlock(&dev->n_pds_lock);
-
-       kfree(pd);
-
-       return 0;
-}
-
-/**
- * ipath_create_ah - create an address handle
- * @pd: the protection domain
- * @ah_attr: the attributes of the AH
- *
- * This may be called from interrupt context.
- */
-static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
-                                    struct ib_ah_attr *ah_attr)
-{
-       struct ipath_ah *ah;
-       struct ib_ah *ret;
-       struct ipath_ibdev *dev = to_idev(pd->device);
-       unsigned long flags;
-
-       /* A multicast address requires a GRH (see ch. 8.4.1). */
-       if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
-           ah_attr->dlid != IPATH_PERMISSIVE_LID &&
-           !(ah_attr->ah_flags & IB_AH_GRH)) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
-
-       if (ah_attr->dlid == 0) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
-
-       if (ah_attr->port_num < 1 ||
-           ah_attr->port_num > pd->device->phys_port_cnt) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
-
-       ah = kmalloc(sizeof *ah, GFP_ATOMIC);
-       if (!ah) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       spin_lock_irqsave(&dev->n_ahs_lock, flags);
-       if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
-               spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-               kfree(ah);
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       dev->n_ahs_allocated++;
-       spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-       /* ib_create_ah() will initialize ah->ibah. */
-       ah->attr = *ah_attr;
-       ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);
-
-       ret = &ah->ibah;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_destroy_ah - destroy an address handle
- * @ibah: the AH to destroy
- *
- * This may be called from interrupt context.
- */
-static int ipath_destroy_ah(struct ib_ah *ibah)
-{
-       struct ipath_ibdev *dev = to_idev(ibah->device);
-       struct ipath_ah *ah = to_iah(ibah);
-       unsigned long flags;
-
-       spin_lock_irqsave(&dev->n_ahs_lock, flags);
-       dev->n_ahs_allocated--;
-       spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-       kfree(ah);
-
-       return 0;
-}
-
-static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
-       struct ipath_ah *ah = to_iah(ibah);
-
-       *ah_attr = ah->attr;
-       ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);
-
-       return 0;
-}
-
-/**
- * ipath_get_npkeys - return the size of the PKEY table for port 0
- * @dd: the infinipath device
- */
-unsigned ipath_get_npkeys(struct ipath_devdata *dd)
-{
-       return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
-}
-
-/**
- * ipath_get_pkey - return the indexed PKEY from the port PKEY table
- * @dd: the infinipath device
- * @index: the PKEY index
- */
-unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
-{
-       unsigned ret;
-
-       /* always a kernel port, no locking needed */
-       if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
-               ret = 0;
-       else
-               ret = dd->ipath_pd[0]->port_pkeys[index];
-
-       return ret;
-}
-
-static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
-                           u16 *pkey)
-{
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       int ret;
-
-       if (index >= ipath_get_npkeys(dev->dd)) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       *pkey = ipath_get_pkey(dev->dd, index);
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_alloc_ucontext - allocate a ucontest
- * @ibdev: the infiniband device
- * @udata: not used by the InfiniPath driver
- */
-
-static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
-                                               struct ib_udata *udata)
-{
-       struct ipath_ucontext *context;
-       struct ib_ucontext *ret;
-
-       context = kmalloc(sizeof *context, GFP_KERNEL);
-       if (!context) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       ret = &context->ibucontext;
-
-bail:
-       return ret;
-}
-
-static int ipath_dealloc_ucontext(struct ib_ucontext *context)
-{
-       kfree(to_iucontext(context));
-       return 0;
-}
-
-static int ipath_verbs_register_sysfs(struct ib_device *dev);
-
-static void __verbs_timer(unsigned long arg)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *) arg;
-
-       /* Handle verbs layer timeouts. */
-       ipath_ib_timer(dd->verbs_dev);
-
-       mod_timer(&dd->verbs_timer, jiffies + 1);
-}
-
-static int enable_timer(struct ipath_devdata *dd)
-{
-       /*
-        * Early chips had a design flaw where the chip and kernel idea
-        * of the tail register don't always agree, and therefore we won't
-        * get an interrupt on the next packet received.
-        * If the board supports per packet receive interrupts, use it.
-        * Otherwise, the timer function periodically checks for packets
-        * to cover this case.
-        * Either way, the timer is needed for verbs layer related
-        * processing.
-        */
-       if (dd->ipath_flags & IPATH_GPIO_INTR) {
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
-                                0x2074076542310ULL);
-               /* Enable GPIO bit 2 interrupt */
-               dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-                                dd->ipath_gpio_mask);
-       }
-
-       setup_timer(&dd->verbs_timer, __verbs_timer, (unsigned long)dd);
-
-       dd->verbs_timer.expires = jiffies + 1;
-       add_timer(&dd->verbs_timer);
-
-       return 0;
-}
-
-static int disable_timer(struct ipath_devdata *dd)
-{
-       /* Disable GPIO bit 2 interrupt */
-       if (dd->ipath_flags & IPATH_GPIO_INTR) {
-                /* Disable GPIO bit 2 interrupt */
-               dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-                                dd->ipath_gpio_mask);
-               /*
-                * We might want to undo changes to debugportselect,
-                * but how?
-                */
-       }
-
-       del_timer_sync(&dd->verbs_timer);
-
-       return 0;
-}
-
-static int ipath_port_immutable(struct ib_device *ibdev, u8 port_num,
-                               struct ib_port_immutable *immutable)
-{
-       struct ib_port_attr attr;
-       int err;
-
-       err = ipath_query_port(ibdev, port_num, &attr);
-       if (err)
-               return err;
-
-       immutable->pkey_tbl_len = attr.pkey_tbl_len;
-       immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
-       immutable->max_mad_size = IB_MGMT_MAD_SIZE;
-
-       return 0;
-}
-
-/**
- * ipath_register_ib_device - register our device with the infiniband core
- * @dd: the device data structure
- * Return the allocated ipath_ibdev pointer or NULL on error.
- */
-int ipath_register_ib_device(struct ipath_devdata *dd)
-{
-       struct ipath_verbs_counters cntrs;
-       struct ipath_ibdev *idev;
-       struct ib_device *dev;
-       struct ipath_verbs_txreq *tx;
-       unsigned i;
-       int ret;
-
-       idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
-       if (idev == NULL) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       dev = &idev->ibdev;
-
-       if (dd->ipath_sdma_descq_cnt) {
-               tx = kmalloc_array(dd->ipath_sdma_descq_cnt, sizeof *tx,
-                                  GFP_KERNEL);
-               if (tx == NULL) {
-                       ret = -ENOMEM;
-                       goto err_tx;
-               }
-       } else
-               tx = NULL;
-       idev->txreq_bufs = tx;
-
-       /* Only need to initialize non-zero fields. */
-       spin_lock_init(&idev->n_pds_lock);
-       spin_lock_init(&idev->n_ahs_lock);
-       spin_lock_init(&idev->n_cqs_lock);
-       spin_lock_init(&idev->n_qps_lock);
-       spin_lock_init(&idev->n_srqs_lock);
-       spin_lock_init(&idev->n_mcast_grps_lock);
-
-       spin_lock_init(&idev->qp_table.lock);
-       spin_lock_init(&idev->lk_table.lock);
-       idev->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
-       /* Set the prefix to the default value (see ch. 4.1.1) */
-       idev->gid_prefix = cpu_to_be64(0xfe80000000000000ULL);
-
-       ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
-       if (ret)
-               goto err_qp;
-
-       /*
-        * The top ib_ipath_lkey_table_size bits are used to index the
-        * table.  The lower 8 bits can be owned by the user (copied from
-        * the LKEY).  The remaining bits act as a generation number or tag.
-        */
-       idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
-       idev->lk_table.table = kcalloc(idev->lk_table.max,
-                                      sizeof(*idev->lk_table.table),
-                                      GFP_KERNEL);
-       if (idev->lk_table.table == NULL) {
-               ret = -ENOMEM;
-               goto err_lk;
-       }
-       INIT_LIST_HEAD(&idev->pending_mmaps);
-       spin_lock_init(&idev->pending_lock);
-       idev->mmap_offset = PAGE_SIZE;
-       spin_lock_init(&idev->mmap_offset_lock);
-       INIT_LIST_HEAD(&idev->pending[0]);
-       INIT_LIST_HEAD(&idev->pending[1]);
-       INIT_LIST_HEAD(&idev->pending[2]);
-       INIT_LIST_HEAD(&idev->piowait);
-       INIT_LIST_HEAD(&idev->rnrwait);
-       INIT_LIST_HEAD(&idev->txreq_free);
-       idev->pending_index = 0;
-       idev->port_cap_flags =
-               IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
-       if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)
-               idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
-       idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
-       idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
-       idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
-       idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
-       idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
-
-       /* Snapshot current HW counters to "clear" them. */
-       ipath_get_counters(dd, &cntrs);
-       idev->z_symbol_error_counter = cntrs.symbol_error_counter;
-       idev->z_link_error_recovery_counter =
-               cntrs.link_error_recovery_counter;
-       idev->z_link_downed_counter = cntrs.link_downed_counter;
-       idev->z_port_rcv_errors = cntrs.port_rcv_errors;
-       idev->z_port_rcv_remphys_errors =
-               cntrs.port_rcv_remphys_errors;
-       idev->z_port_xmit_discards = cntrs.port_xmit_discards;
-       idev->z_port_xmit_data = cntrs.port_xmit_data;
-       idev->z_port_rcv_data = cntrs.port_rcv_data;
-       idev->z_port_xmit_packets = cntrs.port_xmit_packets;
-       idev->z_port_rcv_packets = cntrs.port_rcv_packets;
-       idev->z_local_link_integrity_errors =
-               cntrs.local_link_integrity_errors;
-       idev->z_excessive_buffer_overrun_errors =
-               cntrs.excessive_buffer_overrun_errors;
-       idev->z_vl15_dropped = cntrs.vl15_dropped;
-
-       for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
-               list_add(&tx->txreq.list, &idev->txreq_free);
-
-       /*
-        * The system image GUID is supposed to be the same for all
-        * IB HCAs in a single system but since there can be other
-        * device types in the system, we can't be sure this is unique.
-        */
-       if (!sys_image_guid)
-               sys_image_guid = dd->ipath_guid;
-       idev->sys_image_guid = sys_image_guid;
-       idev->ib_unit = dd->ipath_unit;
-       idev->dd = dd;
-
-       strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
-       dev->owner = THIS_MODULE;
-       dev->node_guid = dd->ipath_guid;
-       dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
-       dev->uverbs_cmd_mask =
-               (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
-               (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
-               (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
-               (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
-               (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
-               (1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
-               (1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
-               (1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
-               (1ull << IB_USER_VERBS_CMD_REG_MR)              |
-               (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
-               (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-               (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
-               (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
-               (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
-               (1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
-               (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
-               (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
-               (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
-               (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
-               (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
-               (1ull << IB_USER_VERBS_CMD_POST_SEND)           |
-               (1ull << IB_USER_VERBS_CMD_POST_RECV)           |
-               (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
-               (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
-               (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
-               (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
-               (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
-               (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
-               (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
-       dev->node_type = RDMA_NODE_IB_CA;
-       dev->phys_port_cnt = 1;
-       dev->num_comp_vectors = 1;
-       dev->dma_device = &dd->pcidev->dev;
-       dev->query_device = ipath_query_device;
-       dev->modify_device = ipath_modify_device;
-       dev->query_port = ipath_query_port;
-       dev->modify_port = ipath_modify_port;
-       dev->query_pkey = ipath_query_pkey;
-       dev->query_gid = ipath_query_gid;
-       dev->alloc_ucontext = ipath_alloc_ucontext;
-       dev->dealloc_ucontext = ipath_dealloc_ucontext;
-       dev->alloc_pd = ipath_alloc_pd;
-       dev->dealloc_pd = ipath_dealloc_pd;
-       dev->create_ah = ipath_create_ah;
-       dev->destroy_ah = ipath_destroy_ah;
-       dev->query_ah = ipath_query_ah;
-       dev->create_srq = ipath_create_srq;
-       dev->modify_srq = ipath_modify_srq;
-       dev->query_srq = ipath_query_srq;
-       dev->destroy_srq = ipath_destroy_srq;
-       dev->create_qp = ipath_create_qp;
-       dev->modify_qp = ipath_modify_qp;
-       dev->query_qp = ipath_query_qp;
-       dev->destroy_qp = ipath_destroy_qp;
-       dev->post_send = ipath_post_send;
-       dev->post_recv = ipath_post_receive;
-       dev->post_srq_recv = ipath_post_srq_receive;
-       dev->create_cq = ipath_create_cq;
-       dev->destroy_cq = ipath_destroy_cq;
-       dev->resize_cq = ipath_resize_cq;
-       dev->poll_cq = ipath_poll_cq;
-       dev->req_notify_cq = ipath_req_notify_cq;
-       dev->get_dma_mr = ipath_get_dma_mr;
-       dev->reg_phys_mr = ipath_reg_phys_mr;
-       dev->reg_user_mr = ipath_reg_user_mr;
-       dev->dereg_mr = ipath_dereg_mr;
-       dev->alloc_fmr = ipath_alloc_fmr;
-       dev->map_phys_fmr = ipath_map_phys_fmr;
-       dev->unmap_fmr = ipath_unmap_fmr;
-       dev->dealloc_fmr = ipath_dealloc_fmr;
-       dev->attach_mcast = ipath_multicast_attach;
-       dev->detach_mcast = ipath_multicast_detach;
-       dev->process_mad = ipath_process_mad;
-       dev->mmap = ipath_mmap;
-       dev->dma_ops = &ipath_dma_mapping_ops;
-       dev->get_port_immutable = ipath_port_immutable;
-
-       snprintf(dev->node_desc, sizeof(dev->node_desc),
-                IPATH_IDSTR " %s", init_utsname()->nodename);
-
-       ret = ib_register_device(dev, NULL);
-       if (ret)
-               goto err_reg;
-
-       ret = ipath_verbs_register_sysfs(dev);
-       if (ret)
-               goto err_class;
-
-       enable_timer(dd);
-
-       goto bail;
-
-err_class:
-       ib_unregister_device(dev);
-err_reg:
-       kfree(idev->lk_table.table);
-err_lk:
-       kfree(idev->qp_table.table);
-err_qp:
-       kfree(idev->txreq_bufs);
-err_tx:
-       ib_dealloc_device(dev);
-       ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
-       idev = NULL;
-
-bail:
-       dd->verbs_dev = idev;
-       return ret;
-}
-
-void ipath_unregister_ib_device(struct ipath_ibdev *dev)
-{
-       struct ib_device *ibdev = &dev->ibdev;
-       u32 qps_inuse;
-
-       ib_unregister_device(ibdev);
-
-       disable_timer(dev->dd);
-
-       if (!list_empty(&dev->pending[0]) ||
-           !list_empty(&dev->pending[1]) ||
-           !list_empty(&dev->pending[2]))
-               ipath_dev_err(dev->dd, "pending list not empty!\n");
-       if (!list_empty(&dev->piowait))
-               ipath_dev_err(dev->dd, "piowait list not empty!\n");
-       if (!list_empty(&dev->rnrwait))
-               ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
-       if (!ipath_mcast_tree_empty())
-               ipath_dev_err(dev->dd, "multicast table memory leak!\n");
-       /*
-        * Note that ipath_unregister_ib_device() can be called before all
-        * the QPs are destroyed!
-        */
-       qps_inuse = ipath_free_all_qps(&dev->qp_table);
-       if (qps_inuse)
-               ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
-                       qps_inuse);
-       kfree(dev->qp_table.table);
-       kfree(dev->lk_table.table);
-       kfree(dev->txreq_bufs);
-       ib_dealloc_device(ibdev);
-}
-
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
-                       char *buf)
-{
-       struct ipath_ibdev *dev =
-               container_of(device, struct ipath_ibdev, ibdev.dev);
-
-       return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
-}
-
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
-                       char *buf)
-{
-       struct ipath_ibdev *dev =
-               container_of(device, struct ipath_ibdev, ibdev.dev);
-       int ret;
-
-       ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
-       if (ret < 0)
-               goto bail;
-       strcat(buf, "\n");
-       ret = strlen(buf);
-
-bail:
-       return ret;
-}
-
-static ssize_t show_stats(struct device *device, struct device_attribute *attr,
-                         char *buf)
-{
-       struct ipath_ibdev *dev =
-               container_of(device, struct ipath_ibdev, ibdev.dev);
-       int i;
-       int len;
-
-       len = sprintf(buf,
-                     "RC resends  %d\n"
-                     "RC no QACK  %d\n"
-                     "RC ACKs     %d\n"
-                     "RC SEQ NAKs %d\n"
-                     "RC RDMA seq %d\n"
-                     "RC RNR NAKs %d\n"
-                     "RC OTH NAKs %d\n"
-                     "RC timeouts %d\n"
-                     "RC RDMA dup %d\n"
-                     "piobuf wait %d\n"
-                     "unaligned   %d\n"
-                     "PKT drops   %d\n"
-                     "WQE errs    %d\n",
-                     dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
-                     dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
-                     dev->n_other_naks, dev->n_timeouts,
-                     dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
-                     dev->n_pkt_drops, dev->n_wqe_errs);
-       for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
-               const struct ipath_opcode_stats *si = &dev->opstats[i];
-
-               if (!si->n_packets && !si->n_bytes)
-                       continue;
-               len += sprintf(buf + len, "%02x %llu/%llu\n", i,
-                              (unsigned long long) si->n_packets,
-                              (unsigned long long) si->n_bytes);
-       }
-       return len;
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);
-
-static struct device_attribute *ipath_class_attributes[] = {
-       &dev_attr_hw_rev,
-       &dev_attr_hca_type,
-       &dev_attr_board_id,
-       &dev_attr_stats
-};
-
-static int ipath_verbs_register_sysfs(struct ib_device *dev)
-{
-       int i;
-       int ret;
-
-       for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) {
-               ret = device_create_file(&dev->dev,
-                                      ipath_class_attributes[i]);
-               if (ret)
-                       goto bail;
-       }
-       return 0;
-bail:
-       for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
-               device_remove_file(&dev->dev, ipath_class_attributes[i]);
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.h b/drivers/staging/rdma/ipath/ipath_verbs.h
deleted file mode 100644 (file)
index 0a90a56..0000000
+++ /dev/null
@@ -1,945 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef IPATH_VERBS_H
-#define IPATH_VERBS_H
-
-#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/kref.h>
-#include <rdma/ib_pack.h>
-#include <rdma/ib_user_verbs.h>
-
-#include "ipath_kernel.h"
-
-#define IPATH_MAX_RDMA_ATOMIC  4
-
-#define QPN_MAX                 (1 << 24)
-#define QPNMAP_ENTRIES          (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
-
-/*
- * Increment this value if any changes that break userspace ABI
- * compatibility are made.
- */
-#define IPATH_UVERBS_ABI_VERSION       2
-
-/*
- * Define an ib_cq_notify value that is not valid so we know when CQ
- * notifications are armed.
- */
-#define IB_CQ_NONE     (IB_CQ_NEXT_COMP + 1)
-
-/* AETH NAK opcode values */
-#define IB_RNR_NAK                     0x20
-#define IB_NAK_PSN_ERROR               0x60
-#define IB_NAK_INVALID_REQUEST         0x61
-#define IB_NAK_REMOTE_ACCESS_ERROR     0x62
-#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
-#define IB_NAK_INVALID_RD_REQUEST      0x64
-
-/* Flags for checking QP state (see ib_ipath_state_ops[]) */
-#define IPATH_POST_SEND_OK             0x01
-#define IPATH_POST_RECV_OK             0x02
-#define IPATH_PROCESS_RECV_OK          0x04
-#define IPATH_PROCESS_SEND_OK          0x08
-#define IPATH_PROCESS_NEXT_SEND_OK     0x10
-#define IPATH_FLUSH_SEND               0x20
-#define IPATH_FLUSH_RECV               0x40
-#define IPATH_PROCESS_OR_FLUSH_SEND \
-       (IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND)
-
-/* IB Performance Manager status values */
-#define IB_PMA_SAMPLE_STATUS_DONE      0x00
-#define IB_PMA_SAMPLE_STATUS_STARTED   0x01
-#define IB_PMA_SAMPLE_STATUS_RUNNING   0x02
-
-/* Mandatory IB performance counter select values. */
-#define IB_PMA_PORT_XMIT_DATA  cpu_to_be16(0x0001)
-#define IB_PMA_PORT_RCV_DATA   cpu_to_be16(0x0002)
-#define IB_PMA_PORT_XMIT_PKTS  cpu_to_be16(0x0003)
-#define IB_PMA_PORT_RCV_PKTS   cpu_to_be16(0x0004)
-#define IB_PMA_PORT_XMIT_WAIT  cpu_to_be16(0x0005)
-
-struct ib_reth {
-       __be64 vaddr;
-       __be32 rkey;
-       __be32 length;
-} __attribute__ ((packed));
-
-struct ib_atomic_eth {
-       __be32 vaddr[2];        /* unaligned so access as 2 32-bit words */
-       __be32 rkey;
-       __be64 swap_data;
-       __be64 compare_data;
-} __attribute__ ((packed));
-
-struct ipath_other_headers {
-       __be32 bth[3];
-       union {
-               struct {
-                       __be32 deth[2];
-                       __be32 imm_data;
-               } ud;
-               struct {
-                       struct ib_reth reth;
-                       __be32 imm_data;
-               } rc;
-               struct {
-                       __be32 aeth;
-                       __be32 atomic_ack_eth[2];
-               } at;
-               __be32 imm_data;
-               __be32 aeth;
-               struct ib_atomic_eth atomic_eth;
-       } u;
-} __attribute__ ((packed));
-
-/*
- * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
- * long (72 w/ imm_data).  Only the first 56 bytes of the IB header
- * will be in the eager header buffer.  The remaining 12 or 16 bytes
- * are in the data buffer.
- */
-struct ipath_ib_header {
-       __be16 lrh[4];
-       union {
-               struct {
-                       struct ib_grh grh;
-                       struct ipath_other_headers oth;
-               } l;
-               struct ipath_other_headers oth;
-       } u;
-} __attribute__ ((packed));
-
-struct ipath_pio_header {
-       __le32 pbc[2];
-       struct ipath_ib_header hdr;
-} __attribute__ ((packed));
-
-/*
- * There is one struct ipath_mcast for each multicast GID.
- * All attached QPs are then stored as a list of
- * struct ipath_mcast_qp.
- */
-struct ipath_mcast_qp {
-       struct list_head list;
-       struct ipath_qp *qp;
-};
-
-struct ipath_mcast {
-       struct rb_node rb_node;
-       union ib_gid mgid;
-       struct list_head qp_list;
-       wait_queue_head_t wait;
-       atomic_t refcount;
-       int n_attached;
-};
-
-/* Protection domain */
-struct ipath_pd {
-       struct ib_pd ibpd;
-       int user;               /* non-zero if created from user space */
-};
-
-/* Address Handle */
-struct ipath_ah {
-       struct ib_ah ibah;
-       struct ib_ah_attr attr;
-};
-
-/*
- * This structure is used by ipath_mmap() to validate an offset
- * when an mmap() request is made.  The vm_area_struct then uses
- * this as its vm_private_data.
- */
-struct ipath_mmap_info {
-       struct list_head pending_mmaps;
-       struct ib_ucontext *context;
-       void *obj;
-       __u64 offset;
-       struct kref ref;
-       unsigned size;
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and completion queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- */
-struct ipath_cq_wc {
-       u32 head;               /* index of next entry to fill */
-       u32 tail;               /* index of next ib_poll_cq() entry */
-       union {
-               /* these are actually size ibcq.cqe + 1 */
-               struct ib_uverbs_wc uqueue[0];
-               struct ib_wc kqueue[0];
-       };
-};
-
-/*
- * The completion queue structure.
- */
-struct ipath_cq {
-       struct ib_cq ibcq;
-       struct tasklet_struct comptask;
-       spinlock_t lock;
-       u8 notify;
-       u8 triggered;
-       struct ipath_cq_wc *queue;
-       struct ipath_mmap_info *ip;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * XXX Maybe we should use phys addr here and kmap()/kunmap().
- * Used by the verbs layer.
- */
-struct ipath_seg {
-       void *vaddr;
-       size_t length;
-};
-
-/* The number of ipath_segs that fit in a page. */
-#define IPATH_SEGSZ     (PAGE_SIZE / sizeof (struct ipath_seg))
-
-struct ipath_segarray {
-       struct ipath_seg segs[IPATH_SEGSZ];
-};
-
-struct ipath_mregion {
-       struct ib_pd *pd;       /* shares refcnt of ibmr.pd */
-       u64 user_base;          /* User's address for this region */
-       u64 iova;               /* IB start address of this region */
-       size_t length;
-       u32 lkey;
-       u32 offset;             /* offset (bytes) to start of region */
-       int access_flags;
-       u32 max_segs;           /* number of ipath_segs in all the arrays */
-       u32 mapsz;              /* size of the map array */
-       struct ipath_segarray *map[0];  /* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct ipath_sge {
-       struct ipath_mregion *mr;
-       void *vaddr;            /* kernel virtual address of segment */
-       u32 sge_length;         /* length of the SGE */
-       u32 length;             /* remaining length of the segment */
-       u16 m;                  /* current index: mr->map[m] */
-       u16 n;                  /* current index: mr->map[m]->segs[n] */
-};
-
-/* Memory region */
-struct ipath_mr {
-       struct ib_mr ibmr;
-       struct ib_umem *umem;
-       struct ipath_mregion mr;        /* must be last */
-};
-
-/*
- * Send work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->s_max_sge.
- */
-struct ipath_swqe {
-       union {
-               struct ib_send_wr wr;   /* don't use wr.sg_list */
-               struct ib_ud_wr ud_wr;
-               struct ib_rdma_wr rdma_wr;
-               struct ib_atomic_wr atomic_wr;
-       };
-
-       u32 psn;                /* first packet sequence number */
-       u32 lpsn;               /* last packet sequence number */
-       u32 ssn;                /* send sequence number */
-       u32 length;             /* total length of data in sg_list */
-       struct ipath_sge sg_list[0];
-};
-
-/*
- * Receive work request queue entry.
- * The size of the sg_list is determined when the QP (or SRQ) is created
- * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
- */
-struct ipath_rwqe {
-       u64 wr_id;
-       u8 num_sge;
-       struct ib_sge sg_list[0];
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and receive work queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- * Note that the wq array elements are variable size so you can't
- * just index into the array to get the N'th element;
- * use get_rwqe_ptr() instead.
- */
-struct ipath_rwq {
-       u32 head;               /* new work requests posted to the head */
-       u32 tail;               /* receives pull requests from here. */
-       struct ipath_rwqe wq[0];
-};
-
-struct ipath_rq {
-       struct ipath_rwq *wq;
-       spinlock_t lock;
-       u32 size;               /* size of RWQE array */
-       u8 max_sge;
-};
-
-struct ipath_srq {
-       struct ib_srq ibsrq;
-       struct ipath_rq rq;
-       struct ipath_mmap_info *ip;
-       /* send signal when number of RWQEs < limit */
-       u32 limit;
-};
-
-struct ipath_sge_state {
-       struct ipath_sge *sg_list;      /* next SGE to be used if any */
-       struct ipath_sge sge;   /* progress state for the current SGE */
-       u8 num_sge;
-       u8 static_rate;
-};
-
-/*
- * This structure holds the information that the send tasklet needs
- * to send a RDMA read response or atomic operation.
- */
-struct ipath_ack_entry {
-       u8 opcode;
-       u8 sent;
-       u32 psn;
-       union {
-               struct ipath_sge_state rdma_sge;
-               u64 atomic_data;
-       };
-};
-
-/*
- * Variables prefixed with s_ are for the requester (sender).
- * Variables prefixed with r_ are for the responder (receiver).
- * Variables prefixed with ack_ are for responder replies.
- *
- * Common variables are protected by both r_rq.lock and s_lock in that order
- * which only happens in modify_qp() or changing the QP 'state'.
- */
-struct ipath_qp {
-       struct ib_qp ibqp;
-       struct ipath_qp *next;          /* link list for QPN hash table */
-       struct ipath_qp *timer_next;    /* link list for ipath_ib_timer() */
-       struct ipath_qp *pio_next;      /* link for ipath_ib_piobufavail() */
-       struct list_head piowait;       /* link for wait PIO buf */
-       struct list_head timerwait;     /* link for waiting for timeouts */
-       struct ib_ah_attr remote_ah_attr;
-       struct ipath_ib_header s_hdr;   /* next packet header to send */
-       atomic_t refcount;
-       wait_queue_head_t wait;
-       wait_queue_head_t wait_dma;
-       struct tasklet_struct s_task;
-       struct ipath_mmap_info *ip;
-       struct ipath_sge_state *s_cur_sge;
-       struct ipath_verbs_txreq *s_tx;
-       struct ipath_sge_state s_sge;   /* current send request data */
-       struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1];
-       struct ipath_sge_state s_ack_rdma_sge;
-       struct ipath_sge_state s_rdma_read_sge;
-       struct ipath_sge_state r_sge;   /* current receive data */
-       spinlock_t s_lock;
-       atomic_t s_dma_busy;
-       u16 s_pkt_delay;
-       u16 s_hdrwords;         /* size of s_hdr in 32 bit words */
-       u32 s_cur_size;         /* size of send packet in bytes */
-       u32 s_len;              /* total length of s_sge */
-       u32 s_rdma_read_len;    /* total length of s_rdma_read_sge */
-       u32 s_next_psn;         /* PSN for next request */
-       u32 s_last_psn;         /* last response PSN processed */
-       u32 s_psn;              /* current packet sequence number */
-       u32 s_ack_rdma_psn;     /* PSN for sending RDMA read responses */
-       u32 s_ack_psn;          /* PSN for acking sends and RDMA writes */
-       u32 s_rnr_timeout;      /* number of milliseconds for RNR timeout */
-       u32 r_ack_psn;          /* PSN for next ACK or atomic ACK */
-       u64 r_wr_id;            /* ID for current receive WQE */
-       unsigned long r_aflags;
-       u32 r_len;              /* total length of r_sge */
-       u32 r_rcv_len;          /* receive data len processed */
-       u32 r_psn;              /* expected rcv packet sequence number */
-       u32 r_msn;              /* message sequence number */
-       u8 state;               /* QP state */
-       u8 s_state;             /* opcode of last packet sent */
-       u8 s_ack_state;         /* opcode of packet to ACK */
-       u8 s_nak_state;         /* non-zero if NAK is pending */
-       u8 r_state;             /* opcode of last packet received */
-       u8 r_nak_state;         /* non-zero if NAK is pending */
-       u8 r_min_rnr_timer;     /* retry timeout value for RNR NAKs */
-       u8 r_flags;
-       u8 r_max_rd_atomic;     /* max number of RDMA read/atomic to receive */
-       u8 r_head_ack_queue;    /* index into s_ack_queue[] */
-       u8 qp_access_flags;
-       u8 s_max_sge;           /* size of s_wq->sg_list */
-       u8 s_retry_cnt;         /* number of times to retry */
-       u8 s_rnr_retry_cnt;
-       u8 s_retry;             /* requester retry counter */
-       u8 s_rnr_retry;         /* requester RNR retry counter */
-       u8 s_pkey_index;        /* PKEY index to use */
-       u8 s_max_rd_atomic;     /* max number of RDMA read/atomic to send */
-       u8 s_num_rd_atomic;     /* number of RDMA read/atomic pending */
-       u8 s_tail_ack_queue;    /* index into s_ack_queue[] */
-       u8 s_flags;
-       u8 s_dmult;
-       u8 s_draining;
-       u8 timeout;             /* Timeout for this QP */
-       enum ib_mtu path_mtu;
-       u32 remote_qpn;
-       u32 qkey;               /* QKEY for this QP (for UD or RD) */
-       u32 s_size;             /* send work queue size */
-       u32 s_head;             /* new entries added here */
-       u32 s_tail;             /* next entry to process */
-       u32 s_cur;              /* current work queue entry */
-       u32 s_last;             /* last un-ACK'ed entry */
-       u32 s_ssn;              /* SSN of tail entry */
-       u32 s_lsn;              /* limit sequence number (credit) */
-       struct ipath_swqe *s_wq;        /* send work queue */
-       struct ipath_swqe *s_wqe;
-       struct ipath_sge *r_ud_sg_list;
-       struct ipath_rq r_rq;           /* receive work queue */
-       struct ipath_sge r_sg_list[0];  /* verified SGEs */
-};
-
-/*
- * Atomic bit definitions for r_aflags.
- */
-#define IPATH_R_WRID_VALID     0
-
-/*
- * Bit definitions for r_flags.
- */
-#define IPATH_R_REUSE_SGE      0x01
-#define IPATH_R_RDMAR_SEQ      0x02
-
-/*
- * Bit definitions for s_flags.
- *
- * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs
- *                        before processing the next SWQE
- * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs
- *                        before processing the next SWQE
- * IPATH_S_WAITING - waiting for RNR timeout or send buffer available.
- * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
- * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating
- *                   next send completion entry not via send DMA.
- */
-#define IPATH_S_SIGNAL_REQ_WR  0x01
-#define IPATH_S_FENCE_PENDING  0x02
-#define IPATH_S_RDMAR_PENDING  0x04
-#define IPATH_S_ACK_PENDING    0x08
-#define IPATH_S_BUSY           0x10
-#define IPATH_S_WAITING                0x20
-#define IPATH_S_WAIT_SSN_CREDIT        0x40
-#define IPATH_S_WAIT_DMA       0x80
-
-#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \
-       IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA)
-
-#define IPATH_PSN_CREDIT       512
-
-/*
- * Since struct ipath_swqe is not a fixed size, we can't simply index into
- * struct ipath_qp.s_wq.  This function does the array index computation.
- */
-static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp,
-                                             unsigned n)
-{
-       return (struct ipath_swqe *)((char *)qp->s_wq +
-                                    (sizeof(struct ipath_swqe) +
-                                     qp->s_max_sge *
-                                     sizeof(struct ipath_sge)) * n);
-}
-
-/*
- * Since struct ipath_rwqe is not a fixed size, we can't simply index into
- * struct ipath_rwq.wq.  This function does the array index computation.
- */
-static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
-                                             unsigned n)
-{
-       return (struct ipath_rwqe *)
-               ((char *) rq->wq->wq +
-                (sizeof(struct ipath_rwqe) +
-                 rq->max_sge * sizeof(struct ib_sge)) * n);
-}
-
-/*
- * QPN-map pages start out as NULL, they get allocated upon
- * first use and are never deallocated. This way,
- * large bitmaps are not allocated unless large numbers of QPs are used.
- */
-struct qpn_map {
-       atomic_t n_free;
-       void *page;
-};
-
-struct ipath_qp_table {
-       spinlock_t lock;
-       u32 last;               /* last QP number allocated */
-       u32 max;                /* size of the hash table */
-       u32 nmaps;              /* size of the map table */
-       struct ipath_qp **table;
-       /* bit map of free numbers */
-       struct qpn_map map[QPNMAP_ENTRIES];
-};
-
-struct ipath_lkey_table {
-       spinlock_t lock;
-       u32 next;               /* next unused index (speeds search) */
-       u32 gen;                /* generation count */
-       u32 max;                /* size of the table */
-       struct ipath_mregion **table;
-};
-
-struct ipath_opcode_stats {
-       u64 n_packets;          /* number of packets */
-       u64 n_bytes;            /* total number of bytes */
-};
-
-struct ipath_ibdev {
-       struct ib_device ibdev;
-       struct ipath_devdata *dd;
-       struct list_head pending_mmaps;
-       spinlock_t mmap_offset_lock;
-       u32 mmap_offset;
-       int ib_unit;            /* This is the device number */
-       u16 sm_lid;             /* in host order */
-       u8 sm_sl;
-       u8 mkeyprot;
-       /* non-zero when timer is set */
-       unsigned long mkey_lease_timeout;
-
-       /* The following fields are really per port. */
-       struct ipath_qp_table qp_table;
-       struct ipath_lkey_table lk_table;
-       struct list_head pending[3];    /* FIFO of QPs waiting for ACKs */
-       struct list_head piowait;       /* list for wait PIO buf */
-       struct list_head txreq_free;
-       void *txreq_bufs;
-       /* list of QPs waiting for RNR timer */
-       struct list_head rnrwait;
-       spinlock_t pending_lock;
-       __be64 sys_image_guid;  /* in network order */
-       __be64 gid_prefix;      /* in network order */
-       __be64 mkey;
-
-       u32 n_pds_allocated;    /* number of PDs allocated for device */
-       spinlock_t n_pds_lock;
-       u32 n_ahs_allocated;    /* number of AHs allocated for device */
-       spinlock_t n_ahs_lock;
-       u32 n_cqs_allocated;    /* number of CQs allocated for device */
-       spinlock_t n_cqs_lock;
-       u32 n_qps_allocated;    /* number of QPs allocated for device */
-       spinlock_t n_qps_lock;
-       u32 n_srqs_allocated;   /* number of SRQs allocated for device */
-       spinlock_t n_srqs_lock;
-       u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
-       spinlock_t n_mcast_grps_lock;
-
-       u64 ipath_sword;        /* total dwords sent (sample result) */
-       u64 ipath_rword;        /* total dwords received (sample result) */
-       u64 ipath_spkts;        /* total packets sent (sample result) */
-       u64 ipath_rpkts;        /* total packets received (sample result) */
-       /* # of ticks no data sent (sample result) */
-       u64 ipath_xmit_wait;
-       u64 rcv_errors;         /* # of packets with SW detected rcv errs */
-       u64 n_unicast_xmit;     /* total unicast packets sent */
-       u64 n_unicast_rcv;      /* total unicast packets received */
-       u64 n_multicast_xmit;   /* total multicast packets sent */
-       u64 n_multicast_rcv;    /* total multicast packets received */
-       u64 z_symbol_error_counter;             /* starting count for PMA */
-       u64 z_link_error_recovery_counter;      /* starting count for PMA */
-       u64 z_link_downed_counter;              /* starting count for PMA */
-       u64 z_port_rcv_errors;                  /* starting count for PMA */
-       u64 z_port_rcv_remphys_errors;          /* starting count for PMA */
-       u64 z_port_xmit_discards;               /* starting count for PMA */
-       u64 z_port_xmit_data;                   /* starting count for PMA */
-       u64 z_port_rcv_data;                    /* starting count for PMA */
-       u64 z_port_xmit_packets;                /* starting count for PMA */
-       u64 z_port_rcv_packets;                 /* starting count for PMA */
-       u32 z_pkey_violations;                  /* starting count for PMA */
-       u32 z_local_link_integrity_errors;      /* starting count for PMA */
-       u32 z_excessive_buffer_overrun_errors;  /* starting count for PMA */
-       u32 z_vl15_dropped;                     /* starting count for PMA */
-       u32 n_rc_resends;
-       u32 n_rc_acks;
-       u32 n_rc_qacks;
-       u32 n_seq_naks;
-       u32 n_rdma_seq;
-       u32 n_rnr_naks;
-       u32 n_other_naks;
-       u32 n_timeouts;
-       u32 n_pkt_drops;
-       u32 n_vl15_dropped;
-       u32 n_wqe_errs;
-       u32 n_rdma_dup_busy;
-       u32 n_piowait;
-       u32 n_unaligned;
-       u32 port_cap_flags;
-       u32 pma_sample_start;
-       u32 pma_sample_interval;
-       __be16 pma_counter_select[5];
-       u16 pma_tag;
-       u16 qkey_violations;
-       u16 mkey_violations;
-       u16 mkey_lease_period;
-       u16 pending_index;      /* which pending queue is active */
-       u8 pma_sample_status;
-       u8 subnet_timeout;
-       u8 vl_high_limit;
-       struct ipath_opcode_stats opstats[128];
-};
-
-struct ipath_verbs_counters {
-       u64 symbol_error_counter;
-       u64 link_error_recovery_counter;
-       u64 link_downed_counter;
-       u64 port_rcv_errors;
-       u64 port_rcv_remphys_errors;
-       u64 port_xmit_discards;
-       u64 port_xmit_data;
-       u64 port_rcv_data;
-       u64 port_xmit_packets;
-       u64 port_rcv_packets;
-       u32 local_link_integrity_errors;
-       u32 excessive_buffer_overrun_errors;
-       u32 vl15_dropped;
-};
-
-struct ipath_verbs_txreq {
-       struct ipath_qp         *qp;
-       struct ipath_swqe       *wqe;
-       u32                      map_len;
-       u32                      len;
-       struct ipath_sge_state  *ss;
-       struct ipath_pio_header  hdr;
-       struct ipath_sdma_txreq  txreq;
-};
-
-static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
-{
-       return container_of(ibmr, struct ipath_mr, ibmr);
-}
-
-static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd)
-{
-       return container_of(ibpd, struct ipath_pd, ibpd);
-}
-
-static inline struct ipath_ah *to_iah(struct ib_ah *ibah)
-{
-       return container_of(ibah, struct ipath_ah, ibah);
-}
-
-static inline struct ipath_cq *to_icq(struct ib_cq *ibcq)
-{
-       return container_of(ibcq, struct ipath_cq, ibcq);
-}
-
-static inline struct ipath_srq *to_isrq(struct ib_srq *ibsrq)
-{
-       return container_of(ibsrq, struct ipath_srq, ibsrq);
-}
-
-static inline struct ipath_qp *to_iqp(struct ib_qp *ibqp)
-{
-       return container_of(ibqp, struct ipath_qp, ibqp);
-}
-
-static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)
-{
-       return container_of(ibdev, struct ipath_ibdev, ibdev);
-}
-
-/*
- * This must be called with s_lock held.
- */
-static inline void ipath_schedule_send(struct ipath_qp *qp)
-{
-       if (qp->s_flags & IPATH_S_ANY_WAIT)
-               qp->s_flags &= ~IPATH_S_ANY_WAIT;
-       if (!(qp->s_flags & IPATH_S_BUSY))
-               tasklet_hi_schedule(&qp->s_task);
-}
-
-int ipath_process_mad(struct ib_device *ibdev,
-                     int mad_flags,
-                     u8 port_num,
-                     const struct ib_wc *in_wc,
-                     const struct ib_grh *in_grh,
-                     const struct ib_mad_hdr *in, size_t in_mad_size,
-                     struct ib_mad_hdr *out, size_t *out_mad_size,
-                     u16 *out_mad_pkey_index);
-
-/*
- * Compare the lower 24 bits of the two values.
- * Returns an integer <, ==, or > than zero.
- */
-static inline int ipath_cmp24(u32 a, u32 b)
-{
-       return (((int) a) - ((int) b)) << 8;
-}
-
-struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid);
-
-int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-                           u64 *rwords, u64 *spkts, u64 *rpkts,
-                           u64 *xmit_wait);
-
-int ipath_get_counters(struct ipath_devdata *dd,
-                      struct ipath_verbs_counters *cntrs);
-
-int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int ipath_mcast_tree_empty(void);
-
-__be32 ipath_compute_aeth(struct ipath_qp *qp);
-
-struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn);
-
-struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
-                             struct ib_qp_init_attr *init_attr,
-                             struct ib_udata *udata);
-
-int ipath_destroy_qp(struct ib_qp *ibqp);
-
-int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
-
-int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                   int attr_mask, struct ib_udata *udata);
-
-int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                  int attr_mask, struct ib_qp_init_attr *init_attr);
-
-unsigned ipath_free_all_qps(struct ipath_qp_table *qpt);
-
-int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
-
-void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
-
-unsigned ipath_ib_rate_to_mult(enum ib_rate rate);
-
-int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
-                    u32 hdrwords, struct ipath_sge_state *ss, u32 len);
-
-void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
-
-void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
-
-void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-                 int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-
-void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-                 int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn);
-
-void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err);
-
-int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr);
-
-void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-                 int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-
-int ipath_alloc_lkey(struct ipath_lkey_table *rkt,
-                    struct ipath_mregion *mr);
-
-void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey);
-
-int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
-                 struct ib_sge *sge, int acc);
-
-int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
-                 u32 len, u64 vaddr, u32 rkey, int acc);
-
-int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-                          struct ib_recv_wr **bad_wr);
-
-struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
-                               struct ib_srq_init_attr *srq_init_attr,
-                               struct ib_udata *udata);
-
-int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-                    enum ib_srq_attr_mask attr_mask,
-                    struct ib_udata *udata);
-
-int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
-
-int ipath_destroy_srq(struct ib_srq *ibsrq);
-
-void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
-
-int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-
-struct ib_cq *ipath_create_cq(struct ib_device *ibdev,
-                             const struct ib_cq_init_attr *attr,
-                             struct ib_ucontext *context,
-                             struct ib_udata *udata);
-
-int ipath_destroy_cq(struct ib_cq *ibcq);
-
-int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
-
-int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
-
-struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc);
-
-struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
-                               struct ib_phys_buf *buffer_list,
-                               int num_phys_buf, int acc, u64 *iova_start);
-
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-                               u64 virt_addr, int mr_access_flags,
-                               struct ib_udata *udata);
-
-int ipath_dereg_mr(struct ib_mr *ibmr);
-
-struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-                              struct ib_fmr_attr *fmr_attr);
-
-int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list,
-                      int list_len, u64 iova);
-
-int ipath_unmap_fmr(struct list_head *fmr_list);
-
-int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
-
-void ipath_release_mmap_info(struct kref *ref);
-
-struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
-                                              u32 size,
-                                              struct ib_ucontext *context,
-                                              void *obj);
-
-void ipath_update_mmap_info(struct ipath_ibdev *dev,
-                           struct ipath_mmap_info *ip,
-                           u32 size, void *obj);
-
-int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-
-void ipath_insert_rnr_queue(struct ipath_qp *qp);
-
-int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
-                  u32 *lengthp, struct ipath_sge_state *ss);
-
-int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only);
-
-u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
-                  struct ib_global_route *grh, u32 hwords, u32 nwords);
-
-void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
-                          struct ipath_other_headers *ohdr,
-                          u32 bth0, u32 bth2);
-
-void ipath_do_send(unsigned long data);
-
-void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
-                        enum ib_wc_status status);
-
-int ipath_make_rc_req(struct ipath_qp *qp);
-
-int ipath_make_uc_req(struct ipath_qp *qp);
-
-int ipath_make_ud_req(struct ipath_qp *qp);
-
-int ipath_register_ib_device(struct ipath_devdata *);
-
-void ipath_unregister_ib_device(struct ipath_ibdev *);
-
-void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32);
-
-int ipath_ib_piobufavail(struct ipath_ibdev *);
-
-unsigned ipath_get_npkeys(struct ipath_devdata *);
-
-u32 ipath_get_cr_errpkey(struct ipath_devdata *);
-
-unsigned ipath_get_pkey(struct ipath_devdata *, unsigned);
-
-extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
-
-/*
- * Below converts HCA-specific LinkTrainingState to IB PhysPortState
- * values.
- */
-extern const u8 ipath_cvt_physportstate[];
-#define IB_PHYSPORTSTATE_SLEEP 1
-#define IB_PHYSPORTSTATE_POLL 2
-#define IB_PHYSPORTSTATE_DISABLED 3
-#define IB_PHYSPORTSTATE_CFG_TRAIN 4
-#define IB_PHYSPORTSTATE_LINKUP 5
-#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6
-
-extern const int ib_ipath_state_ops[];
-
-extern unsigned int ib_ipath_lkey_table_size;
-
-extern unsigned int ib_ipath_max_cqes;
-
-extern unsigned int ib_ipath_max_cqs;
-
-extern unsigned int ib_ipath_max_qp_wrs;
-
-extern unsigned int ib_ipath_max_qps;
-
-extern unsigned int ib_ipath_max_sges;
-
-extern unsigned int ib_ipath_max_mcast_grps;
-
-extern unsigned int ib_ipath_max_mcast_qp_attached;
-
-extern unsigned int ib_ipath_max_srqs;
-
-extern unsigned int ib_ipath_max_srq_sges;
-
-extern unsigned int ib_ipath_max_srq_wrs;
-
-extern const u32 ib_ipath_rnr_table[];
-
-extern struct ib_dma_mapping_ops ipath_dma_mapping_ops;
-
-#endif                         /* IPATH_VERBS_H */
diff --git a/drivers/staging/rdma/ipath/ipath_verbs_mcast.c b/drivers/staging/rdma/ipath/ipath_verbs_mcast.c
deleted file mode 100644 (file)
index 72d476f..0000000
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/rculist.h>
-#include <linux/slab.h>
-
-#include "ipath_verbs.h"
-
-/*
- * Global table of GID to attached QPs.
- * The table is global to all ipath devices since a send from one QP/device
- * needs to be locally routed to any locally attached QPs on the same
- * or different device.
- */
-static struct rb_root mcast_tree;
-static DEFINE_SPINLOCK(mcast_lock);
-
-/**
- * ipath_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
- * @qp: the QP to link
- */
-static struct ipath_mcast_qp *ipath_mcast_qp_alloc(struct ipath_qp *qp)
-{
-       struct ipath_mcast_qp *mqp;
-
-       mqp = kmalloc(sizeof *mqp, GFP_KERNEL);
-       if (!mqp)
-               goto bail;
-
-       mqp->qp = qp;
-       atomic_inc(&qp->refcount);
-
-bail:
-       return mqp;
-}
-
-static void ipath_mcast_qp_free(struct ipath_mcast_qp *mqp)
-{
-       struct ipath_qp *qp = mqp->qp;
-
-       /* Notify ipath_destroy_qp() if it is waiting. */
-       if (atomic_dec_and_test(&qp->refcount))
-               wake_up(&qp->wait);
-
-       kfree(mqp);
-}
-
-/**
- * ipath_mcast_alloc - allocate the multicast GID structure
- * @mgid: the multicast GID
- *
- * A list of QPs will be attached to this structure.
- */
-static struct ipath_mcast *ipath_mcast_alloc(union ib_gid *mgid)
-{
-       struct ipath_mcast *mcast;
-
-       mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
-       if (!mcast)
-               goto bail;
-
-       mcast->mgid = *mgid;
-       INIT_LIST_HEAD(&mcast->qp_list);
-       init_waitqueue_head(&mcast->wait);
-       atomic_set(&mcast->refcount, 0);
-       mcast->n_attached = 0;
-
-bail:
-       return mcast;
-}
-
-static void ipath_mcast_free(struct ipath_mcast *mcast)
-{
-       struct ipath_mcast_qp *p, *tmp;
-
-       list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
-               ipath_mcast_qp_free(p);
-
-       kfree(mcast);
-}
-
-/**
- * ipath_mcast_find - search the global table for the given multicast GID
- * @mgid: the multicast GID to search for
- *
- * Returns NULL if not found.
- *
- * The caller is responsible for decrementing the reference count if found.
- */
-struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid)
-{
-       struct rb_node *n;
-       unsigned long flags;
-       struct ipath_mcast *mcast;
-
-       spin_lock_irqsave(&mcast_lock, flags);
-       n = mcast_tree.rb_node;
-       while (n) {
-               int ret;
-
-               mcast = rb_entry(n, struct ipath_mcast, rb_node);
-
-               ret = memcmp(mgid->raw, mcast->mgid.raw,
-                            sizeof(union ib_gid));
-               if (ret < 0)
-                       n = n->rb_left;
-               else if (ret > 0)
-                       n = n->rb_right;
-               else {
-                       atomic_inc(&mcast->refcount);
-                       spin_unlock_irqrestore(&mcast_lock, flags);
-                       goto bail;
-               }
-       }
-       spin_unlock_irqrestore(&mcast_lock, flags);
-
-       mcast = NULL;
-
-bail:
-       return mcast;
-}
-
-/**
- * ipath_mcast_add - insert mcast GID into table and attach QP struct
- * @mcast: the mcast GID table
- * @mqp: the QP to attach
- *
- * Return zero if both were added.  Return EEXIST if the GID was already in
- * the table but the QP was added.  Return ESRCH if the QP was already
- * attached and neither structure was added.
- */
-static int ipath_mcast_add(struct ipath_ibdev *dev,
-                          struct ipath_mcast *mcast,
-                          struct ipath_mcast_qp *mqp)
-{
-       struct rb_node **n = &mcast_tree.rb_node;
-       struct rb_node *pn = NULL;
-       int ret;
-
-       spin_lock_irq(&mcast_lock);
-
-       while (*n) {
-               struct ipath_mcast *tmcast;
-               struct ipath_mcast_qp *p;
-
-               pn = *n;
-               tmcast = rb_entry(pn, struct ipath_mcast, rb_node);
-
-               ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
-                            sizeof(union ib_gid));
-               if (ret < 0) {
-                       n = &pn->rb_left;
-                       continue;
-               }
-               if (ret > 0) {
-                       n = &pn->rb_right;
-                       continue;
-               }
-
-               /* Search the QP list to see if this is already there. */
-               list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
-                       if (p->qp == mqp->qp) {
-                               ret = ESRCH;
-                               goto bail;
-                       }
-               }
-               if (tmcast->n_attached == ib_ipath_max_mcast_qp_attached) {
-                       ret = ENOMEM;
-                       goto bail;
-               }
-
-               tmcast->n_attached++;
-
-               list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
-               ret = EEXIST;
-               goto bail;
-       }
-
-       spin_lock(&dev->n_mcast_grps_lock);
-       if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) {
-               spin_unlock(&dev->n_mcast_grps_lock);
-               ret = ENOMEM;
-               goto bail;
-       }
-
-       dev->n_mcast_grps_allocated++;
-       spin_unlock(&dev->n_mcast_grps_lock);
-
-       mcast->n_attached++;
-
-       list_add_tail_rcu(&mqp->list, &mcast->qp_list);
-
-       atomic_inc(&mcast->refcount);
-       rb_link_node(&mcast->rb_node, pn, n);
-       rb_insert_color(&mcast->rb_node, &mcast_tree);
-
-       ret = 0;
-
-bail:
-       spin_unlock_irq(&mcast_lock);
-
-       return ret;
-}
-
-int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-       struct ipath_qp *qp = to_iqp(ibqp);
-       struct ipath_ibdev *dev = to_idev(ibqp->device);
-       struct ipath_mcast *mcast;
-       struct ipath_mcast_qp *mqp;
-       int ret;
-
-       /*
-        * Allocate data structures since its better to do this outside of
-        * spin locks and it will most likely be needed.
-        */
-       mcast = ipath_mcast_alloc(gid);
-       if (mcast == NULL) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-       mqp = ipath_mcast_qp_alloc(qp);
-       if (mqp == NULL) {
-               ipath_mcast_free(mcast);
-               ret = -ENOMEM;
-               goto bail;
-       }
-       switch (ipath_mcast_add(dev, mcast, mqp)) {
-       case ESRCH:
-               /* Neither was used: can't attach the same QP twice. */
-               ipath_mcast_qp_free(mqp);
-               ipath_mcast_free(mcast);
-               ret = -EINVAL;
-               goto bail;
-       case EEXIST:            /* The mcast wasn't used */
-               ipath_mcast_free(mcast);
-               break;
-       case ENOMEM:
-               /* Exceeded the maximum number of mcast groups. */
-               ipath_mcast_qp_free(mqp);
-               ipath_mcast_free(mcast);
-               ret = -ENOMEM;
-               goto bail;
-       default:
-               break;
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-       struct ipath_qp *qp = to_iqp(ibqp);
-       struct ipath_ibdev *dev = to_idev(ibqp->device);
-       struct ipath_mcast *mcast = NULL;
-       struct ipath_mcast_qp *p, *tmp;
-       struct rb_node *n;
-       int last = 0;
-       int ret;
-
-       spin_lock_irq(&mcast_lock);
-
-       /* Find the GID in the mcast table. */
-       n = mcast_tree.rb_node;
-       while (1) {
-               if (n == NULL) {
-                       spin_unlock_irq(&mcast_lock);
-                       ret = -EINVAL;
-                       goto bail;
-               }
-
-               mcast = rb_entry(n, struct ipath_mcast, rb_node);
-               ret = memcmp(gid->raw, mcast->mgid.raw,
-                            sizeof(union ib_gid));
-               if (ret < 0)
-                       n = n->rb_left;
-               else if (ret > 0)
-                       n = n->rb_right;
-               else
-                       break;
-       }
-
-       /* Search the QP list. */
-       list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
-               if (p->qp != qp)
-                       continue;
-               /*
-                * We found it, so remove it, but don't poison the forward
-                * link until we are sure there are no list walkers.
-                */
-               list_del_rcu(&p->list);
-               mcast->n_attached--;
-
-               /* If this was the last attached QP, remove the GID too. */
-               if (list_empty(&mcast->qp_list)) {
-                       rb_erase(&mcast->rb_node, &mcast_tree);
-                       last = 1;
-               }
-               break;
-       }
-
-       spin_unlock_irq(&mcast_lock);
-
-       if (p) {
-               /*
-                * Wait for any list walkers to finish before freeing the
-                * list element.
-                */
-               wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
-               ipath_mcast_qp_free(p);
-       }
-       if (last) {
-               atomic_dec(&mcast->refcount);
-               wait_event(mcast->wait, !atomic_read(&mcast->refcount));
-               ipath_mcast_free(mcast);
-               spin_lock_irq(&dev->n_mcast_grps_lock);
-               dev->n_mcast_grps_allocated--;
-               spin_unlock_irq(&dev->n_mcast_grps_lock);
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-int ipath_mcast_tree_empty(void)
-{
-       return mcast_tree.rb_node == NULL;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_wc_ppc64.c b/drivers/staging/rdma/ipath/ipath_wc_ppc64.c
deleted file mode 100644 (file)
index 1a7e20a..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file is conditionally built on PowerPC only.  Otherwise weak symbol
- * versions of the functions exported from here are used.
- */
-
-#include "ipath_kernel.h"
-
-/**
- * ipath_enable_wc - enable write combining for MMIO writes to the device
- * @dd: infinipath device
- *
- * Nothing to do on PowerPC, so just return without error.
- */
-int ipath_enable_wc(struct ipath_devdata *dd)
-{
-       return 0;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_wc_x86_64.c b/drivers/staging/rdma/ipath/ipath_wc_x86_64.c
deleted file mode 100644 (file)
index 7b6e4c8..0000000
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file is conditionally built on x86_64 only.  Otherwise weak symbol
- * versions of the functions exported from here are used.
- */
-
-#include <linux/pci.h>
-#include <asm/processor.h>
-
-#include "ipath_kernel.h"
-
-/**
- * ipath_enable_wc - enable write combining for MMIO writes to the device
- * @dd: infinipath device
- *
- * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable
- * write combining.
- */
-int ipath_enable_wc(struct ipath_devdata *dd)
-{
-       int ret = 0;
-       u64 pioaddr, piolen;
-       unsigned bits;
-       const unsigned long addr = pci_resource_start(dd->pcidev, 0);
-       const size_t len = pci_resource_len(dd->pcidev, 0);
-
-       /*
-        * Set the PIO buffers to be WCCOMB, so we get HT bursts to the
-        * chip.  Linux (possibly the hardware) requires it to be on a power
-        * of 2 address matching the length (which has to be a power of 2).
-        * For rev1, that means the base address, for rev2, it will be just
-        * the PIO buffers themselves.
-        * For chips with two sets of buffers, the calculations are
-        * somewhat more complicated; we need to sum, and the piobufbase
-        * register has both offsets, 2K in low 32 bits, 4K in high 32 bits.
-        * The buffers are still packed, so a single range covers both.
-        */
-       if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */
-               unsigned long pio2kbase, pio4kbase;
-               pio2kbase = dd->ipath_piobufbase & 0xffffffffUL;
-               pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL;
-               if (pio2kbase < pio4kbase) { /* all, for now */
-                       pioaddr = addr + pio2kbase;
-                       piolen = pio4kbase - pio2kbase +
-                               dd->ipath_piobcnt4k * dd->ipath_4kalign;
-               } else {
-                       pioaddr = addr + pio4kbase;
-                       piolen = pio2kbase - pio4kbase +
-                               dd->ipath_piobcnt2k * dd->ipath_palign;
-               }
-       } else {  /* single buffer size (2K, currently) */
-               pioaddr = addr + dd->ipath_piobufbase;
-               piolen = dd->ipath_piobcnt2k * dd->ipath_palign +
-                       dd->ipath_piobcnt4k * dd->ipath_4kalign;
-       }
-
-       for (bits = 0; !(piolen & (1ULL << bits)); bits++)
-               /* do nothing */ ;
-
-       if (piolen != (1ULL << bits)) {
-               piolen >>= bits;
-               while (piolen >>= 1)
-                       bits++;
-               piolen = 1ULL << (bits + 1);
-       }
-       if (pioaddr & (piolen - 1)) {
-               u64 atmp;
-               ipath_dbg("pioaddr %llx not on right boundary for size "
-                         "%llx, fixing\n",
-                         (unsigned long long) pioaddr,
-                         (unsigned long long) piolen);
-               atmp = pioaddr & ~(piolen - 1);
-               if (atmp < addr || (atmp + piolen) > (addr + len)) {
-                       ipath_dev_err(dd, "No way to align address/size "
-                                     "(%llx/%llx), no WC mtrr\n",
-                                     (unsigned long long) atmp,
-                                     (unsigned long long) piolen << 1);
-                       ret = -ENODEV;
-               } else {
-                       ipath_dbg("changing WC base from %llx to %llx, "
-                                 "len from %llx to %llx\n",
-                                 (unsigned long long) pioaddr,
-                                 (unsigned long long) atmp,
-                                 (unsigned long long) piolen,
-                                 (unsigned long long) piolen << 1);
-                       pioaddr = atmp;
-                       piolen <<= 1;
-               }
-       }
-
-       if (!ret) {
-               dd->wc_cookie = arch_phys_wc_add(pioaddr, piolen);
-               if (dd->wc_cookie < 0) {
-                       ipath_dev_err(dd, "Seting mtrr failed on PIO buffers\n");
-                       ret = -ENODEV;
-               } else if (dd->wc_cookie == 0)
-                       ipath_cdbg(VERBOSE, "Set mtrr for chip to WC not needed\n");
-               else
-                       ipath_cdbg(VERBOSE, "Set mtrr for chip to WC\n");
-       }
-
-       return ret;
-}
-
-/**
- * ipath_disable_wc - disable write combining for MMIO writes to the device
- * @dd: infinipath device
- */
-void ipath_disable_wc(struct ipath_devdata *dd)
-{
-       arch_phys_wc_del(dd->wc_cookie);
-}
index efd6f45..7e8037e 100644 (file)
@@ -1,7 +1,7 @@
 menu "Speakup console speech"
 
 config SPEAKUP
-       depends on VT
+       depends on VT && !MN10300
        tristate "Speakup core"
        ---help---
                This is the Speakup screen reader.  Think of it as a
index 63c59bc..30cf973 100644 (file)
@@ -264,8 +264,9 @@ static struct notifier_block vt_notifier_block = {
        .notifier_call = vt_notifier_call,
 };
 
-static unsigned char get_attributes(u16 *pos)
+static unsigned char get_attributes(struct vc_data *vc, u16 *pos)
 {
+       pos = screen_pos(vc, pos - (u16 *)vc->vc_origin, 1);
        return (u_char) (scr_readw(pos) >> 8);
 }
 
@@ -275,7 +276,7 @@ static void speakup_date(struct vc_data *vc)
        spk_y = spk_cy = vc->vc_y;
        spk_pos = spk_cp = vc->vc_pos;
        spk_old_attr = spk_attr;
-       spk_attr = get_attributes((u_short *) spk_pos);
+       spk_attr = get_attributes(vc, (u_short *)spk_pos);
 }
 
 static void bleep(u_short val)
@@ -469,8 +470,12 @@ static u16 get_char(struct vc_data *vc, u16 *pos, u_char *attribs)
        u16 ch = ' ';
 
        if (vc && pos) {
-               u16 w = scr_readw(pos);
-               u16 c = w & 0xff;
+               u16 w;
+               u16 c;
+
+               pos = screen_pos(vc, pos - (u16 *)vc->vc_origin, 1);
+               w = scr_readw(pos);
+               c = w & 0xff;
 
                if (w & vc->vc_hi_font_mask)
                        c |= 0x100;
@@ -746,7 +751,7 @@ static int get_line(struct vc_data *vc)
        u_char tmp2;
 
        spk_old_attr = spk_attr;
-       spk_attr = get_attributes((u_short *) spk_pos);
+       spk_attr = get_attributes(vc, (u_short *)spk_pos);
        for (i = 0; i < vc->vc_cols; i++) {
                buf[i] = (u_char) get_char(vc, (u_short *) tmp, &tmp2);
                tmp += 2;
@@ -811,7 +816,7 @@ static int say_from_to(struct vc_data *vc, u_long from, u_long to,
        u_short saved_punc_mask = spk_punc_mask;
 
        spk_old_attr = spk_attr;
-       spk_attr = get_attributes((u_short *) from);
+       spk_attr = get_attributes(vc, (u_short *)from);
        while (from < to) {
                buf[i++] = (char)get_char(vc, (u_short *) from, &tmp);
                from += 2;
@@ -886,7 +891,7 @@ static int get_sentence_buf(struct vc_data *vc, int read_punc)
        sentmarks[bn][0] = &sentbuf[bn][0];
        i = 0;
        spk_old_attr = spk_attr;
-       spk_attr = get_attributes((u_short *) start);
+       spk_attr = get_attributes(vc, (u_short *)start);
 
        while (start < end) {
                sentbuf[bn][i] = (char)get_char(vc, (u_short *) start, &tmp);
@@ -1585,7 +1590,7 @@ static int count_highlight_color(struct vc_data *vc)
                u16 *ptr;
 
                for (ptr = start; ptr < end; ptr++) {
-                       ch = get_attributes(ptr);
+                       ch = get_attributes(vc, ptr);
                        bg = (ch & 0x70) >> 4;
                        speakup_console[vc_num]->ht.bgcount[bg]++;
                }
index aa5ab6c..41ef099 100644 (file)
@@ -142,7 +142,9 @@ static void __speakup_paste_selection(struct work_struct *work)
        struct tty_ldisc *ld;
        DECLARE_WAITQUEUE(wait, current);
 
-       ld = tty_ldisc_ref_wait(tty);
+       ld = tty_ldisc_ref(tty);
+       if (!ld)
+               goto tty_unref;
        tty_buffer_lock_exclusive(&vc->port);
 
        add_wait_queue(&vc->paste_wait, &wait);
@@ -162,6 +164,7 @@ static void __speakup_paste_selection(struct work_struct *work)
 
        tty_buffer_unlock_exclusive(&vc->port);
        tty_ldisc_deref(ld);
+tty_unref:
        tty_kref_put(tty);
 }
 
index 3b5835b..a5bbb33 100644 (file)
@@ -6,6 +6,11 @@
 #include "spk_priv.h"
 #include "serialio.h"
 
+#include <linux/serial_core.h>
+/* WARNING:  Do not change this to <linux/serial.h> without testing that
+ * SERIAL_PORT_DFNS does get defined to the appropriate value. */
+#include <asm/serial.h>
+
 #ifndef SERIAL_PORT_DFNS
 #define SERIAL_PORT_DFNS
 #endif
@@ -23,9 +28,15 @@ const struct old_serial_port *spk_serial_init(int index)
        int baud = 9600, quot = 0;
        unsigned int cval = 0;
        int cflag = CREAD | HUPCL | CLOCAL | B9600 | CS8;
-       const struct old_serial_port *ser = rs_table + index;
+       const struct old_serial_port *ser;
        int err;
 
+       if (index >= ARRAY_SIZE(rs_table)) {
+               pr_info("no port info for ttyS%d\n", index);
+               return NULL;
+       }
+       ser = rs_table + index;
+
        /*      Divisor, bytesize and parity */
        quot = ser->baud_base / baud;
        cval = cflag & (CSIZE | CSTOPB);
index ccc0ad0..36fa724 100644 (file)
 /* Braswell thermal reporting device */
 #define PCI_DEVICE_ID_PROC_BSW_THERMAL 0x22DC
 
+/* Broxton thermal reporting device */
+#define PCI_DEVICE_ID_PROC_BXT0_THERMAL  0x0A8C
+#define PCI_DEVICE_ID_PROC_BXT1_THERMAL  0x1A8C
+#define PCI_DEVICE_ID_PROC_BXTX_THERMAL  0x4A8C
+#define PCI_DEVICE_ID_PROC_BXTP_THERMAL  0x5A8C
+
 struct power_config {
        u32     index;
        u32     min_uw;
@@ -404,6 +410,10 @@ static const struct pci_device_id proc_thermal_pci_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_HSB_THERMAL)},
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_SKL_THERMAL)},
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BSW_THERMAL)},
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXT0_THERMAL)},
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXT1_THERMAL)},
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXTX_THERMAL)},
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXTP_THERMAL)},
        { 0, },
 };
 
index 50c7da7..00d81af 100644 (file)
@@ -136,7 +136,7 @@ struct pch_dev_ops {
 
 
 /* dev ops for Wildcat Point */
-static struct pch_dev_ops pch_dev_ops_wpt = {
+static const struct pch_dev_ops pch_dev_ops_wpt = {
        .hw_init = pch_wpt_init,
        .get_temp = pch_wpt_get_temp,
 };
index 13d01ed..44b9c48 100644 (file)
@@ -75,11 +75,11 @@ struct rcar_thermal_priv {
 #define rcar_has_irq_support(priv)     ((priv)->common->base)
 #define rcar_id_to_shift(priv)         ((priv)->id * 8)
 
-#ifdef DEBUG
-# define rcar_force_update_temp(priv)  1
-#else
-# define rcar_force_update_temp(priv)  0
-#endif
+static const struct of_device_id rcar_thermal_dt_ids[] = {
+       { .compatible = "renesas,rcar-thermal", },
+       {},
+};
+MODULE_DEVICE_TABLE(of, rcar_thermal_dt_ids);
 
 /*
  *             basic functions
@@ -203,14 +203,26 @@ err_out_unlock:
 static int rcar_thermal_get_temp(struct thermal_zone_device *zone, int *temp)
 {
        struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
+       int tmp;
+       int ret;
 
-       if (!rcar_has_irq_support(priv) || rcar_force_update_temp(priv))
-               rcar_thermal_update_temp(priv);
+       ret = rcar_thermal_update_temp(priv);
+       if (ret < 0)
+               return ret;
 
        mutex_lock(&priv->lock);
-       *temp =  MCELSIUS((priv->ctemp * 5) - 65);
+       tmp =  MCELSIUS((priv->ctemp * 5) - 65);
        mutex_unlock(&priv->lock);
 
+       if ((tmp < MCELSIUS(-45)) || (tmp > MCELSIUS(125))) {
+               struct device *dev = rcar_priv_to_dev(priv);
+
+               dev_err(dev, "it couldn't measure temperature correctly\n");
+               return -EIO;
+       }
+
+       *temp = tmp;
+
        return 0;
 }
 
@@ -288,6 +300,9 @@ static void _rcar_thermal_irq_ctrl(struct rcar_thermal_priv *priv, int enable)
        unsigned long flags;
        u32 mask = 0x3 << rcar_id_to_shift(priv); /* enable Rising/Falling */
 
+       if (!rcar_has_irq_support(priv))
+               return;
+
        spin_lock_irqsave(&common->lock, flags);
 
        rcar_thermal_common_bset(common, INTMSK, mask, enable ? 0 : mask);
@@ -299,11 +314,15 @@ static void rcar_thermal_work(struct work_struct *work)
 {
        struct rcar_thermal_priv *priv;
        int cctemp, nctemp;
+       int ret;
 
        priv = container_of(work, struct rcar_thermal_priv, work.work);
 
        rcar_thermal_get_temp(priv->zone, &cctemp);
-       rcar_thermal_update_temp(priv);
+       ret = rcar_thermal_update_temp(priv);
+       if (ret < 0)
+               return;
+
        rcar_thermal_irq_enable(priv);
 
        rcar_thermal_get_temp(priv->zone, &nctemp);
@@ -368,8 +387,7 @@ static int rcar_thermal_remove(struct platform_device *pdev)
        struct rcar_thermal_priv *priv;
 
        rcar_thermal_for_each_priv(priv, common) {
-               if (rcar_has_irq_support(priv))
-                       rcar_thermal_irq_disable(priv);
+               rcar_thermal_irq_disable(priv);
                thermal_zone_device_unregister(priv->zone);
        }
 
@@ -441,7 +459,9 @@ static int rcar_thermal_probe(struct platform_device *pdev)
                mutex_init(&priv->lock);
                INIT_LIST_HEAD(&priv->list);
                INIT_DELAYED_WORK(&priv->work, rcar_thermal_work);
-               rcar_thermal_update_temp(priv);
+               ret = rcar_thermal_update_temp(priv);
+               if (ret < 0)
+                       goto error_unregister;
 
                priv->zone = thermal_zone_device_register("rcar_thermal",
                                                1, 0, priv,
@@ -453,8 +473,7 @@ static int rcar_thermal_probe(struct platform_device *pdev)
                        goto error_unregister;
                }
 
-               if (rcar_has_irq_support(priv))
-                       rcar_thermal_irq_enable(priv);
+               rcar_thermal_irq_enable(priv);
 
                list_move_tail(&priv->list, &common->head);
 
@@ -484,12 +503,6 @@ error_unregister:
        return ret;
 }
 
-static const struct of_device_id rcar_thermal_dt_ids[] = {
-       { .compatible = "renesas,rcar-thermal", },
-       {},
-};
-MODULE_DEVICE_TABLE(of, rcar_thermal_dt_ids);
-
 static struct platform_driver rcar_thermal_driver = {
        .driver = {
                .name   = "rcar_thermal",
index e845841..b58e3fb 100644 (file)
@@ -38,7 +38,7 @@ enum tshut_mode {
 };
 
 /**
- * the system Temperature Sensors tshut(tshut) polarity
+ * The system Temperature Sensors tshut(tshut) polarity
  * the bit 8 is tshut polarity.
  * 0: low active, 1: high active
  */
@@ -57,10 +57,10 @@ enum sensor_id {
 };
 
 /**
-* The conversion table has the adc value and temperature.
-* ADC_DECREMENT is the adc value decremnet.(e.g. v2_code_table)
-* ADC_INCREMNET is the adc value incremnet.(e.g. v3_code_table)
-*/
+ * The conversion table has the adc value and temperature.
+ * ADC_DECREMENT: the adc value decreases along the table (e.g. v2_code_table).
+ * ADC_INCREMENT: the adc value increases along the table (e.g. v3_code_table).
+ */
 enum adc_sort_mode {
        ADC_DECREMENT = 0,
        ADC_INCREMENT,
@@ -72,16 +72,17 @@ enum adc_sort_mode {
  */
 #define SOC_MAX_SENSORS        2
 
+/**
+ * struct chip_tsadc_table - hold information about chip-specific differences
+ * @id: conversion table
+ * @length: size of conversion table
+ * @data_mask: mask to apply on data inputs
+ * @mode: sort mode of this adc variant (incrementing or decrementing)
+ */
 struct chip_tsadc_table {
        const struct tsadc_table *id;
-
-       /* the array table size*/
        unsigned int length;
-
-       /* that analogic mask data */
        u32 data_mask;
-
-       /* the sort mode is adc value that increment or decrement in table */
        enum adc_sort_mode mode;
 };
 
@@ -153,6 +154,7 @@ struct rockchip_thermal_data {
 #define TSADCV2_SHUT_2GPIO_SRC_EN(chn)         BIT(4 + (chn))
 #define TSADCV2_SHUT_2CRU_SRC_EN(chn)          BIT(8 + (chn))
 
+#define TSADCV1_INT_PD_CLEAR_MASK              ~BIT(16)
 #define TSADCV2_INT_PD_CLEAR_MASK              ~BIT(8)
 
 #define TSADCV2_DATA_MASK                      0xfff
@@ -168,6 +170,51 @@ struct tsadc_table {
        int temp;
 };
 
+/**
+ * Note:
+ * Code to Temperature mapping of the Temperature sensor is a piecewise linear
+ * curve. Any temperature or code falling between two given temperatures can be
+ * linearly interpolated.
+ * Code to Temperature mapping should be updated based on silicon results.
+ */
+static const struct tsadc_table v1_code_table[] = {
+       {TSADCV3_DATA_MASK, -40000},
+       {436, -40000},
+       {431, -35000},
+       {426, -30000},
+       {421, -25000},
+       {416, -20000},
+       {411, -15000},
+       {406, -10000},
+       {401, -5000},
+       {395, 0},
+       {390, 5000},
+       {385, 10000},
+       {380, 15000},
+       {375, 20000},
+       {370, 25000},
+       {364, 30000},
+       {359, 35000},
+       {354, 40000},
+       {349, 45000},
+       {343, 50000},
+       {338, 55000},
+       {333, 60000},
+       {328, 65000},
+       {322, 70000},
+       {317, 75000},
+       {312, 80000},
+       {307, 85000},
+       {301, 90000},
+       {296, 95000},
+       {291, 100000},
+       {286, 105000},
+       {280, 110000},
+       {275, 115000},
+       {270, 120000},
+       {264, 125000},
+};
+
 static const struct tsadc_table v2_code_table[] = {
        {TSADCV2_DATA_MASK, -40000},
        {3800, -40000},
@@ -245,6 +292,44 @@ static const struct tsadc_table v3_code_table[] = {
        {TSADCV3_DATA_MASK, 125000},
 };
 
+static const struct tsadc_table v4_code_table[] = {
+       {TSADCV3_DATA_MASK, -40000},
+       {431, -40000},
+       {426, -35000},
+       {421, -30000},
+       {415, -25000},
+       {410, -20000},
+       {405, -15000},
+       {399, -10000},
+       {394, -5000},
+       {389, 0},
+       {383, 5000},
+       {378, 10000},
+       {373, 15000},
+       {367, 20000},
+       {362, 25000},
+       {357, 30000},
+       {351, 35000},
+       {346, 40000},
+       {340, 45000},
+       {335, 50000},
+       {330, 55000},
+       {324, 60000},
+       {319, 65000},
+       {313, 70000},
+       {308, 75000},
+       {302, 80000},
+       {297, 85000},
+       {291, 90000},
+       {286, 95000},
+       {281, 100000},
+       {275, 105000},
+       {270, 110000},
+       {264, 115000},
+       {259, 120000},
+       {253, 125000},
+};
+
 static u32 rk_tsadcv2_temp_to_code(struct chip_tsadc_table table,
                                   int temp)
 {
@@ -368,6 +453,14 @@ static void rk_tsadcv2_initialize(void __iomem *regs,
                       regs + TSADCV2_HIGHT_TSHUT_DEBOUNCE);
 }
 
+static void rk_tsadcv1_irq_ack(void __iomem *regs)
+{
+       u32 val;
+
+       val = readl_relaxed(regs + TSADCV2_INT_PD);
+       writel_relaxed(val & TSADCV1_INT_PD_CLEAR_MASK, regs + TSADCV2_INT_PD);
+}
+
 static void rk_tsadcv2_irq_ack(void __iomem *regs)
 {
        u32 val;
@@ -429,6 +522,29 @@ static void rk_tsadcv2_tshut_mode(int chn, void __iomem *regs,
        writel_relaxed(val, regs + TSADCV2_INT_EN);
 }
 
+static const struct rockchip_tsadc_chip rk3228_tsadc_data = {
+       .chn_id[SENSOR_CPU] = 0, /* cpu sensor is channel 0 */
+       .chn_num = 1, /* one channel for tsadc */
+
+       .tshut_mode = TSHUT_MODE_GPIO, /* default TSHUT via GPIO give PMIC */
+       .tshut_polarity = TSHUT_LOW_ACTIVE, /* default TSHUT LOW ACTIVE */
+       .tshut_temp = 95000,
+
+       .initialize = rk_tsadcv2_initialize,
+       .irq_ack = rk_tsadcv1_irq_ack,
+       .control = rk_tsadcv2_control,
+       .get_temp = rk_tsadcv2_get_temp,
+       .set_tshut_temp = rk_tsadcv2_tshut_temp,
+       .set_tshut_mode = rk_tsadcv2_tshut_mode,
+
+       .table = {
+               .id = v1_code_table,
+               .length = ARRAY_SIZE(v1_code_table),
+               .data_mask = TSADCV3_DATA_MASK,
+               .mode = ADC_DECREMENT,
+       },
+};
+
 static const struct rockchip_tsadc_chip rk3288_tsadc_data = {
        .chn_id[SENSOR_CPU] = 1, /* cpu sensor is channel 1 */
        .chn_id[SENSOR_GPU] = 2, /* gpu sensor is channel 2 */
@@ -477,7 +593,35 @@ static const struct rockchip_tsadc_chip rk3368_tsadc_data = {
        },
 };
 
+static const struct rockchip_tsadc_chip rk3399_tsadc_data = {
+       .chn_id[SENSOR_CPU] = 0, /* cpu sensor is channel 0 */
+       .chn_id[SENSOR_GPU] = 1, /* gpu sensor is channel 1 */
+       .chn_num = 2, /* two channels for tsadc */
+
+       .tshut_mode = TSHUT_MODE_GPIO, /* default TSHUT via GPIO give PMIC */
+       .tshut_polarity = TSHUT_LOW_ACTIVE, /* default TSHUT LOW ACTIVE */
+       .tshut_temp = 95000,
+
+       .initialize = rk_tsadcv2_initialize,
+       .irq_ack = rk_tsadcv1_irq_ack,
+       .control = rk_tsadcv2_control,
+       .get_temp = rk_tsadcv2_get_temp,
+       .set_tshut_temp = rk_tsadcv2_tshut_temp,
+       .set_tshut_mode = rk_tsadcv2_tshut_mode,
+
+       .table = {
+               .id = v4_code_table,
+               .length = ARRAY_SIZE(v4_code_table),
+               .data_mask = TSADCV3_DATA_MASK,
+               .mode = ADC_DECREMENT,
+       },
+};
+
 static const struct of_device_id of_rockchip_thermal_match[] = {
+       {
+               .compatible = "rockchip,rk3228-tsadc",
+               .data = (void *)&rk3228_tsadc_data,
+       },
        {
                .compatible = "rockchip,rk3288-tsadc",
                .data = (void *)&rk3288_tsadc_data,
@@ -486,6 +630,10 @@ static const struct of_device_id of_rockchip_thermal_match[] = {
                .compatible = "rockchip,rk3368-tsadc",
                .data = (void *)&rk3368_tsadc_data,
        },
+       {
+               .compatible = "rockchip,rk3399-tsadc",
+               .data = (void *)&rk3399_tsadc_data,
+       },
        { /* end */ },
 };
 MODULE_DEVICE_TABLE(of, of_rockchip_thermal_match);
@@ -617,7 +765,7 @@ rockchip_thermal_register_sensor(struct platform_device *pdev,
        return 0;
 }
 
-/*
+/**
  * Reset TSADC Controller, reset all tsadc registers.
  */
 static void rockchip_thermal_reset_controller(struct reset_control *reset)
index 2f9f708..ea9366a 100644 (file)
@@ -63,6 +63,19 @@ static unsigned long get_target_state(struct thermal_instance *instance,
        next_target = instance->target;
        dev_dbg(&cdev->device, "cur_state=%ld\n", cur_state);
 
+       if (!instance->initialized) {
+               if (throttle) {
+                       next_target = (cur_state + 1) >= instance->upper ?
+                                       instance->upper :
+                                       ((cur_state + 1) < instance->lower ?
+                                       instance->lower : (cur_state + 1));
+               } else {
+                       next_target = THERMAL_NO_TARGET;
+               }
+
+               return next_target;
+       }
+
        switch (trend) {
        case THERMAL_TREND_RAISING:
                if (throttle) {
@@ -149,7 +162,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
                dev_dbg(&instance->cdev->device, "old_target=%d, target=%d\n",
                                        old_target, (int)instance->target);
 
-               if (old_target == instance->target)
+               if (instance->initialized && old_target == instance->target)
                        continue;
 
                /* Activate a passive thermal instance */
@@ -161,7 +174,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
                        instance->target == THERMAL_NO_TARGET)
                        update_passive_instance(tz, trip_type, -1);
 
-
+               instance->initialized = true;
                instance->cdev->updated = false; /* cdev needs update */
        }
 
index d9e525c..a0a8fd1 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/of.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
+#include <linux/suspend.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/thermal.h>
@@ -59,6 +60,8 @@ static LIST_HEAD(thermal_governor_list);
 static DEFINE_MUTEX(thermal_list_lock);
 static DEFINE_MUTEX(thermal_governor_lock);
 
+static atomic_t in_suspend;
+
 static struct thermal_governor *def_governor;
 
 static struct thermal_governor *__find_governor(const char *name)
@@ -532,14 +535,31 @@ static void update_temperature(struct thermal_zone_device *tz)
        mutex_unlock(&tz->lock);
 
        trace_thermal_temperature(tz);
-       dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
-                               tz->last_temperature, tz->temperature);
+       if (tz->last_temperature == THERMAL_TEMP_INVALID)
+               dev_dbg(&tz->device, "last_temperature N/A, current_temperature=%d\n",
+                       tz->temperature);
+       else
+               dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
+                       tz->last_temperature, tz->temperature);
+}
+
+static void thermal_zone_device_reset(struct thermal_zone_device *tz)
+{
+       struct thermal_instance *pos;
+
+       tz->temperature = THERMAL_TEMP_INVALID;
+       tz->passive = 0;
+       list_for_each_entry(pos, &tz->thermal_instances, tz_node)
+               pos->initialized = false;
 }
 
 void thermal_zone_device_update(struct thermal_zone_device *tz)
 {
        int count;
 
+       if (atomic_read(&in_suspend))
+               return;
+
        if (!tz->ops->get_temp)
                return;
 
@@ -676,8 +696,12 @@ trip_point_temp_store(struct device *dev, struct device_attribute *attr,
                return -EINVAL;
 
        ret = tz->ops->set_trip_temp(tz, trip, temperature);
+       if (ret)
+               return ret;
 
-       return ret ? ret : count;
+       thermal_zone_device_update(tz);
+
+       return count;
 }
 
 static ssize_t
@@ -1321,6 +1345,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
        if (!result) {
                list_add_tail(&dev->tz_node, &tz->thermal_instances);
                list_add_tail(&dev->cdev_node, &cdev->thermal_instances);
+               atomic_set(&tz->need_update, 1);
        }
        mutex_unlock(&cdev->lock);
        mutex_unlock(&tz->lock);
@@ -1430,6 +1455,7 @@ __thermal_cooling_device_register(struct device_node *np,
                                  const struct thermal_cooling_device_ops *ops)
 {
        struct thermal_cooling_device *cdev;
+       struct thermal_zone_device *pos = NULL;
        int result;
 
        if (type && strlen(type) >= THERMAL_NAME_LENGTH)
@@ -1474,6 +1500,12 @@ __thermal_cooling_device_register(struct device_node *np,
        /* Update binding information for 'this' new cdev */
        bind_cdev(cdev);
 
+       mutex_lock(&thermal_list_lock);
+       list_for_each_entry(pos, &thermal_tz_list, node)
+               if (atomic_cmpxchg(&pos->need_update, 1, 0))
+                       thermal_zone_device_update(pos);
+       mutex_unlock(&thermal_list_lock);
+
        return cdev;
 }
 
@@ -1806,6 +1838,8 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
        tz->trips = trips;
        tz->passive_delay = passive_delay;
        tz->polling_delay = polling_delay;
+       /* A new thermal zone needs to be updated anyway. */
+       atomic_set(&tz->need_update, 1);
 
        dev_set_name(&tz->device, "thermal_zone%d", tz->id);
        result = device_register(&tz->device);
@@ -1900,7 +1934,10 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 
        INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);
 
-       thermal_zone_device_update(tz);
+       thermal_zone_device_reset(tz);
+       /* Update the new thermal zone and mark it as already updated. */
+       if (atomic_cmpxchg(&tz->need_update, 1, 0))
+               thermal_zone_device_update(tz);
 
        return tz;
 
@@ -2140,6 +2177,56 @@ static void thermal_unregister_governors(void)
        thermal_gov_power_allocator_unregister();
 }
 
+/**
+ * thermal_pm_notify() - PM notifier callback for the thermal core
+ * @nb: notifier block the callback was invoked through (unused)
+ * @mode: PM event being reported
+ * @_unused: unused
+ *
+ * Sets in_suspend when a suspend, hibernation or restore transition is
+ * being prepared.  Once the transition is over, clears in_suspend, then
+ * resets and updates every zone on thermal_tz_list.
+ *
+ * Return: always 0.
+ */
+static int thermal_pm_notify(struct notifier_block *nb,
+                               unsigned long mode, void *_unused)
+{
+       struct thermal_zone_device *tz;
+
+       switch (mode) {
+       case PM_HIBERNATION_PREPARE:
+       case PM_RESTORE_PREPARE:
+       case PM_SUSPEND_PREPARE:
+               atomic_set(&in_suspend, 1);
+               break;
+       case PM_POST_HIBERNATION:
+       case PM_POST_RESTORE:
+       case PM_POST_SUSPEND:
+               atomic_set(&in_suspend, 0);
+               /*
+                * Walk thermal_tz_list under thermal_list_lock, as the
+                * cooling device registration path does, so the walk is
+                * serialized against the other users of that lock.
+                */
+               mutex_lock(&thermal_list_lock);
+               list_for_each_entry(tz, &thermal_tz_list, node) {
+                       thermal_zone_device_reset(tz);
+                       thermal_zone_device_update(tz);
+               }
+               mutex_unlock(&thermal_list_lock);
+               break;
+       default:
+               break;
+       }
+       return 0;
+}
+
+/* Notifier block that routes PM events to thermal_pm_notify(). */
+static struct notifier_block thermal_pm_nb = {
+       .notifier_call = thermal_pm_notify,
+};
+
 static int __init thermal_init(void)
 {
        int result;
@@ -2160,6 +2227,11 @@ static int __init thermal_init(void)
        if (result)
                goto exit_netlink;
 
+       result = register_pm_notifier(&thermal_pm_nb);
+       if (result)
+               pr_warn("Thermal: Can not register suspend notifier, return %d\n",
+                       result);
+
        return 0;
 
 exit_netlink:
@@ -2179,6 +2251,7 @@ error:
 
 static void __exit thermal_exit(void)
 {
+       unregister_pm_notifier(&thermal_pm_nb);
        of_thermal_destroy_zones();
        genetlink_exit();
        class_unregister(&thermal_class);
index d7ac1fc..749d41a 100644 (file)
@@ -41,6 +41,7 @@ struct thermal_instance {
        struct thermal_zone_device *tz;
        struct thermal_cooling_device *cdev;
        int trip;
+       bool initialized;
        unsigned long upper;    /* Highest cooling state for this trip point */
        unsigned long lower;    /* Lowest cooling state for this trip point */
        unsigned long target;   /* expected cooling state */
index d9a5fc2..b280aba 100644 (file)
@@ -269,16 +269,13 @@ static void n_tty_check_throttle(struct tty_struct *tty)
 
 static void n_tty_check_unthrottle(struct tty_struct *tty)
 {
-       if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
-           tty->link->ldisc->ops->write_wakeup == n_tty_write_wakeup) {
+       if (tty->driver->type == TTY_DRIVER_TYPE_PTY) {
                if (chars_in_buffer(tty) > TTY_THRESHOLD_UNTHROTTLE)
                        return;
                if (!tty->count)
                        return;
                n_tty_kick_worker(tty);
-               n_tty_write_wakeup(tty->link);
-               if (waitqueue_active(&tty->link->write_wait))
-                       wake_up_interruptible_poll(&tty->link->write_wait, POLLOUT);
+               tty_wakeup(tty->link);
                return;
        }
 
index 4097f3f..e71ec78 100644 (file)
@@ -1379,6 +1379,9 @@ ce4100_serial_setup(struct serial_private *priv,
 #define PCI_DEVICE_ID_INTEL_BSW_UART1  0x228a
 #define PCI_DEVICE_ID_INTEL_BSW_UART2  0x228c
 
+#define PCI_DEVICE_ID_INTEL_BDW_UART1  0x9ce3
+#define PCI_DEVICE_ID_INTEL_BDW_UART2  0x9ce4
+
 #define BYT_PRV_CLK                    0x800
 #define BYT_PRV_CLK_EN                 (1 << 0)
 #define BYT_PRV_CLK_M_VAL_SHIFT                1
@@ -1461,11 +1464,13 @@ byt_serial_setup(struct serial_private *priv,
        switch (pdev->device) {
        case PCI_DEVICE_ID_INTEL_BYT_UART1:
        case PCI_DEVICE_ID_INTEL_BSW_UART1:
+       case PCI_DEVICE_ID_INTEL_BDW_UART1:
                rx_param->src_id = 3;
                tx_param->dst_id = 2;
                break;
        case PCI_DEVICE_ID_INTEL_BYT_UART2:
        case PCI_DEVICE_ID_INTEL_BSW_UART2:
+       case PCI_DEVICE_ID_INTEL_BDW_UART2:
                rx_param->src_id = 5;
                tx_param->dst_id = 4;
                break;
@@ -2062,6 +2067,20 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = {
                .subdevice      = PCI_ANY_ID,
                .setup          = byt_serial_setup,
        },
+       {
+               .vendor         = PCI_VENDOR_ID_INTEL,
+               .device         = PCI_DEVICE_ID_INTEL_BDW_UART1,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .setup          = byt_serial_setup,
+       },
+       {
+               .vendor         = PCI_VENDOR_ID_INTEL,
+               .device         = PCI_DEVICE_ID_INTEL_BDW_UART2,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .setup          = byt_serial_setup,
+       },
        /*
         * ITE
         */
@@ -5506,6 +5525,16 @@ static struct pci_device_id serial_pci_tbl[] = {
                PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
                pbn_byt },
 
+       /* Intel Broadwell */
+       {       PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART1,
+               PCI_ANY_ID,  PCI_ANY_ID,
+               PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
+               pbn_byt },
+       {       PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART2,
+               PCI_ANY_ID,  PCI_ANY_ID,
+               PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
+               pbn_byt },
+
        /*
         * Intel Quark x1000
         */
index 892c923..5cec01c 100644 (file)
@@ -1463,13 +1463,13 @@ static int tty_reopen(struct tty_struct *tty)
 {
        struct tty_driver *driver = tty->driver;
 
-       if (!tty->count)
-               return -EIO;
-
        if (driver->type == TTY_DRIVER_TYPE_PTY &&
            driver->subtype == PTY_TYPE_MASTER)
                return -EIO;
 
+       if (!tty->count)
+               return -EAGAIN;
+
        if (test_bit(TTY_EXCLUSIVE, &tty->flags) && !capable(CAP_SYS_ADMIN))
                return -EBUSY;
 
@@ -2065,7 +2065,12 @@ retry_open:
 
                if (tty) {
                        mutex_unlock(&tty_mutex);
-                       tty_lock(tty);
+                       retval = tty_lock_interruptible(tty);
+                       if (retval) {
+                               if (retval == -EINTR)
+                                       retval = -ERESTARTSYS;
+                               goto err_unref;
+                       }
                        /* safe to drop the kref from tty_driver_lookup_tty() */
                        tty_kref_put(tty);
                        retval = tty_reopen(tty);
@@ -2083,7 +2088,11 @@ retry_open:
 
        if (IS_ERR(tty)) {
                retval = PTR_ERR(tty);
-               goto err_file;
+               if (retval != -EAGAIN || signal_pending(current))
+                       goto err_file;
+               tty_free_file(filp);
+               schedule();
+               goto retry_open;
        }
 
        tty_add_file(tty, filp);
@@ -2152,6 +2161,7 @@ retry_open:
        return 0;
 err_unlock:
        mutex_unlock(&tty_mutex);
+err_unref:
        /* after locks to avoid deadlock */
        if (!IS_ERR_OR_NULL(driver))
                tty_driver_kref_put(driver);
@@ -2648,6 +2658,28 @@ static int tiocsetd(struct tty_struct *tty, int __user *p)
        return ret;
 }
 
+/**
+ *     tiocgetd        -       report the current line discipline
+ *     @tty: tty device
+ *     @p: user pointer that receives the discipline number
+ *
+ *     Copies the number of the attached line discipline, read straight
+ *     from the ldisc ops, out to user space.
+ *
+ *     Locking: waits for an ldisc reference (the line discipline may
+ *             be changing or the tty may be being hung up)
+ *     Returns the result of put_user().
+ */
+
+static int tiocgetd(struct tty_struct *tty, int __user *p)
+{
+       struct tty_ldisc *ldisc = tty_ldisc_ref_wait(tty);
+       int err = put_user(ldisc->ops->num, p);
+
+       tty_ldisc_deref(ldisc);
+       return err;
+}
+
 /**
  *     send_break      -       performed time break
  *     @tty: device to break on
@@ -2874,7 +2906,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        case TIOCGSID:
                return tiocgsid(tty, real_tty, p);
        case TIOCGETD:
-               return put_user(tty->ldisc->ops->num, (int __user *)p);
+               return tiocgetd(tty, p);
        case TIOCSETD:
                return tiocsetd(tty, p);
        case TIOCVHANGUP:
index 77703a3..d2f3c4c 100644 (file)
@@ -19,6 +19,31 @@ void __lockfunc tty_lock(struct tty_struct *tty)
 }
 EXPORT_SYMBOL(tty_lock);
 
+/**
+ * tty_lock_interruptible - take the tty legacy mutex, interruptibly
+ * @tty: tty to lock
+ *
+ * Takes a reference on @tty and then acquires tty->legacy_mutex with
+ * mutex_lock_interruptible().  The reference is kept only when the
+ * mutex was acquired; on failure it is dropped again so that an
+ * interrupted caller does not leak a tty reference.
+ *
+ * Return: 0 with the mutex held, -EIO if @tty fails the magic check,
+ * otherwise the error returned by mutex_lock_interruptible().
+ */
+int tty_lock_interruptible(struct tty_struct *tty)
+{
+       int ret;
+
+       if (WARN(tty->magic != TTY_MAGIC, "L Bad %p\n", tty))
+               return -EIO;
+       tty_kref_get(tty);
+       ret = mutex_lock_interruptible(&tty->legacy_mutex);
+       if (ret)
+               tty_kref_put(tty);
+       return ret;
+}
+
 void __lockfunc tty_unlock(struct tty_struct *tty)
 {
        if (WARN(tty->magic != TTY_MAGIC, "U Bad %p\n", tty))
index e7cbc44..bd51bdd 100644 (file)
@@ -4250,6 +4250,7 @@ unsigned short *screen_pos(struct vc_data *vc, int w_offset, int viewed)
 {
        return screenpos(vc, 2 * w_offset, viewed);
 }
+EXPORT_SYMBOL_GPL(screen_pos);
 
 void getconsxy(struct vc_data *vc, unsigned char *p)
 {
index 26ca4f9..fa4e239 100644 (file)
@@ -428,7 +428,8 @@ static void acm_read_bulk_callback(struct urb *urb)
                set_bit(rb->index, &acm->read_urbs_free);
                dev_dbg(&acm->data->dev, "%s - non-zero urb status: %d\n",
                                                        __func__, status);
-               return;
+               if ((status != -ENOENT) || (urb->actual_length == 0))
+                       return;
        }
 
        usb_mark_last_busy(acm->dev);
@@ -1404,6 +1405,8 @@ made_compressed_probe:
                                usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress),
                                NULL, acm->writesize, acm_write_bulk, snd);
                snd->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+               if (quirks & SEND_ZERO_PACKET)
+                       snd->urb->transfer_flags |= URB_ZERO_PACKET;
                snd->instance = acm;
        }
 
@@ -1838,6 +1841,11 @@ static const struct usb_device_id acm_ids[] = {
        },
 #endif
 
+       /*Samsung phone in firmware update mode */
+       { USB_DEVICE(0x04e8, 0x685d),
+       .driver_info = IGNORE_DEVICE,
+       },
+
        /* Exclude Infineon Flash Loader utility */
        { USB_DEVICE(0x058b, 0x0041),
        .driver_info = IGNORE_DEVICE,
@@ -1861,6 +1869,10 @@ static const struct usb_device_id acm_ids[] = {
        { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
                USB_CDC_ACM_PROTO_AT_CDMA) },
 
+       { USB_DEVICE(0x1519, 0x0452), /* Intel 7260 modem */
+       .driver_info = SEND_ZERO_PACKET,
+       },
+
        { }
 };
 
index dd9af38..ccfaba9 100644 (file)
@@ -134,3 +134,4 @@ struct acm {
 #define IGNORE_DEVICE                  BIT(5)
 #define QUIRK_CONTROL_LINE_STATE       BIT(6)
 #define CLEAR_HALT_CONDITIONS          BIT(7)
+#define SEND_ZERO_PACKET               BIT(8)
index 51b4369..350dcd9 100644 (file)
@@ -5401,7 +5401,6 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
        }
 
        bos = udev->bos;
-       udev->bos = NULL;
 
        for (i = 0; i < SET_CONFIG_TRIES; ++i) {
 
@@ -5494,8 +5493,11 @@ done:
        usb_set_usb2_hardware_lpm(udev, 1);
        usb_unlocked_enable_lpm(udev);
        usb_enable_ltm(udev);
-       usb_release_bos_descriptor(udev);
-       udev->bos = bos;
+       /* release the new BOS descriptor allocated by hub_port_init() */
+       if (udev->bos != bos) {
+               usb_release_bos_descriptor(udev);
+               udev->bos = bos;
+       }
        return 0;
 
 re_enumerate:
index 39a0fa8..e991d55 100644 (file)
@@ -572,12 +572,6 @@ static bool dwc2_force_mode(struct dwc2_hsotg *hsotg, bool host)
        set = host ? GUSBCFG_FORCEHOSTMODE : GUSBCFG_FORCEDEVMODE;
        clear = host ? GUSBCFG_FORCEDEVMODE : GUSBCFG_FORCEHOSTMODE;
 
-       /*
-        * If the force mode bit is already set, don't set it.
-        */
-       if ((gusbcfg & set) && !(gusbcfg & clear))
-               return false;
-
        gusbcfg &= ~clear;
        gusbcfg |= set;
        dwc2_writel(gusbcfg, hsotg->regs + GUSBCFG);
@@ -3278,9 +3272,6 @@ static void dwc2_get_dev_hwparams(struct dwc2_hsotg *hsotg)
 /**
  * During device initialization, read various hardware configuration
  * registers and interpret the contents.
- *
- * This should be called during driver probe. It will perform a core
- * soft reset in order to get the reset values of the parameters.
  */
 int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
 {
@@ -3288,7 +3279,6 @@ int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
        unsigned width;
        u32 hwcfg1, hwcfg2, hwcfg3, hwcfg4;
        u32 grxfsiz;
-       int retval;
 
        /*
         * Attempt to ensure this device is really a DWC_otg Controller.
@@ -3308,10 +3298,6 @@ int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
                hw->snpsid >> 12 & 0xf, hw->snpsid >> 8 & 0xf,
                hw->snpsid >> 4 & 0xf, hw->snpsid & 0xf, hw->snpsid);
 
-       retval = dwc2_core_reset(hsotg);
-       if (retval)
-               return retval;
-
        hwcfg1 = dwc2_readl(hsotg->regs + GHWCFG1);
        hwcfg2 = dwc2_readl(hsotg->regs + GHWCFG2);
        hwcfg3 = dwc2_readl(hsotg->regs + GHWCFG3);
index 510f787..690b9fd 100644 (file)
@@ -530,7 +530,13 @@ static int dwc2_driver_probe(struct platform_device *dev)
        if (retval)
                return retval;
 
-       /* Reset the controller and detect hardware config values */
+       /*
+        * Reset the core before calling dwc2_get_hwparams() so that it reads
+        * the real power-on reset values from the registers.
+        */
+       dwc2_core_reset_and_force_dr_mode(hsotg);
+
+       /* Detect config values from hardware */
        retval = dwc2_get_hwparams(hsotg);
        if (retval)
                goto error;
index af023a8..7d1dd82 100644 (file)
@@ -2789,6 +2789,7 @@ int dwc3_gadget_init(struct dwc3 *dwc)
        dwc->gadget.speed               = USB_SPEED_UNKNOWN;
        dwc->gadget.sg_supported        = true;
        dwc->gadget.name                = "dwc3-gadget";
+       dwc->gadget.is_otg              = dwc->dr_mode == USB_DR_MODE_OTG;
 
        /*
         * FIXME We might be setting max_speed to <SUPER, however versions
index 0fbfb2b..26ccad5 100644 (file)
@@ -673,7 +673,7 @@ printer_fsync(struct file *fd, loff_t start, loff_t end, int datasync)
        unsigned long           flags;
        int                     tx_list_empty;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        spin_lock_irqsave(&dev->lock, flags);
        tx_list_empty = (likely(list_empty(&dev->tx_reqs)));
        spin_unlock_irqrestore(&dev->lock, flags);
@@ -683,7 +683,7 @@ printer_fsync(struct file *fd, loff_t start, loff_t end, int datasync)
                wait_event_interruptible(dev->tx_flush_wait,
                                (likely(list_empty(&dev->tx_reqs_active))));
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return 0;
 }
index 365afd7..7e179f8 100644 (file)
@@ -1521,10 +1521,10 @@ static void destroy_ep_files (struct dev_data *dev)
                spin_unlock_irq (&dev->lock);
 
                /* break link to dcache */
-               mutex_lock (&parent->i_mutex);
+               inode_lock(parent);
                d_delete (dentry);
                dput (dentry);
-               mutex_unlock (&parent->i_mutex);
+               inode_unlock(parent);
 
                spin_lock_irq (&dev->lock);
        }
index f92f5af..8755b2c 100644 (file)
@@ -91,7 +91,7 @@ static ssize_t queue_dbg_read(struct file *file, char __user *buf,
        if (!access_ok(VERIFY_WRITE, buf, nbytes))
                return -EFAULT;
 
-       mutex_lock(&file_inode(file)->i_mutex);
+       inode_lock(file_inode(file));
        list_for_each_entry_safe(req, tmp_req, queue, queue) {
                len = snprintf(tmpbuf, sizeof(tmpbuf),
                                "%8p %08x %c%c%c %5d %c%c%c\n",
@@ -118,7 +118,7 @@ static ssize_t queue_dbg_read(struct file *file, char __user *buf,
                nbytes -= len;
                buf += len;
        }
-       mutex_unlock(&file_inode(file)->i_mutex);
+       inode_unlock(file_inode(file));
 
        return actual;
 }
@@ -143,7 +143,7 @@ static int regs_dbg_open(struct inode *inode, struct file *file)
        u32 *data;
        int ret = -ENOMEM;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        udc = inode->i_private;
        data = kmalloc(inode->i_size, GFP_KERNEL);
        if (!data)
@@ -158,7 +158,7 @@ static int regs_dbg_open(struct inode *inode, struct file *file)
        ret = 0;
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return ret;
 }
@@ -169,11 +169,11 @@ static ssize_t regs_dbg_read(struct file *file, char __user *buf,
        struct inode *inode = file_inode(file);
        int ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = simple_read_from_buffer(buf, nbytes, ppos,
                        file->private_data,
                        file_inode(file)->i_size);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return ret;
 }
index daa563f..1f117c3 100644 (file)
@@ -229,6 +229,8 @@ config USB_EHCI_TEGRA
        depends on ARCH_TEGRA
        select USB_EHCI_ROOT_HUB_TT
        select USB_PHY
+       select USB_ULPI
+       select USB_ULPI_VIEWPORT
        help
          This driver enables support for the internal USB Host Controllers
          found in NVIDIA Tegra SoCs. The controllers are EHCI compliant.
index 04ce6b1..e0244fb 100644 (file)
@@ -112,12 +112,16 @@ static inline int xhci_find_next_ext_cap(void __iomem *base, u32 start, int id)
        offset = start;
        if (!start || start == XHCI_HCC_PARAMS_OFFSET) {
                val = readl(base + XHCI_HCC_PARAMS_OFFSET);
+               if (val == ~0)
+                       return 0;
                offset = XHCI_HCC_EXT_CAPS(val) << 2;
                if (!offset)
                        return 0;
        };
        do {
                val = readl(base + offset);
+               if (val == ~0)
+                       return 0;
                if (XHCI_EXT_CAPS_ID(val) == id && offset != start)
                        return offset;
 
index c30de7c..73f763c 100644 (file)
@@ -275,8 +275,9 @@ static bool need_bw_sch(struct usb_host_endpoint *ep,
                return false;
 
        /*
-        * for LS & FS periodic endpoints which its device don't attach
-        * to TT are also ignored, root-hub will schedule them directly
+        * LS & FS periodic endpoints whose device is not behind a TT are
+        * also ignored; the root hub schedules them directly, but the
+        * @bpkts field of the endpoint context still needs to be set to 1.
         */
        if (is_fs_or_ls(speed) && !has_tt)
                return false;
@@ -339,8 +340,17 @@ int xhci_mtk_add_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev,
                GET_MAX_PACKET(usb_endpoint_maxp(&ep->desc)),
                usb_endpoint_dir_in(&ep->desc), ep);
 
-       if (!need_bw_sch(ep, udev->speed, slot_ctx->tt_info & TT_SLOT))
+       if (!need_bw_sch(ep, udev->speed, slot_ctx->tt_info & TT_SLOT)) {
+               /*
+                * set @bpkts to 1 for an LS or FS periodic endpoint whose
+                * device is not connected through an external HS hub
+                */
+               if (usb_endpoint_xfer_int(&ep->desc)
+                       || usb_endpoint_xfer_isoc(&ep->desc))
+                       ep_ctx->reserved[0] |= cpu_to_le32(EP_BPKTS(1));
+
                return 0;
+       }
 
        bw_index = get_bw_index(xhci, udev, ep);
        sch_bw = &sch_array[bw_index];
index c9ab6a4..9532f5a 100644 (file)
@@ -696,9 +696,24 @@ static int xhci_mtk_remove(struct platform_device *dev)
 }
 
 #ifdef CONFIG_PM_SLEEP
+/*
+ * If ip sleep fails and all clocks are disabled, accessing registers will
+ * hang the AHB bus, so stop polling the root hubs to avoid register access
+ * on bus suspend. There is no need to check whether ip sleep failed: if it
+ * did, SPM wakes the system up immediately after system suspend completes,
+ * which is what we want.
+ */
 static int xhci_mtk_suspend(struct device *dev)
 {
        struct xhci_hcd_mtk *mtk = dev_get_drvdata(dev);
+       struct usb_hcd *hcd = mtk->hcd;
+       struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+
+       xhci_dbg(xhci, "%s: stop port polling\n", __func__);
+       clear_bit(HCD_FLAG_POLL_RH, &hcd->flags);
+       del_timer_sync(&hcd->rh_timer);
+       clear_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
+       del_timer_sync(&xhci->shared_hcd->rh_timer);
 
        xhci_mtk_host_disable(mtk);
        xhci_mtk_phy_power_off(mtk);
@@ -710,11 +725,19 @@ static int xhci_mtk_suspend(struct device *dev)
 static int xhci_mtk_resume(struct device *dev)
 {
        struct xhci_hcd_mtk *mtk = dev_get_drvdata(dev);
+       struct usb_hcd *hcd = mtk->hcd;
+       struct xhci_hcd *xhci = hcd_to_xhci(hcd);
 
        usb_wakeup_disable(mtk);
        xhci_mtk_clks_enable(mtk);
        xhci_mtk_phy_power_on(mtk);
        xhci_mtk_host_enable(mtk);
+
+       xhci_dbg(xhci, "%s: restart port polling\n", __func__);
+       set_bit(HCD_FLAG_POLL_RH, &hcd->flags);
+       usb_hcd_poll_rh_status(hcd);
+       set_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
+       usb_hcd_poll_rh_status(xhci->shared_hcd);
        return 0;
 }
 
index 58c43ed..f0640b7 100644 (file)
@@ -28,7 +28,9 @@
 #include "xhci.h"
 #include "xhci-trace.h"
 
-#define PORT2_SSIC_CONFIG_REG2 0x883c
+#define SSIC_PORT_NUM          2
+#define SSIC_PORT_CFG2         0x880c
+#define SSIC_PORT_CFG2_OFFSET  0x30
 #define PROG_DONE              (1 << 30)
 #define SSIC_PORT_UNUSED       (1 << 31)
 
@@ -45,6 +47,7 @@
 #define PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI            0x22b5
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI                0xa12f
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI       0x9d2f
+#define PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI             0x0aa8
 
 static const char hcd_name[] = "xhci_hcd";
 
@@ -151,9 +154,14 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
        if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
                (pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI ||
                 pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI ||
-                pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI)) {
+                pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI ||
+                pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI)) {
                xhci->quirks |= XHCI_PME_STUCK_QUIRK;
        }
+       if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
+                pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) {
+               xhci->quirks |= XHCI_SSIC_PORT_UNUSED;
+       }
        if (pdev->vendor == PCI_VENDOR_ID_ETRON &&
                        pdev->device == PCI_DEVICE_ID_EJ168) {
                xhci->quirks |= XHCI_RESET_ON_RESUME;
@@ -312,22 +320,20 @@ static void xhci_pci_remove(struct pci_dev *dev)
  * SSIC PORT need to be marked as "unused" before putting xHCI
  * into D3. After D3 exit, the SSIC port need to be marked as "used".
  * Without this change, xHCI might not enter D3 state.
- * Make sure PME works on some Intel xHCI controllers by writing 1 to clear
- * the Internal PME flag bit in vendor specific PMCTRL register at offset 0x80a4
  */
-static void xhci_pme_quirk(struct usb_hcd *hcd, bool suspend)
+static void xhci_ssic_port_unused_quirk(struct usb_hcd *hcd, bool suspend)
 {
        struct xhci_hcd *xhci = hcd_to_xhci(hcd);
-       struct pci_dev          *pdev = to_pci_dev(hcd->self.controller);
        u32 val;
        void __iomem *reg;
+       int i;
 
-       if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
-                pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) {
-
-               reg = (void __iomem *) xhci->cap_regs + PORT2_SSIC_CONFIG_REG2;
+       for (i = 0; i < SSIC_PORT_NUM; i++) {
+               reg = (void __iomem *) xhci->cap_regs +
+                               SSIC_PORT_CFG2 +
+                               i * SSIC_PORT_CFG2_OFFSET;
 
-               /* Notify SSIC that SSIC profile programming is not done */
+               /* Notify SSIC that SSIC profile programming is not done. */
                val = readl(reg) & ~PROG_DONE;
                writel(val, reg);
 
@@ -344,6 +350,17 @@ static void xhci_pme_quirk(struct usb_hcd *hcd, bool suspend)
                writel(val, reg);
                readl(reg);
        }
+}
+
+/*
+ * Make sure PME works on some Intel xHCI controllers by writing 1 to clear
+ * the Internal PME flag bit in vendor specific PMCTRL register at offset 0x80a4
+ */
+static void xhci_pme_quirk(struct usb_hcd *hcd)
+{
+       struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+       void __iomem *reg;
+       u32 val;
 
        reg = (void __iomem *) xhci->cap_regs + 0x80a4;
        val = readl(reg);
@@ -355,6 +372,7 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup)
 {
        struct xhci_hcd *xhci = hcd_to_xhci(hcd);
        struct pci_dev          *pdev = to_pci_dev(hcd->self.controller);
+       int                     ret;
 
        /*
         * Systems with the TI redriver that loses port status change events
@@ -364,9 +382,16 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup)
                pdev->no_d3cold = true;
 
        if (xhci->quirks & XHCI_PME_STUCK_QUIRK)
-               xhci_pme_quirk(hcd, true);
+               xhci_pme_quirk(hcd);
+
+       if (xhci->quirks & XHCI_SSIC_PORT_UNUSED)
+               xhci_ssic_port_unused_quirk(hcd, true);
 
-       return xhci_suspend(xhci, do_wakeup);
+       ret = xhci_suspend(xhci, do_wakeup);
+       if (ret && (xhci->quirks & XHCI_SSIC_PORT_UNUSED))
+               xhci_ssic_port_unused_quirk(hcd, false);
+
+       return ret;
 }
 
 static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated)
@@ -396,8 +421,11 @@ static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated)
        if (pdev->vendor == PCI_VENDOR_ID_INTEL)
                usb_enable_intel_xhci_ports(pdev);
 
+       if (xhci->quirks & XHCI_SSIC_PORT_UNUSED)
+               xhci_ssic_port_unused_quirk(hcd, false);
+
        if (xhci->quirks & XHCI_PME_STUCK_QUIRK)
-               xhci_pme_quirk(hcd, false);
+               xhci_pme_quirk(hcd);
 
        retval = xhci_resume(xhci, hibernated);
        return retval;
index 770b6b0..d39d6bf 100644 (file)
@@ -184,7 +184,8 @@ static int xhci_plat_probe(struct platform_device *pdev)
                struct xhci_plat_priv *priv = hcd_to_xhci_priv(hcd);
 
                /* Just copy data for now */
-               *priv = *priv_match;
+               if (priv_match)
+                       *priv = *priv_match;
        }
 
        if (xhci_plat_type_is(hcd, XHCI_PLAT_TYPE_MARVELL_ARMADA)) {
index f1c21c4..3915657 100644 (file)
@@ -2193,10 +2193,6 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td,
                }
        /* Fast path - was this the last TRB in the TD for this URB? */
        } else if (event_trb == td->last_trb) {
-               if (td->urb_length_set && trb_comp_code == COMP_SHORT_TX)
-                       return finish_td(xhci, td, event_trb, event, ep,
-                                        status, false);
-
                if (EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)) != 0) {
                        td->urb->actual_length =
                                td->urb->transfer_buffer_length -
@@ -2248,12 +2244,6 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td,
                        td->urb->actual_length +=
                                TRB_LEN(le32_to_cpu(cur_trb->generic.field[2])) -
                                EVENT_TRB_LEN(le32_to_cpu(event->transfer_len));
-
-               if (trb_comp_code == COMP_SHORT_TX) {
-                       xhci_dbg(xhci, "mid bulk/intr SP, wait for last TRB event\n");
-                       td->urb_length_set = true;
-                       return 0;
-               }
        }
 
        return finish_td(xhci, td, event_trb, event, ep, status, false);
index 26a44c0..0c8087d 100644 (file)
@@ -1554,7 +1554,9 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
                xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
                                "HW died, freeing TD.");
                urb_priv = urb->hcpriv;
-               for (i = urb_priv->td_cnt; i < urb_priv->length; i++) {
+               for (i = urb_priv->td_cnt;
+                    i < urb_priv->length && xhci->devs[urb->dev->slot_id];
+                    i++) {
                        td = urb_priv->td[i];
                        if (!list_empty(&td->td_list))
                                list_del_init(&td->td_list);
index 9be7348..cc65138 100644 (file)
@@ -1631,6 +1631,7 @@ struct xhci_hcd {
 #define XHCI_BROKEN_STREAMS    (1 << 19)
 #define XHCI_PME_STUCK_QUIRK   (1 << 20)
 #define XHCI_MTK_HOST          (1 << 21)
+#define XHCI_SSIC_PORT_UNUSED  (1 << 22)
        unsigned int            num_active_eps;
        unsigned int            limit_active_eps;
        /* There are two roothubs to keep track of bus suspend info for */
index b2685e7..3eaa4ba 100644 (file)
@@ -348,7 +348,9 @@ static int ux500_suspend(struct device *dev)
        struct ux500_glue       *glue = dev_get_drvdata(dev);
        struct musb             *musb = glue_to_musb(glue);
 
-       usb_phy_set_suspend(musb->xceiv, 1);
+       if (musb)
+               usb_phy_set_suspend(musb->xceiv, 1);
+
        clk_disable_unprepare(glue->clk);
 
        return 0;
@@ -366,7 +368,8 @@ static int ux500_resume(struct device *dev)
                return ret;
        }
 
-       usb_phy_set_suspend(musb->xceiv, 0);
+       if (musb)
+               usb_phy_set_suspend(musb->xceiv, 0);
 
        return 0;
 }
index 0d19a6d..970a30e 100644 (file)
@@ -1599,6 +1599,8 @@ static int msm_otg_read_dt(struct platform_device *pdev, struct msm_otg *motg)
                                                &motg->id.nb);
                if (ret < 0) {
                        dev_err(&pdev->dev, "register ID notifier failed\n");
+                       extcon_unregister_notifier(motg->vbus.extcon,
+                                                  EXTCON_USB, &motg->vbus.nb);
                        return ret;
                }
 
@@ -1660,15 +1662,6 @@ static int msm_otg_probe(struct platform_device *pdev)
        if (!motg)
                return -ENOMEM;
 
-       pdata = dev_get_platdata(&pdev->dev);
-       if (!pdata) {
-               if (!np)
-                       return -ENXIO;
-               ret = msm_otg_read_dt(pdev, motg);
-               if (ret)
-                       return ret;
-       }
-
        motg->phy.otg = devm_kzalloc(&pdev->dev, sizeof(struct usb_otg),
                                     GFP_KERNEL);
        if (!motg->phy.otg)
@@ -1710,6 +1703,15 @@ static int msm_otg_probe(struct platform_device *pdev)
        if (!motg->regs)
                return -ENOMEM;
 
+       pdata = dev_get_platdata(&pdev->dev);
+       if (!pdata) {
+               if (!np)
+                       return -ENXIO;
+               ret = msm_otg_read_dt(pdev, motg);
+               if (ret)
+                       return ret;
+       }
+
        /*
         * NOTE: The PHYs can be multiplexed between the chipidea controller
         * and the dwc3 controller, using a single bit. It is important that
@@ -1717,8 +1719,10 @@ static int msm_otg_probe(struct platform_device *pdev)
         */
        if (motg->phy_number) {
                phy_select = devm_ioremap_nocache(&pdev->dev, USB2_PHY_SEL, 4);
-               if (!phy_select)
-                       return -ENOMEM;
+               if (!phy_select) {
+                       ret = -ENOMEM;
+                       goto unregister_extcon;
+               }
                /* Enable second PHY with the OTG port */
                writel(0x1, phy_select);
        }
@@ -1728,7 +1732,8 @@ static int msm_otg_probe(struct platform_device *pdev)
        motg->irq = platform_get_irq(pdev, 0);
        if (motg->irq < 0) {
                dev_err(&pdev->dev, "platform_get_irq failed\n");
-               return motg->irq;
+               ret = motg->irq;
+               goto unregister_extcon;
        }
 
        regs[0].supply = "vddcx";
@@ -1737,7 +1742,7 @@ static int msm_otg_probe(struct platform_device *pdev)
 
        ret = devm_regulator_bulk_get(motg->phy.dev, ARRAY_SIZE(regs), regs);
        if (ret)
-               return ret;
+               goto unregister_extcon;
 
        motg->vddcx = regs[0].consumer;
        motg->v3p3  = regs[1].consumer;
@@ -1834,6 +1839,12 @@ disable_clks:
        clk_disable_unprepare(motg->clk);
        if (!IS_ERR(motg->core_clk))
                clk_disable_unprepare(motg->core_clk);
+unregister_extcon:
+       extcon_unregister_notifier(motg->id.extcon,
+                                  EXTCON_USB_HOST, &motg->id.nb);
+       extcon_unregister_notifier(motg->vbus.extcon,
+                                  EXTCON_USB, &motg->vbus.nb);
+
        return ret;
 }
 
index c2936dc..00bfea0 100644 (file)
@@ -220,7 +220,7 @@ static int mxs_phy_hw_init(struct mxs_phy *mxs_phy)
 /* Return true if the vbus is there */
 static bool mxs_phy_get_vbus_status(struct mxs_phy *mxs_phy)
 {
-       unsigned int vbus_value;
+       unsigned int vbus_value = 0;
 
        if (!mxs_phy->regmap_anatop)
                return false;
index 9b90ad7..987813b 100644 (file)
@@ -99,6 +99,7 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x10C4, 0x81AC) }, /* MSD Dash Hawk */
        { USB_DEVICE(0x10C4, 0x81AD) }, /* INSYS USB Modem */
        { USB_DEVICE(0x10C4, 0x81C8) }, /* Lipowsky Industrie Elektronik GmbH, Baby-JTAG */
+       { USB_DEVICE(0x10C4, 0x81D7) }, /* IAI Corp. RCB-CV-USB USB to RS485 Adaptor */
        { USB_DEVICE(0x10C4, 0x81E2) }, /* Lipowsky Industrie Elektronik GmbH, Baby-LIN */
        { USB_DEVICE(0x10C4, 0x81E7) }, /* Aerocomm Radio */
        { USB_DEVICE(0x10C4, 0x81E8) }, /* Zephyr Bioharness */
index a5a0376..8c660ae 100644 (file)
@@ -824,6 +824,7 @@ static const struct usb_device_id id_table_combined[] = {
        { USB_DEVICE(FTDI_VID, FTDI_TURTELIZER_PID),
                .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
        { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_USB60F) },
+       { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_SCU18) },
        { USB_DEVICE(FTDI_VID, FTDI_REU_TINY_PID) },
 
        /* Papouch devices based on FTDI chip */
index 67c6d44..a84df25 100644 (file)
  */
 #define RATOC_VENDOR_ID                0x0584
 #define RATOC_PRODUCT_ID_USB60F        0xb020
+#define RATOC_PRODUCT_ID_SCU18 0xb03a
 
 /*
  * Infineon Technologies
index e3c3f57..6196073 100644 (file)
@@ -368,6 +368,16 @@ static int mxu1_port_probe(struct usb_serial_port *port)
        return 0;
 }
 
+static int mxu1_port_remove(struct usb_serial_port *port)
+{
+       struct mxu1_port *mxport;
+
+       mxport = usb_get_serial_port_data(port);
+       kfree(mxport);
+
+       return 0;
+}
+
 static int mxu1_startup(struct usb_serial *serial)
 {
        struct mxu1_device *mxdev;
@@ -427,6 +437,14 @@ err_free_mxdev:
        return err;
 }
 
+static void mxu1_release(struct usb_serial *serial)
+{
+       struct mxu1_device *mxdev;
+
+       mxdev = usb_get_serial_data(serial);
+       kfree(mxdev);
+}
+
 static int mxu1_write_byte(struct usb_serial_port *port, u32 addr,
                           u8 mask, u8 byte)
 {
@@ -957,7 +975,9 @@ static struct usb_serial_driver mxu11x0_device = {
        .id_table               = mxu1_idtable,
        .num_ports              = 1,
        .port_probe             = mxu1_port_probe,
+       .port_remove            = mxu1_port_remove,
        .attach                 = mxu1_startup,
+       .release                = mxu1_release,
        .open                   = mxu1_open,
        .close                  = mxu1_close,
        .ioctl                  = mxu1_ioctl,
index f228060..db86e51 100644 (file)
@@ -268,6 +268,8 @@ static void option_instat_callback(struct urb *urb);
 #define TELIT_PRODUCT_CC864_SINGLE             0x1006
 #define TELIT_PRODUCT_DE910_DUAL               0x1010
 #define TELIT_PRODUCT_UE910_V2                 0x1012
+#define TELIT_PRODUCT_LE922_USBCFG0            0x1042
+#define TELIT_PRODUCT_LE922_USBCFG3            0x1043
 #define TELIT_PRODUCT_LE920                    0x1200
 #define TELIT_PRODUCT_LE910                    0x1201
 
@@ -615,6 +617,16 @@ static const struct option_blacklist_info telit_le920_blacklist = {
        .reserved = BIT(1) | BIT(5),
 };
 
+static const struct option_blacklist_info telit_le922_blacklist_usbcfg0 = {
+       .sendsetup = BIT(2),
+       .reserved = BIT(0) | BIT(1) | BIT(3),
+};
+
+static const struct option_blacklist_info telit_le922_blacklist_usbcfg3 = {
+       .sendsetup = BIT(0),
+       .reserved = BIT(1) | BIT(2) | BIT(3),
+};
+
 static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) },
        { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) },
@@ -1160,6 +1172,10 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_CC864_SINGLE) },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_DE910_DUAL) },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UE910_V2) },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG0),
+               .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG3),
+               .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
                .driver_info = (kernel_ulong_t)&telit_le910_blacklist },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920),
@@ -1679,7 +1695,7 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_EU3_P) },
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PH8),
                .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
-       { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX) },
+       { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX, 0xff) },
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PLXX),
                .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) }, 
index 60afb39..337a0be 100644 (file)
@@ -544,6 +544,11 @@ static int treo_attach(struct usb_serial *serial)
                (serial->num_interrupt_in == 0))
                return 0;
 
+       if (serial->num_bulk_in < 2 || serial->num_interrupt_in < 2) {
+               dev_err(&serial->interface->dev, "missing endpoints\n");
+               return -ENODEV;
+       }
+
        /*
        * It appears that Treos and Kyoceras want to use the
        * 1st bulk in endpoint to communicate with the 2nd bulk out endpoint,
@@ -597,8 +602,10 @@ static int clie_5_attach(struct usb_serial *serial)
         */
 
        /* some sanity check */
-       if (serial->num_ports < 2)
-               return -1;
+       if (serial->num_bulk_out < 2) {
+               dev_err(&serial->interface->dev, "missing bulk out endpoints\n");
+               return -ENODEV;
+       }
 
        /* port 0 now uses the modified endpoint Address */
        port = serial->port[0];
index 82f25cc..ecca316 100644 (file)
@@ -123,8 +123,8 @@ struct iommu_group *vfio_iommu_group_get(struct device *dev)
        /*
         * With noiommu enabled, an IOMMU group will be created for a device
         * that doesn't already have one and doesn't have an iommu_ops on their
-        * bus.  We use iommu_present() again in the main code to detect these
-        * fake groups.
+        * bus.  We set iommudata simply to be able to identify these groups
+        * as special use and for reclamation later.
         */
        if (group || !noiommu || iommu_present(dev->bus))
                return group;
@@ -134,6 +134,7 @@ struct iommu_group *vfio_iommu_group_get(struct device *dev)
                return NULL;
 
        iommu_group_set_name(group, "vfio-noiommu");
+       iommu_group_set_iommudata(group, &noiommu, NULL);
        ret = iommu_group_add_device(group, dev);
        iommu_group_put(group);
        if (ret)
@@ -158,7 +159,7 @@ EXPORT_SYMBOL_GPL(vfio_iommu_group_get);
 void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
 {
 #ifdef CONFIG_VFIO_NOIOMMU
-       if (!iommu_present(dev->bus))
+       if (iommu_group_get_iommudata(group) == &noiommu)
                iommu_group_remove_device(dev);
 #endif
 
@@ -190,16 +191,10 @@ static long vfio_noiommu_ioctl(void *iommu_data,
        return -ENOTTY;
 }
 
-static int vfio_iommu_present(struct device *dev, void *unused)
-{
-       return iommu_present(dev->bus) ? 1 : 0;
-}
-
 static int vfio_noiommu_attach_group(void *iommu_data,
                                     struct iommu_group *iommu_group)
 {
-       return iommu_group_for_each_dev(iommu_group, NULL,
-                                       vfio_iommu_present) ? -EINVAL : 0;
+       return iommu_group_get_iommudata(iommu_group) == &noiommu ? 0 : -EINVAL;
 }
 
 static void vfio_noiommu_detach_group(void *iommu_data,
@@ -323,8 +318,7 @@ static void vfio_group_unlock_and_free(struct vfio_group *group)
 /**
  * Group objects - create, release, get, put, search
  */
-static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
-                                           bool iommu_present)
+static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
 {
        struct vfio_group *group, *tmp;
        struct device *dev;
@@ -342,7 +336,9 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
        atomic_set(&group->container_users, 0);
        atomic_set(&group->opened, 0);
        group->iommu_group = iommu_group;
-       group->noiommu = !iommu_present;
+#ifdef CONFIG_VFIO_NOIOMMU
+       group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
+#endif
 
        group->nb.notifier_call = vfio_iommu_group_notifier;
 
@@ -767,7 +763,7 @@ int vfio_add_group_dev(struct device *dev,
 
        group = vfio_group_get_from_iommu(iommu_group);
        if (!group) {
-               group = vfio_create_group(iommu_group, iommu_present(dev->bus));
+               group = vfio_create_group(iommu_group);
                if (IS_ERR(group)) {
                        iommu_group_put(iommu_group);
                        return PTR_ERR(group);
index 3fc63c2..57721c7 100644 (file)
@@ -78,13 +78,13 @@ int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasy
        if (!info->fbdefio)
                return 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        /* Kill off the delayed work */
        cancel_delayed_work_sync(&info->deferred_work);
 
        /* Run it immediately */
        schedule_delayed_work(&info->deferred_work, 0);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return 0;
 }
index 36205c2..f6bed86 100644 (file)
@@ -545,6 +545,7 @@ err_enable_device:
 static void virtio_pci_remove(struct pci_dev *pci_dev)
 {
        struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+       struct device *dev = get_device(&vp_dev->vdev.dev);
 
        unregister_virtio_device(&vp_dev->vdev);
 
@@ -554,6 +555,7 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
                virtio_pci_modern_remove(vp_dev);
 
        pci_disable_device(pci_dev);
+       put_device(dev);
 }
 
 static struct pci_driver virtio_pci_driver = {
index 4f0e7be..0f6d851 100644 (file)
@@ -145,7 +145,8 @@ config MENF21BMC_WATCHDOG
 config TANGOX_WATCHDOG
        tristate "Sigma Designs SMP86xx/SMP87xx watchdog"
        select WATCHDOG_CORE
-       depends on ARCH_TANGOX || COMPILE_TEST
+       depends on ARCH_TANGO || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Support for the watchdog in Sigma Designs SMP86xx (tango3)
          and SMP87xx (tango4) family chips.
@@ -618,6 +619,7 @@ config DIGICOLOR_WATCHDOG
 config LPC18XX_WATCHDOG
        tristate "LPC18xx/43xx Watchdog"
        depends on ARCH_LPC18XX || COMPILE_TEST
+       depends on HAS_IOMEM
        select WATCHDOG_CORE
        help
          Say Y here if to include support for the watchdog timer
@@ -1374,6 +1376,7 @@ config BCM_KONA_WDT_DEBUG
 config BCM7038_WDT
        tristate "BCM7038 Watchdog"
        select WATCHDOG_CORE
+       depends on HAS_IOMEM
        help
         Watchdog driver for the built-in hardware in Broadcom 7038 SoCs.
 
@@ -1383,6 +1386,7 @@ config IMGPDC_WDT
        tristate "Imagination Technologies PDC Watchdog Timer"
        depends on HAS_IOMEM
        depends on METAG || MIPS || COMPILE_TEST
+       select WATCHDOG_CORE
        help
          Driver for Imagination Technologies PowerDown Controller
          Watchdog Timer.
index f36ca4b..ac5840d 100644 (file)
@@ -292,4 +292,4 @@ MODULE_PARM_DESC(nodelay,
                 "Force selection of a timeout setting without initial delay "
                 "(max6373/74 only, default=0)");
 
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
index 1a11aed..68952d9 100644 (file)
@@ -608,7 +608,7 @@ static int usb_pcwd_probe(struct usb_interface *interface,
        struct usb_host_interface *iface_desc;
        struct usb_endpoint_descriptor *endpoint;
        struct usb_pcwd_private *usb_pcwd = NULL;
-       int pipe, maxp;
+       int pipe;
        int retval = -ENOMEM;
        int got_fw_rev;
        unsigned char fw_rev_major, fw_rev_minor;
@@ -641,7 +641,6 @@ static int usb_pcwd_probe(struct usb_interface *interface,
 
        /* get a handle to the interrupt data pipe */
        pipe = usb_rcvintpipe(udev, endpoint->bEndpointAddress);
-       maxp = usb_maxpacket(udev, pipe, usb_pipeout(pipe));
 
        /* allocate memory for our device and initialize it */
        usb_pcwd = kzalloc(sizeof(struct usb_pcwd_private), GFP_KERNEL);
index 01d8162..e7a715e 100644 (file)
@@ -139,12 +139,11 @@ static int wdt_config(struct watchdog_device *wdd, bool ping)
 
        writel_relaxed(UNLOCK, wdt->base + WDTLOCK);
        writel_relaxed(wdt->load_val, wdt->base + WDTLOAD);
+       writel_relaxed(INT_MASK, wdt->base + WDTINTCLR);
 
-       if (!ping) {
-               writel_relaxed(INT_MASK, wdt->base + WDTINTCLR);
+       if (!ping)
                writel_relaxed(INT_ENABLE | RESET_ENABLE, wdt->base +
                                WDTCONTROL);
-       }
 
        writel_relaxed(LOCK, wdt->base + WDTLOCK);
 
index 945fc43..4ac2ca8 100644 (file)
@@ -242,7 +242,7 @@ static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize)
        return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize);
 }
 
-static struct cleancache_ops tmem_cleancache_ops = {
+static const struct cleancache_ops tmem_cleancache_ops = {
        .put_page = tmem_cleancache_put_page,
        .get_page = tmem_cleancache_get_page,
        .invalidate_page = tmem_cleancache_flush_page,
index 7bf835f..eadc894 100644 (file)
@@ -449,14 +449,14 @@ static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end,
        if (retval)
                return retval;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
 
        fid = filp->private_data;
        v9fs_blank_wstat(&wstat);
 
        retval = p9_client_wstat(fid, &wstat);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return retval;
 }
@@ -472,13 +472,13 @@ int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
        if (retval)
                return retval;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
 
        fid = filp->private_data;
 
        retval = p9_client_fsync(fid, datasync);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return retval;
 }
index 659c579..0548c53 100644 (file)
@@ -33,11 +33,11 @@ affs_file_release(struct inode *inode, struct file *filp)
                 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
 
        if (atomic_dec_and_test(&AFFS_I(inode)->i_opencnt)) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if (inode->i_size != AFFS_I(inode)->mmu_private)
                        affs_truncate(inode);
                affs_free_prealloc(inode);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
        return 0;
@@ -958,12 +958,12 @@ int affs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
        if (err)
                return err;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = write_inode_now(inode, 0);
        err = sync_blockdev(inode->i_sb->s_bdev);
        if (!ret)
                ret = err;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 const struct file_operations affs_file_operations = {
index 4baf1d2..d91a9c9 100644 (file)
@@ -483,7 +483,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
 
        fl->fl_type = F_UNLCK;
 
-       mutex_lock(&vnode->vfs_inode.i_mutex);
+       inode_lock(&vnode->vfs_inode);
 
        /* check local lock records first */
        ret = 0;
@@ -505,7 +505,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
        }
 
 error:
-       mutex_unlock(&vnode->vfs_inode.i_mutex);
+       inode_unlock(&vnode->vfs_inode);
        _leave(" = %d [%hd]", ret, fl->fl_type);
        return ret;
 }
index 0714abc..dfef94f 100644 (file)
@@ -693,7 +693,7 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (ret)
                return ret;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* use a writeback record as a marker in the queue - when this reaches
         * the front of the queue, all the outstanding writes are either
@@ -735,7 +735,7 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        afs_put_writeback(wb);
        _leave(" = %d", ret);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index 6530ced..25b24d0 100644 (file)
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -195,7 +195,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
        struct timespec now;
        unsigned int ia_valid = attr->ia_valid;
 
-       WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+       WARN_ON_ONCE(!inode_is_locked(inode));
 
        if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
                if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
index 3a93755..051ea48 100644 (file)
@@ -491,6 +491,7 @@ static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
  * arch_check_elf() - check an ELF executable
  * @ehdr:      The main ELF header
  * @has_interp:        True if the ELF has an interpreter, else false.
+ * @interp_ehdr: The interpreter's ELF header
  * @state:     Architecture-specific state preserved throughout the process
  *             of loading the ELF.
  *
@@ -502,6 +503,7 @@ static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
  *         with that return code.
  */
 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
+                                struct elfhdr *interp_ehdr,
                                 struct arch_elf_state *state)
 {
        /* Dummy implementation, always proceed */
@@ -829,7 +831,9 @@ static int load_elf_binary(struct linux_binprm *bprm)
         * still possible to return an error to the code that invoked
         * the exec syscall.
         */
-       retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
+       retval = arch_check_elf(&loc->elf_ex,
+                               !!interpreter, &loc->interp_elf_ex,
+                               &arch_state);
        if (retval)
                goto out_free_dentry;
 
index 78f005f..3a3ced7 100644 (file)
@@ -638,11 +638,11 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
        case 3:
                /* Delete this handler. */
                root = dget(file->f_path.dentry->d_sb->s_root);
-               mutex_lock(&d_inode(root)->i_mutex);
+               inode_lock(d_inode(root));
 
                kill_node(e);
 
-               mutex_unlock(&d_inode(root)->i_mutex);
+               inode_unlock(d_inode(root));
                dput(root);
                break;
        default:
@@ -675,7 +675,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
                return PTR_ERR(e);
 
        root = dget(sb->s_root);
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
        dentry = lookup_one_len(e->name, root, strlen(e->name));
        err = PTR_ERR(dentry);
        if (IS_ERR(dentry))
@@ -711,7 +711,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
 out2:
        dput(dentry);
 out:
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
        dput(root);
 
        if (err) {
@@ -754,12 +754,12 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer,
        case 3:
                /* Delete all handlers. */
                root = dget(file->f_path.dentry->d_sb->s_root);
-               mutex_lock(&d_inode(root)->i_mutex);
+               inode_lock(d_inode(root));
 
                while (!list_empty(&entries))
                        kill_node(list_entry(entries.next, Node, list));
 
-               mutex_unlock(&d_inode(root)->i_mutex);
+               inode_unlock(d_inode(root));
                dput(root);
                break;
        default:
index ba762ea..39b3a17 100644 (file)
@@ -75,7 +75,7 @@ void kill_bdev(struct block_device *bdev)
 {
        struct address_space *mapping = bdev->bd_inode->i_mapping;
 
-       if (mapping->nrpages == 0 && mapping->nrshadows == 0)
+       if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
                return;
 
        invalidate_bh_lrus();
@@ -346,9 +346,9 @@ static loff_t block_llseek(struct file *file, loff_t offset, int whence)
        struct inode *bd_inode = bdev_file_inode(file);
        loff_t retval;
 
-       mutex_lock(&bd_inode->i_mutex);
+       inode_lock(bd_inode);
        retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
-       mutex_unlock(&bd_inode->i_mutex);
+       inode_unlock(bd_inode);
        return retval;
 }
        
@@ -1142,9 +1142,9 @@ void bd_set_size(struct block_device *bdev, loff_t size)
 {
        unsigned bsize = bdev_logical_block_size(bdev);
 
-       mutex_lock(&bdev->bd_inode->i_mutex);
+       inode_lock(bdev->bd_inode);
        i_size_write(bdev->bd_inode, size);
-       mutex_unlock(&bdev->bd_inode->i_mutex);
+       inode_unlock(bdev->bd_inode);
        while (bsize < PAGE_CACHE_SIZE) {
                if (size & bsize)
                        break;
@@ -1730,43 +1730,25 @@ static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        return __dax_fault(vma, vmf, blkdev_get_block, NULL);
 }
 
-static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
-               pmd_t *pmd, unsigned int flags)
+static int blkdev_dax_pfn_mkwrite(struct vm_area_struct *vma,
+               struct vm_fault *vmf)
 {
-       return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
-}
-
-static void blkdev_vm_open(struct vm_area_struct *vma)
-{
-       struct inode *bd_inode = bdev_file_inode(vma->vm_file);
-       struct block_device *bdev = I_BDEV(bd_inode);
-
-       mutex_lock(&bd_inode->i_mutex);
-       bdev->bd_map_count++;
-       mutex_unlock(&bd_inode->i_mutex);
+       return dax_pfn_mkwrite(vma, vmf);
 }
 
-static void blkdev_vm_close(struct vm_area_struct *vma)
+static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
+               pmd_t *pmd, unsigned int flags)
 {
-       struct inode *bd_inode = bdev_file_inode(vma->vm_file);
-       struct block_device *bdev = I_BDEV(bd_inode);
-
-       mutex_lock(&bd_inode->i_mutex);
-       bdev->bd_map_count--;
-       mutex_unlock(&bd_inode->i_mutex);
+       return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
 }
 
 static const struct vm_operations_struct blkdev_dax_vm_ops = {
-       .open           = blkdev_vm_open,
-       .close          = blkdev_vm_close,
        .fault          = blkdev_dax_fault,
        .pmd_fault      = blkdev_dax_pmd_fault,
-       .pfn_mkwrite    = blkdev_dax_fault,
+       .pfn_mkwrite    = blkdev_dax_pfn_mkwrite,
 };
 
 static const struct vm_operations_struct blkdev_default_vm_ops = {
-       .open           = blkdev_vm_open,
-       .close          = blkdev_vm_close,
        .fault          = filemap_fault,
        .map_pages      = filemap_map_pages,
 };
@@ -1774,18 +1756,14 @@ static const struct vm_operations_struct blkdev_default_vm_ops = {
 static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
 {
        struct inode *bd_inode = bdev_file_inode(file);
-       struct block_device *bdev = I_BDEV(bd_inode);
 
        file_accessed(file);
-       mutex_lock(&bd_inode->i_mutex);
-       bdev->bd_map_count++;
        if (IS_DAX(bd_inode)) {
                vma->vm_ops = &blkdev_dax_vm_ops;
                vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
        } else {
                vma->vm_ops = &blkdev_default_vm_ops;
        }
-       mutex_unlock(&bd_inode->i_mutex);
 
        return 0;
 }
index 88d9af3..5fb60ea 100644 (file)
@@ -328,8 +328,8 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
                list_add_tail(&work->ordered_list, &wq->ordered_list);
                spin_unlock_irqrestore(&wq->list_lock, flags);
        }
-       queue_work(wq->normal_wq, &work->normal_work);
        trace_btrfs_work_queued(work);
+       queue_work(wq->normal_wq, &work->normal_work);
 }
 
 void btrfs_queue_work(struct btrfs_workqueue *wq,
index 08405a3..b90cd37 100644 (file)
@@ -560,13 +560,13 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
  */
 static void __merge_refs(struct list_head *head, int mode)
 {
-       struct __prelim_ref *ref1;
+       struct __prelim_ref *pos1;
 
-       list_for_each_entry(ref1, head, list) {
-               struct __prelim_ref *ref2 = ref1, *tmp;
+       list_for_each_entry(pos1, head, list) {
+               struct __prelim_ref *pos2 = pos1, *tmp;
 
-               list_for_each_entry_safe_continue(ref2, tmp, head, list) {
-                       struct __prelim_ref *xchg;
+               list_for_each_entry_safe_continue(pos2, tmp, head, list) {
+                       struct __prelim_ref *xchg, *ref1 = pos1, *ref2 = pos2;
                        struct extent_inode_elem *eie;
 
                        if (!ref_for_same_block(ref1, ref2))
index 97ad9bb..bfe4a33 100644 (file)
@@ -1614,7 +1614,7 @@ struct btrfs_fs_info {
 
        spinlock_t delayed_iput_lock;
        struct list_head delayed_iputs;
-       struct rw_semaphore delayed_iput_sem;
+       struct mutex cleaner_delayed_iput_mutex;
 
        /* this protects tree_mod_seq_list */
        spinlock_t tree_mod_seq_lock;
@@ -3641,6 +3641,7 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
 int __get_raid_index(u64 flags);
 int btrfs_start_write_no_snapshoting(struct btrfs_root *root);
 void btrfs_end_write_no_snapshoting(struct btrfs_root *root);
+void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
 void check_system_chunk(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root,
                        const u64 type);
index 1e668fb..cbb7dbf 100644 (file)
@@ -614,7 +614,7 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
                em = lookup_extent_mapping(em_tree, start, (u64)-1);
                if (!em)
                        break;
-               map = (struct map_lookup *)em->bdev;
+               map = em->map_lookup;
                for (i = 0; i < map->num_stripes; i++)
                        if (srcdev == map->stripes[i].dev)
                                map->stripes[i].dev = tgtdev;
index e99ccd6..4545e2e 100644 (file)
 #include <asm/cpufeature.h>
 #endif
 
+#define BTRFS_SUPER_FLAG_SUPP  (BTRFS_HEADER_FLAG_WRITTEN |\
+                                BTRFS_HEADER_FLAG_RELOC |\
+                                BTRFS_SUPER_FLAG_ERROR |\
+                                BTRFS_SUPER_FLAG_SEEDING |\
+                                BTRFS_SUPER_FLAG_METADUMP)
+
 static const struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
 static void free_fs_root(struct btrfs_root *root);
@@ -176,6 +182,7 @@ static struct btrfs_lockdep_keyset {
        { .id = BTRFS_TREE_RELOC_OBJECTID,      .name_stem = "treloc"   },
        { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc"   },
        { .id = BTRFS_UUID_TREE_OBJECTID,       .name_stem = "uuid"     },
+       { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, .name_stem = "free-space" },
        { .id = 0,                              .name_stem = "tree"     },
 };
 
@@ -1583,8 +1590,23 @@ int btrfs_init_fs_root(struct btrfs_root *root)
        ret = get_anon_bdev(&root->anon_dev);
        if (ret)
                goto free_writers;
+
+       mutex_lock(&root->objectid_mutex);
+       ret = btrfs_find_highest_objectid(root,
+                                       &root->highest_objectid);
+       if (ret) {
+               mutex_unlock(&root->objectid_mutex);
+               goto free_root_dev;
+       }
+
+       ASSERT(root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
+
+       mutex_unlock(&root->objectid_mutex);
+
        return 0;
 
+free_root_dev:
+       free_anon_bdev(root->anon_dev);
 free_writers:
        btrfs_free_subvolume_writers(root->subv_writers);
 fail:
@@ -1766,7 +1788,6 @@ static int cleaner_kthread(void *arg)
        int again;
        struct btrfs_trans_handle *trans;
 
-       set_freezable();
        do {
                again = 0;
 
@@ -1786,7 +1807,10 @@ static int cleaner_kthread(void *arg)
                        goto sleep;
                }
 
+               mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex);
                btrfs_run_delayed_iputs(root);
+               mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex);
+
                again = btrfs_clean_one_deleted_snapshot(root);
                mutex_unlock(&root->fs_info->cleaner_mutex);
 
@@ -2556,8 +2580,8 @@ int open_ctree(struct super_block *sb,
        mutex_init(&fs_info->delete_unused_bgs_mutex);
        mutex_init(&fs_info->reloc_mutex);
        mutex_init(&fs_info->delalloc_root_mutex);
+       mutex_init(&fs_info->cleaner_delayed_iput_mutex);
        seqlock_init(&fs_info->profiles_lock);
-       init_rwsem(&fs_info->delayed_iput_sem);
 
        INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
        INIT_LIST_HEAD(&fs_info->space_info);
@@ -2742,26 +2766,6 @@ int open_ctree(struct super_block *sb,
                goto fail_alloc;
        }
 
-       /*
-        * Leafsize and nodesize were always equal, this is only a sanity check.
-        */
-       if (le32_to_cpu(disk_super->__unused_leafsize) !=
-           btrfs_super_nodesize(disk_super)) {
-               printk(KERN_ERR "BTRFS: couldn't mount because metadata "
-                      "blocksizes don't match.  node %d leaf %d\n",
-                      btrfs_super_nodesize(disk_super),
-                      le32_to_cpu(disk_super->__unused_leafsize));
-               err = -EINVAL;
-               goto fail_alloc;
-       }
-       if (btrfs_super_nodesize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) {
-               printk(KERN_ERR "BTRFS: couldn't mount because metadata "
-                      "blocksize (%d) was too large\n",
-                      btrfs_super_nodesize(disk_super));
-               err = -EINVAL;
-               goto fail_alloc;
-       }
-
        features = btrfs_super_incompat_flags(disk_super);
        features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
        if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
@@ -2833,17 +2837,6 @@ int open_ctree(struct super_block *sb,
        sb->s_blocksize = sectorsize;
        sb->s_blocksize_bits = blksize_bits(sectorsize);
 
-       if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
-               printk(KERN_ERR "BTRFS: valid FS not found on %s\n", sb->s_id);
-               goto fail_sb_buffer;
-       }
-
-       if (sectorsize != PAGE_SIZE) {
-               printk(KERN_ERR "BTRFS: incompatible sector size (%lu) "
-                      "found on %s\n", (unsigned long)sectorsize, sb->s_id);
-               goto fail_sb_buffer;
-       }
-
        mutex_lock(&fs_info->chunk_mutex);
        ret = btrfs_read_sys_array(tree_root);
        mutex_unlock(&fs_info->chunk_mutex);
@@ -2915,6 +2908,18 @@ retry_root_backup:
        tree_root->commit_root = btrfs_root_node(tree_root);
        btrfs_set_root_refs(&tree_root->root_item, 1);
 
+       mutex_lock(&tree_root->objectid_mutex);
+       ret = btrfs_find_highest_objectid(tree_root,
+                                       &tree_root->highest_objectid);
+       if (ret) {
+               mutex_unlock(&tree_root->objectid_mutex);
+               goto recovery_tree_root;
+       }
+
+       ASSERT(tree_root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
+
+       mutex_unlock(&tree_root->objectid_mutex);
+
        ret = btrfs_read_roots(fs_info, tree_root);
        if (ret)
                goto recovery_tree_root;
@@ -4018,8 +4023,17 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
                              int read_only)
 {
        struct btrfs_super_block *sb = fs_info->super_copy;
+       u64 nodesize = btrfs_super_nodesize(sb);
+       u64 sectorsize = btrfs_super_sectorsize(sb);
        int ret = 0;
 
+       if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
+               printk(KERN_ERR "BTRFS: no valid FS found\n");
+               ret = -EINVAL;
+       }
+       if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP)
+               printk(KERN_WARNING "BTRFS: unrecognized super flag: %llu\n",
+                               btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
        if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
                printk(KERN_ERR "BTRFS: tree_root level too big: %d >= %d\n",
                                btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
@@ -4037,31 +4051,46 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
        }
 
        /*
-        * The common minimum, we don't know if we can trust the nodesize/sectorsize
-        * items yet, they'll be verified later. Issue just a warning.
+        * Check sectorsize and nodesize first, the other checks depend on them.
+        * All possible sector sizes (4K, 8K, 16K, 32K, 64K) are checked here.
         */
-       if (!IS_ALIGNED(btrfs_super_root(sb), 4096))
+       if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
+           sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+               printk(KERN_ERR "BTRFS: invalid sectorsize %llu\n", sectorsize);
+               ret = -EINVAL;
+       }
+       /* Only sectorsize == PAGE_CACHE_SIZE is supported for now */
+       if (sectorsize != PAGE_CACHE_SIZE) {
+               printk(KERN_ERR "BTRFS: sectorsize %llu not supported yet, only support %lu\n",
+                               sectorsize, PAGE_CACHE_SIZE);
+               ret = -EINVAL;
+       }
+       if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
+           nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+               printk(KERN_ERR "BTRFS: invalid nodesize %llu\n", nodesize);
+               ret = -EINVAL;
+       }
+       if (nodesize != le32_to_cpu(sb->__unused_leafsize)) {
+               printk(KERN_ERR "BTRFS: invalid leafsize %u, should be %llu\n",
+                               le32_to_cpu(sb->__unused_leafsize),
+                               nodesize);
+               ret = -EINVAL;
+       }
+
+       /* Root alignment check */
+       if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) {
                printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
                                btrfs_super_root(sb));
-       if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096))
+               ret = -EINVAL;
+       }
+       if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) {
                printk(KERN_WARNING "BTRFS: chunk_root block unaligned: %llu\n",
                                btrfs_super_chunk_root(sb));
-       if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096))
-               printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
-                               btrfs_super_log_root(sb));
-
-       /*
-        * Check the lower bound, the alignment and other constraints are
-        * checked later.
-        */
-       if (btrfs_super_nodesize(sb) < 4096) {
-               printk(KERN_ERR "BTRFS: nodesize too small: %u < 4096\n",
-                               btrfs_super_nodesize(sb));
                ret = -EINVAL;
        }
-       if (btrfs_super_sectorsize(sb) < 4096) {
-               printk(KERN_ERR "BTRFS: sectorsize too small: %u < 4096\n",
-                               btrfs_super_sectorsize(sb));
+       if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) {
+               printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
+                               btrfs_super_log_root(sb));
                ret = -EINVAL;
        }
 
index 60cc139..e2287c7 100644 (file)
@@ -4139,8 +4139,10 @@ commit_trans:
                    !atomic_read(&root->fs_info->open_ioctl_trans)) {
                        need_commit--;
 
-                       if (need_commit > 0)
+                       if (need_commit > 0) {
+                               btrfs_start_delalloc_roots(fs_info, 0, -1);
                                btrfs_wait_ordered_roots(fs_info, -1);
+                       }
 
                        trans = btrfs_join_transaction(root);
                        if (IS_ERR(trans))
@@ -4153,11 +4155,12 @@ commit_trans:
                                if (ret)
                                        return ret;
                                /*
-                                * make sure that all running delayed iput are
-                                * done
+                                * The cleaner kthread might still be doing iput
+                                * operations. Wait for it to finish so that
+                                * more space is released.
                                 */
-                               down_write(&root->fs_info->delayed_iput_sem);
-                               up_write(&root->fs_info->delayed_iput_sem);
+                               mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex);
+                               mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex);
                                goto again;
                        } else {
                                btrfs_end_transaction(trans, root);
@@ -10399,7 +10402,7 @@ btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
         * more device items and remove one chunk item), but this is done at
         * btrfs_remove_chunk() through a call to check_system_chunk().
         */
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        num_items = 3 + map->num_stripes;
        free_extent_map(em);
 
@@ -10586,7 +10589,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
 
        disk_super = fs_info->super_copy;
        if (!btrfs_super_root(disk_super))
-               return 1;
+               return -EINVAL;
 
        features = btrfs_super_incompat_flags(disk_super);
        if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
@@ -10816,3 +10819,23 @@ int btrfs_start_write_no_snapshoting(struct btrfs_root *root)
        }
        return 1;
 }
+
+static int wait_snapshoting_atomic_t(atomic_t *a)
+{
+       schedule();
+       return 0;
+}
+
+void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
+{
+       while (true) {
+               int ret;
+
+               ret = btrfs_start_write_no_snapshoting(root);
+               if (ret)
+                       break;
+               wait_on_atomic_t(&root->will_be_snapshoted,
+                                wait_snapshoting_atomic_t,
+                                TASK_UNINTERRUPTIBLE);
+       }
+}
index 6a98bdd..84fb56d 100644 (file)
@@ -76,7 +76,7 @@ void free_extent_map(struct extent_map *em)
                WARN_ON(extent_map_in_tree(em));
                WARN_ON(!list_empty(&em->list));
                if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
-                       kfree(em->bdev);
+                       kfree(em->map_lookup);
                kmem_cache_free(extent_map_cache, em);
        }
 }
index b2991fd..eb8b8fa 100644 (file)
@@ -32,7 +32,15 @@ struct extent_map {
        u64 block_len;
        u64 generation;
        unsigned long flags;
-       struct block_device *bdev;
+       union {
+               struct block_device *bdev;
+
+               /*
+                * used for chunk mappings
+                * flags & EXTENT_FLAG_FS_MAPPING must be set
+                */
+               struct map_lookup *map_lookup;
+       };
        atomic_t refs;
        unsigned int compress_type;
        struct list_head list;
index 83d7859..098bb8f 100644 (file)
@@ -406,8 +406,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
 /* simple helper to fault in pages and copy.  This should go away
  * and be replaced with calls into generic code.
  */
-static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
-                                        size_t write_bytes,
+static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
                                         struct page **prepared_pages,
                                         struct iov_iter *i)
 {
@@ -1588,8 +1587,7 @@ again:
                        ret = 0;
                }
 
-               copied = btrfs_copy_from_user(pos, num_pages,
-                                          write_bytes, pages, i);
+               copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
 
                /*
                 * if we have trouble faulting in the pages, fall
@@ -1764,17 +1762,17 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
        loff_t pos;
        size_t count;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        err = generic_write_checks(iocb, from);
        if (err <= 0) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                return err;
        }
 
        current->backing_dev_info = inode_to_bdi(inode);
        err = file_remove_privs(file);
        if (err) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                goto out;
        }
 
@@ -1785,7 +1783,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
         * to stop this write operation to ensure FS consistency.
         */
        if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                err = -EROFS;
                goto out;
        }
@@ -1806,7 +1804,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
                end_pos = round_up(pos + count, root->sectorsize);
                err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
                if (err) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        goto out;
                }
        }
@@ -1822,7 +1820,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
                        iocb->ki_pos = pos + num_written;
        }
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        /*
         * We also have to set last_sub_trans to the current log transid,
@@ -1911,7 +1909,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        atomic_inc(&root->log_batch);
        full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
                             &BTRFS_I(inode)->runtime_flags);
@@ -1963,7 +1961,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
                ret = start_ordered_ops(inode, start, end);
        }
        if (ret) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                goto out;
        }
        atomic_inc(&root->log_batch);
@@ -2009,7 +2007,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
                 */
                clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
                          &BTRFS_I(inode)->runtime_flags);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                goto out;
        }
 
@@ -2033,7 +2031,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                goto out;
        }
        trans->sync = true;
@@ -2056,7 +2054,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
         * file again, but that will end up using the synchronization
         * inside btrfs_sync_log to keep things safe.
         */
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        /*
         * If any of the ordered extents had an error, just return it to user
@@ -2305,7 +2303,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
        ret = find_first_non_hole(inode, &offset, &len);
        if (ret < 0)
@@ -2345,7 +2343,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                truncated_page = true;
                ret = btrfs_truncate_page(inode, offset, 0, 0);
                if (ret) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        return ret;
                }
        }
@@ -2421,7 +2419,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                ret = btrfs_wait_ordered_range(inode, lockstart,
                                               lockend - lockstart + 1);
                if (ret) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        return ret;
                }
        }
@@ -2576,7 +2574,7 @@ out_only_mutex:
                        ret = btrfs_end_transaction(trans, root);
                }
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (ret && !err)
                err = ret;
        return err;
@@ -2660,7 +2658,7 @@ static long btrfs_fallocate(struct file *file, int mode,
        if (ret < 0)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = inode_newsize_ok(inode, alloc_end);
        if (ret)
                goto out;
@@ -2818,7 +2816,7 @@ out:
         * So this is completely used as cleanup.
         */
        btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        /* Let go of our reservation. */
        btrfs_free_reserved_data_space(inode, alloc_start,
                                       alloc_end - alloc_start);
@@ -2894,7 +2892,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
        struct inode *inode = file->f_mapping->host;
        int ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        switch (whence) {
        case SEEK_END:
        case SEEK_CUR:
@@ -2903,20 +2901,20 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
        case SEEK_DATA:
        case SEEK_HOLE:
                if (offset >= i_size_read(inode)) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        return -ENXIO;
                }
 
                ret = find_desired_extent(inode, &offset, whence);
                if (ret) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        return ret;
                }
        }
 
        offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return offset;
 }
 
index 393e36b..53dbeaf 100644 (file)
@@ -153,6 +153,20 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
 
 static unsigned long *alloc_bitmap(u32 bitmap_size)
 {
+       void *mem;
+
+       /*
+        * The allocation size varies, observed numbers were < 4K up to 16K.
+        * Using vmalloc unconditionally would be too heavy, we'll try
+        * contiguous allocations first.
+        */
+       if  (bitmap_size <= PAGE_SIZE)
+               return kzalloc(bitmap_size, GFP_NOFS);
+
+       mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN);
+       if (mem)
+               return mem;
+
        return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
                         PAGE_KERNEL);
 }
@@ -289,7 +303,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 
        ret = 0;
 out:
-       vfree(bitmap);
+       kvfree(bitmap);
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
        return ret;
@@ -438,7 +452,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 
        ret = 0;
 out:
-       vfree(bitmap);
+       kvfree(bitmap);
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
        return ret;
index 8b57c17..e50316c 100644 (file)
@@ -515,7 +515,7 @@ out:
        return ret;
 }
 
-static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
+int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
 {
        struct btrfs_path *path;
        int ret;
@@ -555,13 +555,6 @@ int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
        int ret;
        mutex_lock(&root->objectid_mutex);
 
-       if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) {
-               ret = btrfs_find_highest_objectid(root,
-                                                 &root->highest_objectid);
-               if (ret)
-                       goto out;
-       }
-
        if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
                ret = -ENOSPC;
                goto out;
index ddb347b..c8e864b 100644 (file)
@@ -9,5 +9,6 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
                         struct btrfs_trans_handle *trans);
 
 int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid);
+int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid);
 
 #endif
index 2478301..5f06eb1 100644 (file)
@@ -3134,7 +3134,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
 
-       down_read(&fs_info->delayed_iput_sem);
        spin_lock(&fs_info->delayed_iput_lock);
        while (!list_empty(&fs_info->delayed_iputs)) {
                struct btrfs_inode *inode;
@@ -3153,7 +3152,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
                spin_lock(&fs_info->delayed_iput_lock);
        }
        spin_unlock(&fs_info->delayed_iput_lock);
-       up_read(&root->fs_info->delayed_iput_sem);
 }
 
 /*
@@ -4874,26 +4872,6 @@ next:
        return err;
 }
 
-static int wait_snapshoting_atomic_t(atomic_t *a)
-{
-       schedule();
-       return 0;
-}
-
-static void wait_for_snapshot_creation(struct btrfs_root *root)
-{
-       while (true) {
-               int ret;
-
-               ret = btrfs_start_write_no_snapshoting(root);
-               if (ret)
-                       break;
-               wait_on_atomic_t(&root->will_be_snapshoted,
-                                wait_snapshoting_atomic_t,
-                                TASK_UNINTERRUPTIBLE);
-       }
-}
-
 static int btrfs_setsize(struct inode *inode, struct iattr *attr)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -4925,7 +4903,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
                 * truncation, it must capture all writes that happened before
                 * this truncation.
                 */
-               wait_for_snapshot_creation(root);
+               btrfs_wait_for_snapshot_creation(root);
                ret = btrfs_cont_expand(inode, oldsize, newsize);
                if (ret) {
                        btrfs_end_write_no_snapshoting(root);
@@ -7138,21 +7116,41 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
        if (ret)
                return ERR_PTR(ret);
 
-       em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
-                             ins.offset, ins.offset, ins.offset, 0);
-       if (IS_ERR(em)) {
-               btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-               return em;
-       }
-
+       /*
+        * Create the ordered extent before the extent map. This is to avoid
+        * races with the fast fsync path that would lead to it logging file
+        * extent items that point to disk extents that were not yet written to.
+        * The fast fsync path collects ordered extents into a local list and
+        * then collects all the new extent maps, so we must create the ordered
+        * extent first and make sure the fast fsync path collects any new
+        * ordered extents after collecting new extent maps as well.
+        * The fsync path simply can not rely on inode_dio_wait() because it
+        * causes deadlock with AIO.
+        */
        ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
                                           ins.offset, ins.offset, 0);
        if (ret) {
                btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-               free_extent_map(em);
                return ERR_PTR(ret);
        }
 
+       em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+                             ins.offset, ins.offset, ins.offset, 0);
+       if (IS_ERR(em)) {
+               struct btrfs_ordered_extent *oe;
+
+               btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
+               oe = btrfs_lookup_ordered_extent(inode, start);
+               ASSERT(oe);
+               if (WARN_ON(!oe))
+                       return em;
+               set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
+               set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
+               btrfs_remove_ordered_extent(inode, oe);
+               /* Once for our lookup and once for the ordered extents tree. */
+               btrfs_put_ordered_extent(oe);
+               btrfs_put_ordered_extent(oe);
+       }
        return em;
 }
 
@@ -8469,7 +8467,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                 * not unlock the i_mutex at this case.
                 */
                if (offset + count <= inode->i_size) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        relock = true;
                }
                ret = btrfs_delalloc_reserve_space(inode, offset, count);
@@ -8526,7 +8524,7 @@ out:
        if (wakeup)
                inode_dio_end(inode);
        if (relock)
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
 
        return ret;
 }
index 2a47a31..952172c 100644 (file)
@@ -240,7 +240,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        ip_oldflags = ip->flags;
        i_oldflags = inode->i_flags;
@@ -358,7 +358,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        }
 
  out_unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        mnt_drop_write_file(file);
        return ret;
 }
@@ -568,6 +568,10 @@ static noinline int create_subvol(struct inode *dir,
                goto fail;
        }
 
+       mutex_lock(&new_root->objectid_mutex);
+       new_root->highest_objectid = new_dirid;
+       mutex_unlock(&new_root->objectid_mutex);
+
        /*
         * insert the directory item
         */
@@ -877,7 +881,7 @@ out_up_read:
 out_dput:
        dput(dentry);
 out_unlock:
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        return error;
 }
 
@@ -1389,18 +1393,18 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                        ra_index += cluster;
                }
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
                        BTRFS_I(inode)->force_compress = compress_type;
                ret = cluster_pages_for_defrag(inode, pages, i, cluster);
                if (ret < 0) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        goto out_ra;
                }
 
                defrag_count += ret;
                balance_dirty_pages_ratelimited(inode->i_mapping);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
                if (newer_than) {
                        if (newer_off == (u64)-1)
@@ -1461,9 +1465,9 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
 
 out_ra:
        if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        if (!file)
                kfree(ra);
@@ -2426,7 +2430,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
                goto out_dput;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * Don't allow to delete a subvolume with send in progress. This is
@@ -2539,7 +2543,7 @@ out_up_write:
                spin_unlock(&dest->root_item_lock);
        }
 out_unlock_inode:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (!err) {
                d_invalidate(dentry);
                btrfs_invalidate_inodes(dest);
@@ -2555,7 +2559,7 @@ out_unlock_inode:
 out_dput:
        dput(dentry);
 out_unlock_dir:
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
 out_drop_write:
        mnt_drop_write_file(file);
 out:
@@ -2853,8 +2857,8 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
 
 static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
 {
-       mutex_unlock(&inode1->i_mutex);
-       mutex_unlock(&inode2->i_mutex);
+       inode_unlock(inode1);
+       inode_unlock(inode2);
 }
 
 static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
@@ -2862,8 +2866,8 @@ static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
        if (inode1 < inode2)
                swap(inode1, inode2);
 
-       mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
-       mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(inode1, I_MUTEX_PARENT);
+       inode_lock_nested(inode2, I_MUTEX_CHILD);
 }
 
 static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
@@ -3022,7 +3026,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
                return 0;
 
        if (same_inode) {
-               mutex_lock(&src->i_mutex);
+               inode_lock(src);
 
                ret = extent_same_check_offsets(src, loff, &len, olen);
                if (ret)
@@ -3097,7 +3101,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
        btrfs_cmp_data_free(&cmp);
 out_unlock:
        if (same_inode)
-               mutex_unlock(&src->i_mutex);
+               inode_unlock(src);
        else
                btrfs_double_inode_unlock(src, dst);
 
@@ -3745,7 +3749,7 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
        if (!same_inode) {
                btrfs_double_inode_lock(src, inode);
        } else {
-               mutex_lock(&src->i_mutex);
+               inode_lock(src);
        }
 
        /* determine range to clone */
@@ -3816,7 +3820,7 @@ out_unlock:
        if (!same_inode)
                btrfs_double_inode_unlock(src, inode);
        else
-               mutex_unlock(&src->i_mutex);
+               inode_unlock(src);
        return ret;
 }
 
index 6d70754..5516136 100644 (file)
@@ -609,13 +609,28 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
        return 1;
 }
 
+static int rbio_stripe_page_index(struct btrfs_raid_bio *rbio, int stripe,
+                                 int index)
+{
+       return stripe * rbio->stripe_npages + index;
+}
+
+/*
+ * these are just the pages from the rbio array, not from anything
+ * the FS sent down to us
+ */
+static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe,
+                                    int index)
+{
+       return rbio->stripe_pages[rbio_stripe_page_index(rbio, stripe, index)];
+}
+
 /*
  * helper to index into the pstripe
  */
 static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index)
 {
-       index += (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
-       return rbio->stripe_pages[index];
+       return rbio_stripe_page(rbio, rbio->nr_data, index);
 }
 
 /*
@@ -626,10 +641,7 @@ static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index)
 {
        if (rbio->nr_data + 1 == rbio->real_stripes)
                return NULL;
-
-       index += ((rbio->nr_data + 1) * rbio->stripe_len) >>
-               PAGE_CACHE_SHIFT;
-       return rbio->stripe_pages[index];
+       return rbio_stripe_page(rbio, rbio->nr_data + 1, index);
 }
 
 /*
@@ -889,6 +901,7 @@ static void raid_write_end_io(struct bio *bio)
 {
        struct btrfs_raid_bio *rbio = bio->bi_private;
        int err = bio->bi_error;
+       int max_errors;
 
        if (err)
                fail_bio_stripe(rbio, bio);
@@ -901,7 +914,9 @@ static void raid_write_end_io(struct bio *bio)
        err = 0;
 
        /* OK, we have read all the stripes we need to. */
-       if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
+       max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
+                    0 : rbio->bbio->max_errors;
+       if (atomic_read(&rbio->error) > max_errors)
                err = -EIO;
 
        rbio_orig_end_io(rbio, err);
@@ -947,8 +962,7 @@ static struct page *page_in_rbio(struct btrfs_raid_bio *rbio,
  */
 static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
 {
-       unsigned long nr = stripe_len * nr_stripes;
-       return DIV_ROUND_UP(nr, PAGE_CACHE_SIZE);
+       return DIV_ROUND_UP(stripe_len, PAGE_CACHE_SIZE) * nr_stripes;
 }
 
 /*
@@ -966,8 +980,8 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
        void *p;
 
        rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2 +
-                      DIV_ROUND_UP(stripe_npages, BITS_PER_LONG / 8),
-                       GFP_NOFS);
+                      DIV_ROUND_UP(stripe_npages, BITS_PER_LONG) *
+                      sizeof(long), GFP_NOFS);
        if (!rbio)
                return ERR_PTR(-ENOMEM);
 
@@ -1021,18 +1035,17 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
                if (!page)
                        return -ENOMEM;
                rbio->stripe_pages[i] = page;
-               ClearPageUptodate(page);
        }
        return 0;
 }
 
-/* allocate pages for just the p/q stripes */
+/* only allocate pages for p/q stripes */
 static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
 {
        int i;
        struct page *page;
 
-       i = (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
+       i = rbio_stripe_page_index(rbio, rbio->nr_data, 0);
 
        for (; i < rbio->nr_pages; i++) {
                if (rbio->stripe_pages[i])
@@ -1120,18 +1133,6 @@ static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
        }
 }
 
-/*
- * these are just the pages from the rbio array, not from anything
- * the FS sent down to us
- */
-static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe, int page)
-{
-       int index;
-       index = stripe * (rbio->stripe_len >> PAGE_CACHE_SHIFT);
-       index += page;
-       return rbio->stripe_pages[index];
-}
-
 /*
  * helper function to walk our bio list and populate the bio_pages array with
  * the result.  This seems expensive, but it is faster than constantly
@@ -1175,7 +1176,6 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
 {
        struct btrfs_bio *bbio = rbio->bbio;
        void *pointers[rbio->real_stripes];
-       int stripe_len = rbio->stripe_len;
        int nr_data = rbio->nr_data;
        int stripe;
        int pagenr;
@@ -1183,7 +1183,6 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
        int q_stripe = -1;
        struct bio_list bio_list;
        struct bio *bio;
-       int pages_per_stripe = stripe_len >> PAGE_CACHE_SHIFT;
        int ret;
 
        bio_list_init(&bio_list);
@@ -1226,7 +1225,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
        else
                clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
 
-       for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+       for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                struct page *p;
                /* first collect one page from each data stripe */
                for (stripe = 0; stripe < nr_data; stripe++) {
@@ -1268,7 +1267,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
         * everything else.
         */
        for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
-               for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+               for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                        struct page *page;
                        if (stripe < rbio->nr_data) {
                                page = page_in_rbio(rbio, stripe, pagenr, 1);
@@ -1292,7 +1291,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
                if (!bbio->tgtdev_map[stripe])
                        continue;
 
-               for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+               for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                        struct page *page;
                        if (stripe < rbio->nr_data) {
                                page = page_in_rbio(rbio, stripe, pagenr, 1);
@@ -1506,7 +1505,6 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
        int bios_to_read = 0;
        struct bio_list bio_list;
        int ret;
-       int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
        int pagenr;
        int stripe;
        struct bio *bio;
@@ -1525,7 +1523,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
         * stripe
         */
        for (stripe = 0; stripe < rbio->nr_data; stripe++) {
-               for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+               for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                        struct page *page;
                        /*
                         * we want to find all the pages missing from
@@ -1801,7 +1799,6 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
        int pagenr, stripe;
        void **pointers;
        int faila = -1, failb = -1;
-       int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
        struct page *page;
        int err;
        int i;
@@ -1824,7 +1821,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
 
        index_rbio_pages(rbio);
 
-       for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+       for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                /*
                 * Now we just use bitmap to mark the horizontal stripes in
                 * which we have data when doing parity scrub.
@@ -1935,7 +1932,7 @@ pstripe:
                 * other endio functions will fiddle the uptodate bits
                 */
                if (rbio->operation == BTRFS_RBIO_WRITE) {
-                       for (i = 0;  i < nr_pages; i++) {
+                       for (i = 0;  i < rbio->stripe_npages; i++) {
                                if (faila != -1) {
                                        page = rbio_stripe_page(rbio, faila, i);
                                        SetPageUptodate(page);
@@ -2031,7 +2028,6 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
        int bios_to_read = 0;
        struct bio_list bio_list;
        int ret;
-       int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
        int pagenr;
        int stripe;
        struct bio *bio;
@@ -2055,7 +2051,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
                        continue;
                }
 
-               for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+               for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                        struct page *p;
 
                        /*
@@ -2279,37 +2275,11 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
                        if (!page)
                                return -ENOMEM;
                        rbio->stripe_pages[index] = page;
-                       ClearPageUptodate(page);
                }
        }
        return 0;
 }
 
-/*
- * end io function used by finish_rmw.  When we finally
- * get here, we've written a full stripe
- */
-static void raid_write_parity_end_io(struct bio *bio)
-{
-       struct btrfs_raid_bio *rbio = bio->bi_private;
-       int err = bio->bi_error;
-
-       if (bio->bi_error)
-               fail_bio_stripe(rbio, bio);
-
-       bio_put(bio);
-
-       if (!atomic_dec_and_test(&rbio->stripes_pending))
-               return;
-
-       err = 0;
-
-       if (atomic_read(&rbio->error))
-               err = -EIO;
-
-       rbio_orig_end_io(rbio, err);
-}
-
 static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
                                         int need_check)
 {
@@ -2462,7 +2432,7 @@ submit_write:
                        break;
 
                bio->bi_private = rbio;
-               bio->bi_end_io = raid_write_parity_end_io;
+               bio->bi_end_io = raid_write_end_io;
                submit_bio(WRITE, bio);
        }
        return;
index ef6d8fc..2bd0011 100644 (file)
@@ -575,7 +575,8 @@ static int is_cowonly_root(u64 root_objectid)
            root_objectid == BTRFS_TREE_LOG_OBJECTID ||
            root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
            root_objectid == BTRFS_UUID_TREE_OBJECTID ||
-           root_objectid == BTRFS_QUOTA_TREE_OBJECTID)
+           root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
+           root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
                return 1;
        return 0;
 }
@@ -3030,7 +3031,7 @@ int prealloc_file_extent_cluster(struct inode *inode,
        int ret = 0;
 
        BUG_ON(cluster->start != cluster->boundary[0]);
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        ret = btrfs_check_data_free_space(inode, cluster->start,
                                          cluster->end + 1 - cluster->start);
@@ -3057,7 +3058,7 @@ int prealloc_file_extent_cluster(struct inode *inode,
        btrfs_free_reserved_data_space(inode, cluster->start,
                                       cluster->end + 1 - cluster->start);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index 0c981eb..92bf5ee 100644 (file)
@@ -2813,7 +2813,7 @@ out:
 
 static inline int scrub_calc_parity_bitmap_len(int nsectors)
 {
-       return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * (BITS_PER_LONG / 8);
+       return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
 }
 
 static void scrub_parity_get(struct scrub_parity *sparity)
@@ -3458,7 +3458,7 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
                return ret;
        }
 
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        if (em->start != chunk_offset)
                goto out;
 
@@ -4279,7 +4279,7 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
                return PTR_ERR(inode);
 
        /* Avoid truncate/dio/punch hole.. */
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        inode_dio_wait(inode);
 
        physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
@@ -4358,7 +4358,7 @@ next_page:
        }
        ret = COPY_COMPLETE;
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        iput(inode);
        return ret;
 }
index 9b9eab6..d41e09f 100644 (file)
@@ -383,6 +383,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
        int ret = 0;
        char *compress_type;
        bool compress_force = false;
+       enum btrfs_compression_type saved_compress_type;
+       bool saved_compress_force;
+       int no_compress = 0;
 
        cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
        if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE))
@@ -462,6 +465,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                        /* Fallthrough */
                case Opt_compress:
                case Opt_compress_type:
+                       saved_compress_type = btrfs_test_opt(root, COMPRESS) ?
+                               info->compress_type : BTRFS_COMPRESS_NONE;
+                       saved_compress_force =
+                               btrfs_test_opt(root, FORCE_COMPRESS);
                        if (token == Opt_compress ||
                            token == Opt_compress_force ||
                            strcmp(args[0].from, "zlib") == 0) {
@@ -470,6 +477,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                btrfs_set_opt(info->mount_opt, COMPRESS);
                                btrfs_clear_opt(info->mount_opt, NODATACOW);
                                btrfs_clear_opt(info->mount_opt, NODATASUM);
+                               no_compress = 0;
                        } else if (strcmp(args[0].from, "lzo") == 0) {
                                compress_type = "lzo";
                                info->compress_type = BTRFS_COMPRESS_LZO;
@@ -477,25 +485,21 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                btrfs_clear_opt(info->mount_opt, NODATACOW);
                                btrfs_clear_opt(info->mount_opt, NODATASUM);
                                btrfs_set_fs_incompat(info, COMPRESS_LZO);
+                               no_compress = 0;
                        } else if (strncmp(args[0].from, "no", 2) == 0) {
                                compress_type = "no";
                                btrfs_clear_opt(info->mount_opt, COMPRESS);
                                btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
                                compress_force = false;
+                               no_compress++;
                        } else {
                                ret = -EINVAL;
                                goto out;
                        }
 
                        if (compress_force) {
-                               btrfs_set_and_info(root, FORCE_COMPRESS,
-                                                  "force %s compression",
-                                                  compress_type);
+                               btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
                        } else {
-                               if (!btrfs_test_opt(root, COMPRESS))
-                                       btrfs_info(root->fs_info,
-                                                  "btrfs: use %s compression",
-                                                  compress_type);
                                /*
                                 * If we remount from compress-force=xxx to
                                 * compress=xxx, we need clear FORCE_COMPRESS
@@ -504,6 +508,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                 */
                                btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
                        }
+                       if ((btrfs_test_opt(root, COMPRESS) &&
+                            (info->compress_type != saved_compress_type ||
+                             compress_force != saved_compress_force)) ||
+                           (!btrfs_test_opt(root, COMPRESS) &&
+                            no_compress == 1)) {
+                               btrfs_info(root->fs_info,
+                                          "%s %s compression",
+                                          (compress_force) ? "force" : "use",
+                                          compress_type);
+                       }
+                       compress_force = false;
                        break;
                case Opt_ssd:
                        btrfs_set_and_info(root, SSD,
index e0ac859..539e7b5 100644 (file)
@@ -202,6 +202,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
 BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
 BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
 BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
+BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
 
 static struct attribute *btrfs_supported_feature_attrs[] = {
        BTRFS_FEAT_ATTR_PTR(mixed_backref),
@@ -213,6 +214,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
        BTRFS_FEAT_ATTR_PTR(raid56),
        BTRFS_FEAT_ATTR_PTR(skinny_metadata),
        BTRFS_FEAT_ATTR_PTR(no_holes),
+       BTRFS_FEAT_ATTR_PTR(free_space_tree),
        NULL
 };
 
@@ -780,6 +782,39 @@ failure:
        return error;
 }
 
+
+/*
+ * Change per-fs features in /sys/fs/btrfs/UUID/features to match current
+ * values in superblock. Call after any changes to incompat/compat_ro flags
+ */
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+               u64 bit, enum btrfs_feature_set set)
+{
+       struct btrfs_fs_devices *fs_devs;
+       struct kobject *fsid_kobj;
+       u64 features;
+       int ret;
+
+       if (!fs_info)
+               return;
+
+       features = get_features(fs_info, set);
+       ASSERT(bit & supported_feature_masks[set]);
+
+       fs_devs = fs_info->fs_devices;
+       fsid_kobj = &fs_devs->fsid_kobj;
+
+       if (!fsid_kobj->state_initialized)
+               return;
+
+       /*
+        * FIXME: this is too heavy to update just one value, ideally we'd like
+        * to use sysfs_update_group but some refactoring is needed first.
+        */
+       sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
+       ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
+}
+
 static int btrfs_init_debugfs(void)
 {
 #ifdef CONFIG_DEBUG_FS
index 9c09522..d7da1a4 100644 (file)
@@ -56,7 +56,7 @@ static struct btrfs_feature_attr btrfs_attr_##_name = {                            \
 #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
        BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
 #define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
-       BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature)
+       BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
 #define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
        BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
 
@@ -90,4 +90,7 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
                                struct kobject *parent);
 int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
 void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+               u64 bit, enum btrfs_feature_set set);
+
 #endif /* _BTRFS_SYSFS_H_ */
index b1d920b..0e1e61a 100644 (file)
@@ -82,18 +82,18 @@ void btrfs_destroy_test_fs(void)
 struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
 {
        struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
-                                               GFP_NOFS);
+                                               GFP_KERNEL);
 
        if (!fs_info)
                return fs_info;
        fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
-                                     GFP_NOFS);
+                                     GFP_KERNEL);
        if (!fs_info->fs_devices) {
                kfree(fs_info);
                return NULL;
        }
        fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
-                                     GFP_NOFS);
+                                     GFP_KERNEL);
        if (!fs_info->super_copy) {
                kfree(fs_info->fs_devices);
                kfree(fs_info);
@@ -180,11 +180,11 @@ btrfs_alloc_dummy_block_group(unsigned long length)
 {
        struct btrfs_block_group_cache *cache;
 
-       cache = kzalloc(sizeof(*cache), GFP_NOFS);
+       cache = kzalloc(sizeof(*cache), GFP_KERNEL);
        if (!cache)
                return NULL;
        cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
-                                       GFP_NOFS);
+                                       GFP_KERNEL);
        if (!cache->free_space_ctl) {
                kfree(cache);
                return NULL;
index e29fa29..669b582 100644 (file)
@@ -94,7 +94,7 @@ static int test_find_delalloc(void)
         * test.
         */
        for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
-               page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+               page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
                if (!page) {
                        test_msg("Failed to allocate test page\n");
                        ret = -ENOMEM;
@@ -113,7 +113,7 @@ static int test_find_delalloc(void)
         * |--- delalloc ---|
         * |---  search  ---|
         */
-       set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
+       set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL);
        start = 0;
        end = 0;
        found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -144,7 +144,7 @@ static int test_find_delalloc(void)
                test_msg("Couldn't find the locked page\n");
                goto out_bits;
        }
-       set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
+       set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL);
        start = test_start;
        end = 0;
        found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -199,7 +199,7 @@ static int test_find_delalloc(void)
         *
         * We are re-using our test_start from above since it works out well.
         */
-       set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
+       set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL);
        start = test_start;
        end = 0;
        found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -262,7 +262,7 @@ static int test_find_delalloc(void)
        }
        ret = 0;
 out_bits:
-       clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS);
+       clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL);
 out:
        if (locked_page)
                page_cache_release(locked_page);
@@ -360,7 +360,7 @@ static int test_eb_bitmaps(void)
 
        test_msg("Running extent buffer bitmap tests\n");
 
-       bitmap = kmalloc(len, GFP_NOFS);
+       bitmap = kmalloc(len, GFP_KERNEL);
        if (!bitmap) {
                test_msg("Couldn't allocate test bitmap\n");
                return -ENOMEM;
index 5de55fd..e2d3da0 100644 (file)
@@ -974,7 +974,7 @@ static int test_extent_accounting(void)
                               (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
                               EXTENT_DELALLOC | EXTENT_DIRTY |
                               EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
-                              NULL, GFP_NOFS);
+                              NULL, GFP_KERNEL);
        if (ret) {
                test_msg("clear_extent_bit returned %d\n", ret);
                goto out;
@@ -1045,7 +1045,7 @@ static int test_extent_accounting(void)
                               BTRFS_MAX_EXTENT_SIZE+8191,
                               EXTENT_DIRTY | EXTENT_DELALLOC |
                               EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-                              NULL, GFP_NOFS);
+                              NULL, GFP_KERNEL);
        if (ret) {
                test_msg("clear_extent_bit returned %d\n", ret);
                goto out;
@@ -1079,7 +1079,7 @@ static int test_extent_accounting(void)
        ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
                               EXTENT_DIRTY | EXTENT_DELALLOC |
                               EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-                              NULL, GFP_NOFS);
+                              NULL, GFP_KERNEL);
        if (ret) {
                test_msg("clear_extent_bit returned %d\n", ret);
                goto out;
@@ -1096,7 +1096,7 @@ out:
                clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
                                 EXTENT_DIRTY | EXTENT_DELALLOC |
                                 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-                                NULL, GFP_NOFS);
+                                NULL, GFP_KERNEL);
        iput(inode);
        btrfs_free_dummy_root(root);
        return ret;
index 323e12c..978c3a8 100644 (file)
@@ -4127,7 +4127,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                                     struct inode *inode,
                                     struct btrfs_path *path,
                                     struct list_head *logged_list,
-                                    struct btrfs_log_ctx *ctx)
+                                    struct btrfs_log_ctx *ctx,
+                                    const u64 start,
+                                    const u64 end)
 {
        struct extent_map *em, *n;
        struct list_head extents;
@@ -4166,7 +4168,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
        }
 
        list_sort(NULL, &extents, extent_cmp);
-
+       /*
+        * Collect any new ordered extents within the range. This is to
+        * prevent logging file extent items without waiting for the disk
+        * location they point to being written. We do this only to deal
+        * with races against concurrent lockless direct IO writes.
+        */
+       btrfs_get_logged_extents(inode, logged_list, start, end);
 process:
        while (!list_empty(&extents)) {
                em = list_entry(extents.next, struct extent_map, list);
@@ -4701,7 +4709,7 @@ log_extents:
                        goto out_unlock;
                }
                ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
-                                               &logged_list, ctx);
+                                               &logged_list, ctx, start, end);
                if (ret) {
                        err = ret;
                        goto out_unlock;
index c32abbc..366b335 100644 (file)
@@ -108,7 +108,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
        },
 };
 
-const u64 const btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
+const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
        [BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10,
        [BTRFS_RAID_RAID1]  = BTRFS_BLOCK_GROUP_RAID1,
        [BTRFS_RAID_DUP]    = BTRFS_BLOCK_GROUP_DUP,
@@ -233,6 +233,7 @@ static struct btrfs_device *__alloc_device(void)
        spin_lock_init(&dev->reada_lock);
        atomic_set(&dev->reada_in_flight, 0);
        atomic_set(&dev->dev_stats_ccnt, 0);
+       btrfs_device_data_ordered_init(dev);
        INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
        INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
 
@@ -1183,7 +1184,7 @@ again:
                struct map_lookup *map;
                int i;
 
-               map = (struct map_lookup *)em->bdev;
+               map = em->map_lookup;
                for (i = 0; i < map->num_stripes; i++) {
                        u64 end;
 
@@ -2755,7 +2756,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
                        free_extent_map(em);
                return -EINVAL;
        }
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        lock_chunks(root->fs_info->chunk_root);
        check_system_chunk(trans, extent_root, map->type);
        unlock_chunks(root->fs_info->chunk_root);
@@ -3751,7 +3752,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        if (btrfs_get_num_tolerated_disk_barrier_failures(bctl->meta.target) <
                btrfs_get_num_tolerated_disk_barrier_failures(bctl->data.target)) {
                btrfs_warn(fs_info,
-       "metatdata profile 0x%llx has lower redundancy than data profile 0x%llx",
+       "metadata profile 0x%llx has lower redundancy than data profile 0x%llx",
                        bctl->meta.target, bctl->data.target);
        }
 
@@ -4718,7 +4719,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                goto error;
        }
        set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
-       em->bdev = (struct block_device *)map;
+       em->map_lookup = map;
        em->start = start;
        em->len = num_bytes;
        em->block_start = 0;
@@ -4813,7 +4814,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
                return -EINVAL;
        }
 
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        item_size = btrfs_chunk_item_size(map->num_stripes);
        stripe_size = em->orig_block_len;
 
@@ -4968,7 +4969,7 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
        if (!em)
                return 1;
 
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        for (i = 0; i < map->num_stripes; i++) {
                if (map->stripes[i].dev->missing) {
                        miss_ndevs++;
@@ -5048,7 +5049,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
                return 1;
        }
 
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
                ret = map->num_stripes;
        else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
@@ -5084,7 +5085,7 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
        BUG_ON(!em);
 
        BUG_ON(em->start > logical || em->start + em->len < logical);
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
                len = map->stripe_len * nr_data_stripes(map);
        free_extent_map(em);
@@ -5105,7 +5106,7 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
        BUG_ON(!em);
 
        BUG_ON(em->start > logical || em->start + em->len < logical);
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
                ret = 1;
        free_extent_map(em);
@@ -5264,7 +5265,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                return -EINVAL;
        }
 
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        offset = logical - em->start;
 
        stripe_len = map->stripe_len;
@@ -5378,35 +5379,33 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                 * target drive.
                 */
                for (i = 0; i < tmp_num_stripes; i++) {
-                       if (tmp_bbio->stripes[i].dev->devid == srcdev_devid) {
-                               /*
-                                * In case of DUP, in order to keep it
-                                * simple, only add the mirror with the
-                                * lowest physical address
-                                */
-                               if (found &&
-                                   physical_of_found <=
-                                    tmp_bbio->stripes[i].physical)
-                                       continue;
-                               index_srcdev = i;
-                               found = 1;
-                               physical_of_found =
-                                       tmp_bbio->stripes[i].physical;
-                       }
+                       if (tmp_bbio->stripes[i].dev->devid != srcdev_devid)
+                               continue;
+
+                       /*
+                        * In case of DUP, in order to keep it simple, only add
+                        * the mirror with the lowest physical address
+                        */
+                       if (found &&
+                           physical_of_found <= tmp_bbio->stripes[i].physical)
+                               continue;
+
+                       index_srcdev = i;
+                       found = 1;
+                       physical_of_found = tmp_bbio->stripes[i].physical;
                }
 
-               if (found) {
-                       mirror_num = index_srcdev + 1;
-                       patch_the_first_stripe_for_dev_replace = 1;
-                       physical_to_patch_in_first_stripe = physical_of_found;
-               } else {
+               btrfs_put_bbio(tmp_bbio);
+
+               if (!found) {
                        WARN_ON(1);
                        ret = -EIO;
-                       btrfs_put_bbio(tmp_bbio);
                        goto out;
                }
 
-               btrfs_put_bbio(tmp_bbio);
+               mirror_num = index_srcdev + 1;
+               patch_the_first_stripe_for_dev_replace = 1;
+               physical_to_patch_in_first_stripe = physical_of_found;
        } else if (mirror_num > map->num_stripes) {
                mirror_num = 0;
        }
@@ -5806,7 +5805,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
                free_extent_map(em);
                return -EIO;
        }
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
 
        length = em->len;
        rmap_len = map->stripe_len;
@@ -6069,7 +6068,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
        bbio->fs_info = root->fs_info;
        atomic_set(&bbio->stripes_pending, bbio->num_stripes);
 
-       if (bbio->raid_map) {
+       if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
+           ((rw & WRITE) || (mirror_num > 1))) {
                /* In this case, map_length has been set to the length of
                   a single stripe; not the whole write */
                if (rw & WRITE) {
@@ -6210,6 +6210,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
        struct extent_map *em;
        u64 logical;
        u64 length;
+       u64 stripe_len;
        u64 devid;
        u8 uuid[BTRFS_UUID_SIZE];
        int num_stripes;
@@ -6218,6 +6219,37 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
 
        logical = key->offset;
        length = btrfs_chunk_length(leaf, chunk);
+       stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+       num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+       /* Validation check */
+       if (!num_stripes) {
+               btrfs_err(root->fs_info, "invalid chunk num_stripes: %u",
+                         num_stripes);
+               return -EIO;
+       }
+       if (!IS_ALIGNED(logical, root->sectorsize)) {
+               btrfs_err(root->fs_info,
+                         "invalid chunk logical %llu", logical);
+               return -EIO;
+       }
+       if (!length || !IS_ALIGNED(length, root->sectorsize)) {
+               btrfs_err(root->fs_info,
+                       "invalid chunk length %llu", length);
+               return -EIO;
+       }
+       if (!is_power_of_2(stripe_len)) {
+               btrfs_err(root->fs_info, "invalid chunk stripe length: %llu",
+                         stripe_len);
+               return -EIO;
+       }
+       if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+           btrfs_chunk_type(leaf, chunk)) {
+               btrfs_err(root->fs_info, "unrecognized chunk type: %llu",
+                         ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+                           BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+                         btrfs_chunk_type(leaf, chunk));
+               return -EIO;
+       }
 
        read_lock(&map_tree->map_tree.lock);
        em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
@@ -6234,7 +6266,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
        em = alloc_extent_map();
        if (!em)
                return -ENOMEM;
-       num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
        map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
        if (!map) {
                free_extent_map(em);
@@ -6242,7 +6273,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
        }
 
        set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
-       em->bdev = (struct block_device *)map;
+       em->map_lookup = map;
        em->start = logical;
        em->len = length;
        em->orig_start = 0;
@@ -6944,7 +6975,7 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_root *root,
        /* In order to kick the device replace finish process */
        lock_chunks(root);
        list_for_each_entry(em, &transaction->pending_chunks, list) {
-               map = (struct map_lookup *)em->bdev;
+               map = em->map_lookup;
 
                for (i = 0; i < map->num_stripes; i++) {
                        dev = map->stripes[i].dev;
index fd953c3..6c68d63 100644 (file)
@@ -126,7 +126,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
         * locks the inode's i_mutex before calling setxattr or removexattr.
         */
        if (flags & XATTR_REPLACE) {
-               ASSERT(mutex_is_locked(&inode->i_mutex));
+               ASSERT(inode_is_locked(inode));
                di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
                                        name, name_len, 0);
                if (!di)
index afa023d..675a333 100644 (file)
@@ -446,7 +446,7 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
                return 0;
 
        cachefiles_begin_secure(cache, &saved_cred);
-       mutex_lock(&d_inode(object->backer)->i_mutex);
+       inode_lock(d_inode(object->backer));
 
        /* if there's an extension to a partial page at the end of the backing
         * file, we need to discard the partial page so that we pick up new
@@ -465,7 +465,7 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
        ret = notify_change(object->backer, &newattrs, NULL);
 
 truncate_failed:
-       mutex_unlock(&d_inode(object->backer)->i_mutex);
+       inode_unlock(d_inode(object->backer));
        cachefiles_end_secure(cache, saved_cred);
 
        if (ret == -EIO) {
index c4b8934..1c2334c 100644 (file)
@@ -295,7 +295,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
                                cachefiles_mark_object_buried(cache, rep, why);
                }
 
-               mutex_unlock(&d_inode(dir)->i_mutex);
+               inode_unlock(d_inode(dir));
 
                if (ret == -EIO)
                        cachefiles_io_error(cache, "Unlink failed");
@@ -306,7 +306,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
 
        /* directories have to be moved to the graveyard */
        _debug("move stale object to graveyard");
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 
 try_again:
        /* first step is to make up a grave dentry in the graveyard */
@@ -423,13 +423,13 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
 
        dir = dget_parent(object->dentry);
 
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
 
        if (test_bit(FSCACHE_OBJECT_KILLED_BY_CACHE, &object->fscache.flags)) {
                /* object allocation for the same key preemptively deleted this
                 * object's file so that it could create its own file */
                _debug("object preemptively buried");
-               mutex_unlock(&d_inode(dir)->i_mutex);
+               inode_unlock(d_inode(dir));
                ret = 0;
        } else {
                /* we need to check that our parent is _still_ our parent - it
@@ -442,7 +442,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
                        /* it got moved, presumably by cachefilesd culling it,
                         * so it's no longer in the key path and we can ignore
                         * it */
-                       mutex_unlock(&d_inode(dir)->i_mutex);
+                       inode_unlock(d_inode(dir));
                        ret = 0;
                }
        }
@@ -501,7 +501,7 @@ lookup_again:
        /* search the current directory for the element name */
        _debug("lookup '%s'", name);
 
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
 
        start = jiffies;
        next = lookup_one_len(name, dir, nlen);
@@ -585,7 +585,7 @@ lookup_again:
        /* process the next component */
        if (key) {
                _debug("advance");
-               mutex_unlock(&d_inode(dir)->i_mutex);
+               inode_unlock(d_inode(dir));
                dput(dir);
                dir = next;
                next = NULL;
@@ -623,7 +623,7 @@ lookup_again:
        /* note that we're now using this object */
        ret = cachefiles_mark_object_active(cache, object);
 
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        dput(dir);
        dir = NULL;
 
@@ -705,7 +705,7 @@ lookup_error:
                cachefiles_io_error(cache, "Lookup failed");
        next = NULL;
 error:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        dput(next);
 error_out2:
        dput(dir);
@@ -729,7 +729,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
        _enter(",,%s", dirname);
 
        /* search the current directory for the element name */
-       mutex_lock(&d_inode(dir)->i_mutex);
+       inode_lock(d_inode(dir));
 
        start = jiffies;
        subdir = lookup_one_len(dirname, dir, strlen(dirname));
@@ -768,7 +768,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
                       d_backing_inode(subdir)->i_ino);
        }
 
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 
        /* we need to make sure the subdir is a directory */
        ASSERT(d_backing_inode(subdir));
@@ -800,19 +800,19 @@ check_error:
        return ERR_PTR(ret);
 
 mkdir_error:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        dput(subdir);
        pr_err("mkdir %s failed with error %d\n", dirname, ret);
        return ERR_PTR(ret);
 
 lookup_error:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        ret = PTR_ERR(subdir);
        pr_err("Lookup %s failed with error %d\n", dirname, ret);
        return ERR_PTR(ret);
 
 nomem_d_alloc:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        _leave(" = -ENOMEM");
        return ERR_PTR(-ENOMEM);
 }
@@ -837,7 +837,7 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
        //       dir, filename);
 
        /* look up the victim */
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
 
        start = jiffies;
        victim = lookup_one_len(filename, dir, strlen(filename));
@@ -852,7 +852,7 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
         * at the netfs's request whilst the cull was in progress
         */
        if (d_is_negative(victim)) {
-               mutex_unlock(&d_inode(dir)->i_mutex);
+               inode_unlock(d_inode(dir));
                dput(victim);
                _leave(" = -ENOENT [absent]");
                return ERR_PTR(-ENOENT);
@@ -881,13 +881,13 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
 
 object_in_use:
        read_unlock(&cache->active_lock);
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        dput(victim);
        //_leave(" = -EBUSY [in use]");
        return ERR_PTR(-EBUSY);
 
 lookup_error:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        ret = PTR_ERR(victim);
        if (ret == -ENOENT) {
                /* file or dir now absent - probably retired by netfs */
@@ -947,7 +947,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
        return 0;
 
 error_unlock:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 error:
        dput(victim);
        if (ret == -ENOENT) {
@@ -982,7 +982,7 @@ int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir,
        if (IS_ERR(victim))
                return PTR_ERR(victim);
 
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        dput(victim);
        //_leave(" = 0");
        return 0;
index b7d218a..c222137 100644 (file)
@@ -1108,7 +1108,7 @@ retry_locked:
                return 0;
 
        /* past end of file? */
-       i_size = inode->i_size;   /* caller holds i_mutex */
+       i_size = i_size_read(inode);
 
        if (page_off >= i_size ||
            (pos_in_page == 0 && (pos+len) >= i_size &&
@@ -1149,7 +1149,6 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
                page = grab_cache_page_write_begin(mapping, index, 0);
                if (!page)
                        return -ENOMEM;
-               *pagep = page;
 
                dout("write_begin file %p inode %p page %p %d~%d\n", file,
                     inode, page, (int)pos, (int)len);
@@ -1184,8 +1183,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
                zero_user_segment(page, from+copied, len);
 
        /* did file size increase? */
-       /* (no need for i_size_read(); we caller holds i_mutex */
-       if (pos+copied > inode->i_size)
+       if (pos+copied > i_size_read(inode))
                check_cap = ceph_inode_set_size(inode, pos+copied);
 
        if (!PageUptodate(page))
@@ -1378,11 +1376,13 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
        ret = VM_FAULT_NOPAGE;
        if ((off > size) ||
-           (page->mapping != inode->i_mapping))
+           (page->mapping != inode->i_mapping)) {
+               unlock_page(page);
                goto out;
+       }
 
        ret = ceph_update_writeable_page(vma->vm_file, off, len, page);
-       if (ret == 0) {
+       if (ret >= 0) {
                /* success.  we'll keep the page locked. */
                set_page_dirty(page);
                ret = VM_FAULT_LOCKED;
@@ -1393,8 +1393,6 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
                        ret = VM_FAULT_SIGBUS;
        }
 out:
-       if (ret != VM_FAULT_LOCKED)
-               unlock_page(page);
        if (ret == VM_FAULT_LOCKED ||
            ci->i_inline_version != CEPH_INLINE_NONE) {
                int dirty;
index a4766de..a351480 100644 (file)
@@ -106,7 +106,7 @@ static uint16_t ceph_fscache_inode_get_aux(const void *cookie_netfs_data,
 
        memset(&aux, 0, sizeof(aux));
        aux.mtime = inode->i_mtime;
-       aux.size = inode->i_size;
+       aux.size = i_size_read(inode);
 
        memcpy(buffer, &aux, sizeof(aux));
 
@@ -117,9 +117,7 @@ static void ceph_fscache_inode_get_attr(const void *cookie_netfs_data,
                                        uint64_t *size)
 {
        const struct ceph_inode_info* ci = cookie_netfs_data;
-       const struct inode* inode = &ci->vfs_inode;
-
-       *size = inode->i_size;
+       *size = i_size_read(&ci->vfs_inode);
 }
 
 static enum fscache_checkaux ceph_fscache_inode_check_aux(
@@ -134,7 +132,7 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux(
 
        memset(&aux, 0, sizeof(aux));
        aux.mtime = inode->i_mtime;
-       aux.size = inode->i_size;
+       aux.size = i_size_read(inode);
 
        if (memcmp(data, &aux, sizeof(aux)) != 0)
                return FSCACHE_CHECKAUX_OBSOLETE;
@@ -197,7 +195,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
                return;
 
        /* Avoid multiple racing open requests */
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (ci->fscache)
                goto done;
@@ -207,7 +205,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
                                             ci, true);
        fscache_check_consistency(ci->fscache);
 done:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
 }
 
index c69e125..cdbf8cf 100644 (file)
@@ -2030,7 +2030,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        if (datasync)
                goto out;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        dirty = try_flush_caps(inode, &flush_tid);
        dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
@@ -2046,7 +2046,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
                ret = wait_event_interruptible(ci->i_cap_wq,
                                        caps_are_flushed(inode, flush_tid));
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 out:
        dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret);
        return ret;
index 9314b4e..fd11fb2 100644 (file)
@@ -507,7 +507,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
        loff_t old_offset = ceph_make_fpos(fi->frag, fi->next_offset);
        loff_t retval;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        retval = -EINVAL;
        switch (whence) {
        case SEEK_CUR:
@@ -542,7 +542,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
                }
        }
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return retval;
 }
 
index fe02ae7..3b31723 100644 (file)
@@ -215,7 +215,7 @@ static int ceph_get_name(struct dentry *parent, char *name,
        if (IS_ERR(req))
                return PTR_ERR(req);
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
 
        req->r_inode = d_inode(child);
        ihold(d_inode(child));
@@ -224,7 +224,7 @@ static int ceph_get_name(struct dentry *parent, char *name,
        req->r_num_caps = 2;
        err = ceph_mdsc_do_request(mdsc, NULL, req);
 
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
 
        if (!err) {
                struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
index 3c68e6a..eb9028e 100644 (file)
@@ -397,8 +397,9 @@ int ceph_release(struct inode *inode, struct file *file)
 }
 
 enum {
-       CHECK_EOF = 1,
-       READ_INLINE = 2,
+       HAVE_RETRIED = 1,
+       CHECK_EOF =    2,
+       READ_INLINE =  3,
 };
 
 /*
@@ -411,17 +412,15 @@ enum {
 static int striped_read(struct inode *inode,
                        u64 off, u64 len,
                        struct page **pages, int num_pages,
-                       int *checkeof, bool o_direct,
-                       unsigned long buf_align)
+                       int *checkeof)
 {
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        struct ceph_inode_info *ci = ceph_inode(inode);
        u64 pos, this_len, left;
-       int io_align, page_align;
-       int pages_left;
-       int read;
+       loff_t i_size;
+       int page_align, pages_left;
+       int read, ret;
        struct page **page_pos;
-       int ret;
        bool hit_stripe, was_short;
 
        /*
@@ -432,13 +431,9 @@ static int striped_read(struct inode *inode,
        page_pos = pages;
        pages_left = num_pages;
        read = 0;
-       io_align = off & ~PAGE_MASK;
 
 more:
-       if (o_direct)
-               page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
-       else
-               page_align = pos & ~PAGE_MASK;
+       page_align = pos & ~PAGE_MASK;
        this_len = left;
        ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
                                  &ci->i_layout, pos, &this_len,
@@ -452,13 +447,12 @@ more:
        dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read,
             ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
 
+       i_size = i_size_read(inode);
        if (ret >= 0) {
                int didpages;
-               if (was_short && (pos + ret < inode->i_size)) {
-                       int zlen = min(this_len - ret,
-                                      inode->i_size - pos - ret);
-                       int zoff = (o_direct ? buf_align : io_align) +
-                                   read + ret;
+               if (was_short && (pos + ret < i_size)) {
+                       int zlen = min(this_len - ret, i_size - pos - ret);
+                       int zoff = (off & ~PAGE_MASK) + read + ret;
                        dout(" zero gap %llu to %llu\n",
                                pos + ret, pos + ret + zlen);
                        ceph_zero_page_vector_range(zoff, zlen, pages);
@@ -473,14 +467,14 @@ more:
                pages_left -= didpages;
 
                /* hit stripe and need continue*/
-               if (left && hit_stripe && pos < inode->i_size)
+               if (left && hit_stripe && pos < i_size)
                        goto more;
        }
 
        if (read > 0) {
                ret = read;
                /* did we bounce off eof? */
-               if (pos + left > inode->i_size)
+               if (pos + left > i_size)
                        *checkeof = CHECK_EOF;
        }
 
@@ -521,54 +515,28 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
        if (ret < 0)
                return ret;
 
-       if (iocb->ki_flags & IOCB_DIRECT) {
-               while (iov_iter_count(i)) {
-                       size_t start;
-                       ssize_t n;
-
-                       n = dio_get_pagev_size(i);
-                       pages = dio_get_pages_alloc(i, n, &start, &num_pages);
-                       if (IS_ERR(pages))
-                               return PTR_ERR(pages);
-
-                       ret = striped_read(inode, off, n,
-                                          pages, num_pages, checkeof,
-                                          1, start);
-
-                       ceph_put_page_vector(pages, num_pages, true);
-
-                       if (ret <= 0)
-                               break;
-                       off += ret;
-                       iov_iter_advance(i, ret);
-                       if (ret < n)
+       num_pages = calc_pages_for(off, len);
+       pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
+       if (IS_ERR(pages))
+               return PTR_ERR(pages);
+       ret = striped_read(inode, off, len, pages,
+                               num_pages, checkeof);
+       if (ret > 0) {
+               int l, k = 0;
+               size_t left = ret;
+
+               while (left) {
+                       size_t page_off = off & ~PAGE_MASK;
+                       size_t copy = min_t(size_t, left,
+                                           PAGE_SIZE - page_off);
+                       l = copy_page_to_iter(pages[k++], page_off, copy, i);
+                       off += l;
+                       left -= l;
+                       if (l < copy)
                                break;
                }
-       } else {
-               num_pages = calc_pages_for(off, len);
-               pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
-               if (IS_ERR(pages))
-                       return PTR_ERR(pages);
-               ret = striped_read(inode, off, len, pages,
-                                       num_pages, checkeof, 0, 0);
-               if (ret > 0) {
-                       int l, k = 0;
-                       size_t left = ret;
-
-                       while (left) {
-                               size_t page_off = off & ~PAGE_MASK;
-                               size_t copy = min_t(size_t,
-                                                   PAGE_SIZE - page_off, left);
-                               l = copy_page_to_iter(pages[k++], page_off,
-                                                     copy, i);
-                               off += l;
-                               left -= l;
-                               if (l < copy)
-                                       break;
-                       }
-               }
-               ceph_release_page_vector(pages, num_pages);
        }
+       ceph_release_page_vector(pages, num_pages);
 
        if (off > iocb->ki_pos) {
                ret = off - iocb->ki_pos;
@@ -579,6 +547,193 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
        return ret;
 }
 
+struct ceph_aio_request {      /* state shared by the OSD requests issued for one kiocb */
+       struct kiocb *iocb;     /* finished via ->ki_complete() in ceph_aio_complete() */
+       size_t total_len;       /* byte count reported when no error was recorded */
+       int write;      /* non-zero for a write; selects FILE_WR vs FILE_RD cap handling */
+       int error;      /* first negative OSD result, stored once with cmpxchg() */
+       struct list_head osd_reqs;      /* NOTE(review): not used by the code shown here */
+       unsigned num_reqs;      /* presumably the OSD request count; == 1 is special-cased for reads */
+       atomic_t pending_reqs;  /* the drop that reaches zero runs the final completion */
+       struct timespec mtime;  /* passed to ceph_osdc_build_request() on retry */
+       struct ceph_cap_flush *prealloc_cf;     /* handed to __ceph_mark_dirty_caps(); released with ceph_free_cap_flush() */
+};
+
+struct ceph_aio_work { /* work item queued when an OSD request returns -EOLDSNAPC */
+       struct work_struct work;        /* initialised to run ceph_aio_retry_work() */
+       struct ceph_osd_request *req;   /* the request to be resubmitted */
+};
+
+static void ceph_aio_retry_work(struct work_struct *work);
+
+static void ceph_aio_complete(struct inode *inode,     /* drop one pending request; the last drop completes the kiocb */
+                             struct ceph_aio_request *aio_req)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       int ret;
+
+       if (!atomic_dec_and_test(&aio_req->pending_reqs))       /* other requests are still outstanding */
+               return;
+
+       ret = aio_req->error;
+       if (!ret)       /* no error recorded: report the byte count instead */
+               ret = aio_req->total_len;
+
+       dout("ceph_aio_complete %p rc %d\n", inode, ret);
+
+       if (ret >= 0 && aio_req->write) {       /* successful write: update size and dirty caps */
+               int dirty;
+
+               loff_t endoff = aio_req->iocb->ki_pos + aio_req->total_len;
+               if (endoff > i_size_read(inode)) {      /* the write ended past the current i_size */
+                       if (ceph_inode_set_size(inode, endoff))
+                               ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
+               }
+
+               spin_lock(&ci->i_ceph_lock);
+               ci->i_inline_version = CEPH_INLINE_NONE;
+               dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
+                                              &aio_req->prealloc_cf);
+               spin_unlock(&ci->i_ceph_lock);
+               if (dirty)
+                       __mark_inode_dirty(inode, dirty);
+
+       }
+
+       ceph_put_cap_refs(ci, (aio_req->write ? CEPH_CAP_FILE_WR :      /* put the cap ref matching the I/O direction */
+                                               CEPH_CAP_FILE_RD));
+
+       aio_req->iocb->ki_complete(aio_req->iocb, ret, 0);      /* deliver the result through the kiocb */
+
+       ceph_free_cap_flush(aio_req->prealloc_cf);
+       kfree(aio_req); /* aio_req must not be touched after this point */
+}
+
+static void ceph_aio_complete_req(struct ceph_osd_request *req,        /* completion handler for one OSD request of an aio */
+                                 struct ceph_msg *msg)
+{
+       int rc = req->r_result;
+       struct inode *inode = req->r_inode;
+       struct ceph_aio_request *aio_req = req->r_priv;
+       struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
+       int num_pages = calc_pages_for((u64)osd_data->alignment,        /* page count of op 0's data, released below */
+                                      osd_data->length);
+
+       dout("ceph_aio_complete_req %p rc %d bytes %llu\n",
+            inode, rc, osd_data->length);
+
+       if (rc == -EOLDSNAPC) { /* defer to a work item that resubmits the request */
+               struct ceph_aio_work *aio_work;
+               BUG_ON(!aio_req->write);        /* this result is only expected for writes */
+
+               aio_work = kmalloc(sizeof(*aio_work), GFP_NOFS);
+               if (aio_work) {
+                       INIT_WORK(&aio_work->work, ceph_aio_retry_work);
+                       aio_work->req = req;
+                       queue_work(ceph_inode_to_client(inode)->wb_wq,
+                                  &aio_work->work);
+                       return; /* skip the cleanup below; the work item still uses req */
+               }
+               rc = -ENOMEM;   /* no work item could be allocated: fail this request */
+       } else if (!aio_req->write) {
+               if (rc == -ENOENT)      /* treated as a zero-byte read; the range is zero-filled below */
+                       rc = 0;
+               if (rc >= 0 && osd_data->length > rc) { /* short read: zero what was not returned */
+                       int zoff = osd_data->alignment + rc;
+                       int zlen = osd_data->length - rc;
+                       /*
+                        * If read is satisfied by single OSD request,
+                        * it can pass EOF. Otherwise read is within
+                        * i_size.
+                        */
+                       if (aio_req->num_reqs == 1) {
+                               loff_t i_size = i_size_read(inode);
+                               loff_t endoff = aio_req->iocb->ki_pos + rc;
+                               if (endoff < i_size)    /* clamp the zero-fill to the bytes before i_size */
+                                       zlen = min_t(size_t, zlen,
+                                                    i_size - endoff);
+                               aio_req->total_len = rc + zlen; /* result becomes data read plus zero-filled bytes */
+                       }
+
+                       if (zlen > 0)
+                               ceph_zero_page_vector_range(zoff, zlen,
+                                                           osd_data->pages);
+               }
+       }
+
+       ceph_put_page_vector(osd_data->pages, num_pages, false);
+       ceph_osdc_put_request(req);
+
+       if (rc < 0)     /* keep only the first error seen across requests */
+               cmpxchg(&aio_req->error, 0, rc);
+
+       ceph_aio_complete(inode, aio_req);      /* drops this request's pending count */
+       return;
+}
+
+static void ceph_aio_retry_work(struct work_struct *work)      /* work handler: resubmit a write that got -EOLDSNAPC */
+{
+       struct ceph_aio_work *aio_work =
+               container_of(work, struct ceph_aio_work, work);
+       struct ceph_osd_request *orig_req = aio_work->req;
+       struct ceph_aio_request *aio_req = orig_req->r_priv;
+       struct inode *inode = orig_req->r_inode;
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       struct ceph_snap_context *snapc;
+       struct ceph_osd_request *req;
+       int ret;
+
+       spin_lock(&ci->i_ceph_lock);
+       if (__ceph_have_pending_cap_snap(ci)) { /* use the context of the last entry on i_cap_snaps */
+               struct ceph_cap_snap *capsnap =
+                       list_last_entry(&ci->i_cap_snaps,
+                                       struct ceph_cap_snap,
+                                       ci_item);
+               snapc = ceph_get_snap_context(capsnap->context);
+       } else {        /* otherwise use the inode's head snap context */
+               BUG_ON(!ci->i_head_snapc);
+               snapc = ceph_get_snap_context(ci->i_head_snapc);
+       }
+       spin_unlock(&ci->i_ceph_lock);
+
+       req = ceph_osdc_alloc_request(orig_req->r_osdc, snapc, 2,
+                       false, GFP_NOFS);
+       if (!req) {
+               ret = -ENOMEM;
+               req = orig_req; /* fail the original request through the completion path */
+               goto out;
+       }
+
+       req->r_flags =  CEPH_OSD_FLAG_ORDERSNAP |
+                       CEPH_OSD_FLAG_ONDISK |
+                       CEPH_OSD_FLAG_WRITE;
+       req->r_base_oloc = orig_req->r_base_oloc;
+       req->r_base_oid = orig_req->r_base_oid;
+
+       req->r_ops[0] = orig_req->r_ops[0];     /* carry the original op 0 over unchanged */
+       osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
+
+       ceph_osdc_build_request(req, req->r_ops[0].extent.offset,
+                               snapc, CEPH_NOSNAP, &aio_req->mtime);
+
+       ceph_osdc_put_request(orig_req);        /* the new request takes over from the original */
+
+       req->r_callback = ceph_aio_complete_req;
+       req->r_inode = inode;
+       req->r_priv = aio_req;
+
+       ret = ceph_osdc_start_request(req->r_osdc, req, false);
+out:
+       if (ret < 0) {  /* run the completion handler directly with the error */
+               BUG_ON(ret == -EOLDSNAPC);
+               req->r_result = ret;
+               ceph_aio_complete_req(req, NULL);
+       }
+
+       ceph_put_snap_context(snapc);   /* drop the reference taken under i_ceph_lock above */
+       kfree(aio_work);
+}
+
 /*
  * Write commit request unsafe callback, called to tell us when a
  * request is unsafe (that is, in flight--has been handed to the
@@ -612,16 +767,10 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
 }
 
 
-/*
- * Synchronous write, straight from __user pointer or user pages.
- *
- * If write spans object boundary, just do multiple writes.  (For a
- * correct atomic write, we should e.g. take write locks on all
- * objects, rollback on failure, etc.)
- */
 static ssize_t
-ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
-                      struct ceph_snap_context *snapc)
+ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
+                      struct ceph_snap_context *snapc,
+                      struct ceph_cap_flush **pcf)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
@@ -630,44 +779,52 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
        struct ceph_vino vino;
        struct ceph_osd_request *req;
        struct page **pages;
-       int num_pages;
-       int written = 0;
+       struct ceph_aio_request *aio_req = NULL;
+       int num_pages = 0;
        int flags;
-       int check_caps = 0;
        int ret;
        struct timespec mtime = CURRENT_TIME;
-       size_t count = iov_iter_count(from);
+       size_t count = iov_iter_count(iter);
+       loff_t pos = iocb->ki_pos;
+       bool write = iov_iter_rw(iter) == WRITE;
 
-       if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
+       if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
                return -EROFS;
 
-       dout("sync_direct_write on file %p %lld~%u\n", file, pos,
-            (unsigned)count);
+       dout("sync_direct_read_write (%s) on file %p %lld~%u\n",
+            (write ? "write" : "read"), file, pos, (unsigned)count);
 
        ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + count);
        if (ret < 0)
                return ret;
 
-       ret = invalidate_inode_pages2_range(inode->i_mapping,
-                                           pos >> PAGE_CACHE_SHIFT,
-                                           (pos + count) >> PAGE_CACHE_SHIFT);
-       if (ret < 0)
-               dout("invalidate_inode_pages2_range returned %d\n", ret);
+       if (write) {
+               ret = invalidate_inode_pages2_range(inode->i_mapping,
+                                       pos >> PAGE_CACHE_SHIFT,
+                                       (pos + count) >> PAGE_CACHE_SHIFT);
+               if (ret < 0)
+                       dout("invalidate_inode_pages2_range returned %d\n", ret);
 
-       flags = CEPH_OSD_FLAG_ORDERSNAP |
-               CEPH_OSD_FLAG_ONDISK |
-               CEPH_OSD_FLAG_WRITE;
+               flags = CEPH_OSD_FLAG_ORDERSNAP |
+                       CEPH_OSD_FLAG_ONDISK |
+                       CEPH_OSD_FLAG_WRITE;
+       } else {
+               flags = CEPH_OSD_FLAG_READ;
+       }
 
-       while (iov_iter_count(from) > 0) {
-               u64 len = dio_get_pagev_size(from);
-               size_t start;
-               ssize_t n;
+       while (iov_iter_count(iter) > 0) {
+               u64 size = dio_get_pagev_size(iter);
+               size_t start = 0;
+               ssize_t len;
 
                vino = ceph_vino(inode);
                req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
-                                           vino, pos, &len, 0,
-                                           2,/*include a 'startsync' command*/
-                                           CEPH_OSD_OP_WRITE, flags, snapc,
+                                           vino, pos, &size, 0,
+                                           /*include a 'startsync' command*/
+                                           write ? 2 : 1,
+                                           write ? CEPH_OSD_OP_WRITE :
+                                                   CEPH_OSD_OP_READ,
+                                           flags, snapc,
                                            ci->i_truncate_seq,
                                            ci->i_truncate_size,
                                            false);
@@ -676,10 +833,8 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
                        break;
                }
 
-               osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
-
-               n = len;
-               pages = dio_get_pages_alloc(from, len, &start, &num_pages);
+               len = size;
+               pages = dio_get_pages_alloc(iter, len, &start, &num_pages);
                if (IS_ERR(pages)) {
                        ceph_osdc_put_request(req);
                        ret = PTR_ERR(pages);
@@ -687,47 +842,128 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
                }
 
                /*
-                * throw out any page cache pages in this range. this
-                * may block.
+                * To simplify error handling, allow AIO when IO within i_size
+                * or IO can be satisfied by single OSD request.
                 */
-               truncate_inode_pages_range(inode->i_mapping, pos,
-                                  (pos+n) | (PAGE_CACHE_SIZE-1));
-               osd_req_op_extent_osd_data_pages(req, 0, pages, n, start,
-                                               false, false);
+               if (pos == iocb->ki_pos && !is_sync_kiocb(iocb) &&
+                   (len == count || pos + count <= i_size_read(inode))) {
+                       aio_req = kzalloc(sizeof(*aio_req), GFP_KERNEL);
+                       if (aio_req) {
+                               aio_req->iocb = iocb;
+                               aio_req->write = write;
+                               INIT_LIST_HEAD(&aio_req->osd_reqs);
+                               if (write) {
+                                       aio_req->mtime = mtime;
+                                       swap(aio_req->prealloc_cf, *pcf);
+                               }
+                       }
+                       /* ignore error */
+               }
+
+               if (write) {
+                       /*
+                        * throw out any page cache pages in this range. this
+                        * may block.
+                        */
+                       truncate_inode_pages_range(inode->i_mapping, pos,
+                                       (pos+len) | (PAGE_CACHE_SIZE - 1));
+
+                       osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
+               }
+
+
+               osd_req_op_extent_osd_data_pages(req, 0, pages, len, start,
+                                                false, false);
 
-               /* BUG_ON(vino.snap != CEPH_NOSNAP); */
                ceph_osdc_build_request(req, pos, snapc, vino.snap, &mtime);
 
-               ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+               if (aio_req) {
+                       aio_req->total_len += len;
+                       aio_req->num_reqs++;
+                       atomic_inc(&aio_req->pending_reqs);
+
+                       req->r_callback = ceph_aio_complete_req;
+                       req->r_inode = inode;
+                       req->r_priv = aio_req;
+                       list_add_tail(&req->r_unsafe_item, &aio_req->osd_reqs);
+
+                       pos += len;
+                       iov_iter_advance(iter, len);
+                       continue;
+               }
+
+               ret = ceph_osdc_start_request(req->r_osdc, req, false);
                if (!ret)
                        ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
 
+               size = i_size_read(inode);
+               if (!write) {
+                       if (ret == -ENOENT)
+                               ret = 0;
+                       if (ret >= 0 && ret < len && pos + ret < size) {
+                               int zlen = min_t(size_t, len - ret,
+                                                size - pos - ret);
+                               ceph_zero_page_vector_range(start + ret, zlen,
+                                                           pages);
+                               ret += zlen;
+                       }
+                       if (ret >= 0)
+                               len = ret;
+               }
+
                ceph_put_page_vector(pages, num_pages, false);
 
                ceph_osdc_put_request(req);
-               if (ret)
+               if (ret < 0)
+                       break;
+
+               pos += len;
+               iov_iter_advance(iter, len);
+
+               if (!write && pos >= size)
                        break;
-               pos += n;
-               written += n;
-               iov_iter_advance(from, n);
 
-               if (pos > i_size_read(inode)) {
-                       check_caps = ceph_inode_set_size(inode, pos);
-                       if (check_caps)
+               if (write && pos > size) {
+                       if (ceph_inode_set_size(inode, pos))
                                ceph_check_caps(ceph_inode(inode),
                                                CHECK_CAPS_AUTHONLY,
                                                NULL);
                }
        }
 
-       if (ret != -EOLDSNAPC && written > 0) {
+       if (aio_req) {
+               if (aio_req->num_reqs == 0) {
+                       kfree(aio_req);
+                       return ret;
+               }
+
+               ceph_get_cap_refs(ci, write ? CEPH_CAP_FILE_WR :
+                                             CEPH_CAP_FILE_RD);
+
+               while (!list_empty(&aio_req->osd_reqs)) {
+                       req = list_first_entry(&aio_req->osd_reqs,
+                                              struct ceph_osd_request,
+                                              r_unsafe_item);
+                       list_del_init(&req->r_unsafe_item);
+                       if (ret >= 0)
+                               ret = ceph_osdc_start_request(req->r_osdc,
+                                                             req, false);
+                       if (ret < 0) {
+                               BUG_ON(ret == -EOLDSNAPC);
+                               req->r_result = ret;
+                               ceph_aio_complete_req(req, NULL);
+                       }
+               }
+               return -EIOCBQUEUED;
+       }
+
+       if (ret != -EOLDSNAPC && pos > iocb->ki_pos) {
+               ret = pos - iocb->ki_pos;
                iocb->ki_pos = pos;
-               ret = written;
        }
        return ret;
 }
 
-
 /*
  * Synchronous write, straight from __user pointer or user pages.
  *
@@ -897,8 +1133,14 @@ again:
                     ceph_cap_string(got));
 
                if (ci->i_inline_version == CEPH_INLINE_NONE) {
-                       /* hmm, this isn't really async... */
-                       ret = ceph_sync_read(iocb, to, &retry_op);
+                       if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) {
+                               ret = ceph_direct_read_write(iocb, to,
+                                                            NULL, NULL);
+                               if (ret >= 0 && ret < len)
+                                       retry_op = CHECK_EOF;
+                       } else {
+                               ret = ceph_sync_read(iocb, to, &retry_op);
+                       }
                } else {
                        retry_op = READ_INLINE;
                }
@@ -916,7 +1158,7 @@ again:
                pinned_page = NULL;
        }
        ceph_put_cap_refs(ci, got);
-       if (retry_op && ret >= 0) {
+       if (retry_op > HAVE_RETRIED && ret >= 0) {
                int statret;
                struct page *page = NULL;
                loff_t i_size;
@@ -968,12 +1210,11 @@ again:
                if (retry_op == CHECK_EOF && iocb->ki_pos < i_size &&
                    ret < len) {
                        dout("sync_read hit hole, ppos %lld < size %lld"
-                            ", reading more\n", iocb->ki_pos,
-                            inode->i_size);
+                            ", reading more\n", iocb->ki_pos, i_size);
 
                        read += ret;
                        len -= ret;
-                       retry_op = 0;
+                       retry_op = HAVE_RETRIED;
                        goto again;
                }
        }
@@ -1014,7 +1255,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
        if (!prealloc_cf)
                return -ENOMEM;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = inode_to_bdi(inode);
@@ -1052,7 +1293,7 @@ retry_snap:
        }
 
        dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
-            inode, ceph_vinop(inode), pos, count, inode->i_size);
+            inode, ceph_vinop(inode), pos, count, i_size_read(inode));
        if (fi->fmode & CEPH_FILE_MODE_LAZY)
                want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
        else
@@ -1070,7 +1311,7 @@ retry_snap:
            (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
                struct ceph_snap_context *snapc;
                struct iov_iter data;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
                spin_lock(&ci->i_ceph_lock);
                if (__ceph_have_pending_cap_snap(ci)) {
@@ -1088,8 +1329,8 @@ retry_snap:
                /* we might need to revert back to that point */
                data = *from;
                if (iocb->ki_flags & IOCB_DIRECT)
-                       written = ceph_sync_direct_write(iocb, &data, pos,
-                                                        snapc);
+                       written = ceph_direct_read_write(iocb, &data, snapc,
+                                                        &prealloc_cf);
                else
                        written = ceph_sync_write(iocb, &data, pos, snapc);
                if (written == -EOLDSNAPC) {
@@ -1097,14 +1338,14 @@ retry_snap:
                                "got EOLDSNAPC, retrying\n",
                                inode, ceph_vinop(inode),
                                pos, (unsigned)count);
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
                        goto retry_snap;
                }
                if (written > 0)
                        iov_iter_advance(from, written);
                ceph_put_snap_context(snapc);
        } else {
-               loff_t old_size = inode->i_size;
+               loff_t old_size = i_size_read(inode);
                /*
                 * No need to acquire the i_truncate_mutex. Because
                 * the MDS revokes Fwb caps before sending truncate
@@ -1115,9 +1356,9 @@ retry_snap:
                written = generic_perform_write(file, from, pos);
                if (likely(written >= 0))
                        iocb->ki_pos = pos + written;
-               if (inode->i_size > old_size)
+               if (i_size_read(inode) > old_size)
                        ceph_fscache_update_objectsize(inode);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
        if (written >= 0) {
@@ -1147,7 +1388,7 @@ retry_snap:
        goto out_unlocked;
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 out_unlocked:
        ceph_free_cap_flush(prealloc_cf);
        current->backing_dev_info = NULL;
@@ -1160,9 +1401,10 @@ out_unlocked:
 static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
 {
        struct inode *inode = file->f_mapping->host;
+       loff_t i_size;
        int ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
                ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
@@ -1172,9 +1414,10 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
                }
        }
 
+       i_size = i_size_read(inode);
        switch (whence) {
        case SEEK_END:
-               offset += inode->i_size;
+               offset += i_size;
                break;
        case SEEK_CUR:
                /*
@@ -1190,24 +1433,24 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
                offset += file->f_pos;
                break;
        case SEEK_DATA:
-               if (offset >= inode->i_size) {
+               if (offset >= i_size) {
                        ret = -ENXIO;
                        goto out;
                }
                break;
        case SEEK_HOLE:
-               if (offset >= inode->i_size) {
+               if (offset >= i_size) {
                        ret = -ENXIO;
                        goto out;
                }
-               offset = inode->i_size;
+               offset = i_size;
                break;
        }
 
        offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return offset;
 }
 
@@ -1363,7 +1606,7 @@ static long ceph_fallocate(struct file *file, int mode,
        if (!prealloc_cf)
                return -ENOMEM;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (ceph_snap(inode) != CEPH_NOSNAP) {
                ret = -EROFS;
@@ -1418,7 +1661,7 @@ static long ceph_fallocate(struct file *file, int mode,
 
        ceph_put_cap_refs(ci, got);
 unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        ceph_free_cap_flush(prealloc_cf);
        return ret;
 }
index da55eb8..fb4ba2e 100644 (file)
@@ -548,7 +548,7 @@ int ceph_fill_file_size(struct inode *inode, int issued,
        if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 ||
            (truncate_seq == ci->i_truncate_seq && size > inode->i_size)) {
                dout("size %lld -> %llu\n", inode->i_size, size);
-               inode->i_size = size;
+               i_size_write(inode, size);
                inode->i_blocks = (size + (1<<9) - 1) >> 9;
                ci->i_reported_size = size;
                if (truncate_seq != ci->i_truncate_seq) {
@@ -808,7 +808,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
                        spin_unlock(&ci->i_ceph_lock);
 
                        err = -EINVAL;
-                       if (WARN_ON(symlen != inode->i_size))
+                       if (WARN_ON(symlen != i_size_read(inode)))
                                goto out;
 
                        err = -ENOMEM;
@@ -1549,7 +1549,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
 
        spin_lock(&ci->i_ceph_lock);
        dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
-       inode->i_size = size;
+       i_size_write(inode, size);
        inode->i_blocks = (size + (1 << 9) - 1) >> 9;
 
        /* tell the MDS if we are approaching max_size */
@@ -1911,7 +1911,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
                     inode->i_size, attr->ia_size);
                if ((issued & CEPH_CAP_FILE_EXCL) &&
                    attr->ia_size > inode->i_size) {
-                       inode->i_size = attr->ia_size;
+                       i_size_write(inode, attr->ia_size);
                        inode->i_blocks =
                                (attr->ia_size + (1 << 9) - 1) >> 9;
                        inode->i_ctime = attr->ia_ctime;
index 7febcf2..50b2684 100644 (file)
@@ -50,7 +50,7 @@ void cifs_vfs_err(const char *fmt, ...)
        vaf.fmt = fmt;
        vaf.va = &args;
 
-       pr_err("CIFS VFS: %pV", &vaf);
+       pr_err_ratelimited("CIFS VFS: %pV", &vaf);
 
        va_end(args);
 }
index f40fbac..66cf0f9 100644 (file)
@@ -51,14 +51,13 @@ __printf(1, 2) void cifs_vfs_err(const char *fmt, ...);
 /* information message: e.g., configuration, major event */
 #define cifs_dbg(type, fmt, ...)                                       \
 do {                                                                   \
-       if (type == FYI) {                                              \
-               if (cifsFYI & CIFS_INFO) {                              \
-                       pr_debug("%s: " fmt, __FILE__, ##__VA_ARGS__);  \
-               }                                                       \
+       if (type == FYI && cifsFYI & CIFS_INFO) {                       \
+               pr_debug_ratelimited("%s: "                             \
+                           fmt, __FILE__, ##__VA_ARGS__);              \
        } else if (type == VFS) {                                       \
                cifs_vfs_err(fmt, ##__VA_ARGS__);                       \
        } else if (type == NOISY && type != 0) {                        \
-               pr_debug(fmt, ##__VA_ARGS__);                           \
+               pr_debug_ratelimited(fmt, ##__VA_ARGS__);               \
        }                                                               \
 } while (0)
 
index c4c1169..c48ca13 100644 (file)
@@ -507,6 +507,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 
        seq_printf(s, ",rsize=%u", cifs_sb->rsize);
        seq_printf(s, ",wsize=%u", cifs_sb->wsize);
+       seq_printf(s, ",echo_interval=%lu",
+                       tcon->ses->server->echo_interval / HZ);
        /* convert actimeo and display it in seconds */
        seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ);
 
@@ -640,9 +642,9 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
                while (*s && *s != sep)
                        s++;
 
-               mutex_lock(&dir->i_mutex);
+               inode_lock(dir);
                child = lookup_one_len(p, dentry, s - p);
-               mutex_unlock(&dir->i_mutex);
+               inode_unlock(dir);
                dput(dentry);
                dentry = child;
        } while (!IS_ERR(dentry));
@@ -752,6 +754,9 @@ cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter)
        ssize_t rc;
        struct inode *inode = file_inode(iocb->ki_filp);
 
+       if (iocb->ki_filp->f_flags & O_DIRECT)
+               return cifs_user_readv(iocb, iter);
+
        rc = cifs_revalidate_mapping(inode);
        if (rc)
                return rc;
@@ -766,6 +771,18 @@ static ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        ssize_t written;
        int rc;
 
+       if (iocb->ki_filp->f_flags & O_DIRECT) {
+               written = cifs_user_writev(iocb, from);
+               if (written > 0 && CIFS_CACHE_READ(cinode)) {
+                       cifs_zap_mapping(inode);
+                       cifs_dbg(FYI,
+                                "Set no oplock for inode=%p after a write operation\n",
+                                inode);
+                       cinode->oplock = 0;
+               }
+               return written;
+       }
+
        written = cifs_get_writer(cinode);
        if (written)
                return written;
index 2b510c5..a25b251 100644 (file)
 #define SERVER_NAME_LENGTH 40
 #define SERVER_NAME_LEN_WITH_NULL     (SERVER_NAME_LENGTH + 1)
 
-/* SMB echo "timeout" -- FIXME: tunable? */
-#define SMB_ECHO_INTERVAL (60 * HZ)
+/* echo interval in seconds */
+#define SMB_ECHO_INTERVAL_MIN 1
+#define SMB_ECHO_INTERVAL_MAX 600
+#define SMB_ECHO_INTERVAL_DEFAULT 60
 
 #include "cifspdu.h"
 
@@ -225,7 +227,7 @@ struct smb_version_operations {
        void (*print_stats)(struct seq_file *m, struct cifs_tcon *);
        void (*dump_share_caps)(struct seq_file *, struct cifs_tcon *);
        /* verify the message */
-       int (*check_message)(char *, unsigned int);
+       int (*check_message)(char *, unsigned int, struct TCP_Server_Info *);
        bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
        void (*downgrade_oplock)(struct TCP_Server_Info *,
                                        struct cifsInodeInfo *, bool);
@@ -507,6 +509,7 @@ struct smb_vol {
        struct sockaddr_storage dstaddr; /* destination address */
        struct sockaddr_storage srcaddr; /* allow binding to a local IP */
        struct nls_table *local_nls;
+       unsigned int echo_interval; /* echo interval in secs */
 };
 
 #define CIFS_MOUNT_MASK (CIFS_MOUNT_NO_PERM | CIFS_MOUNT_SET_UID | \
@@ -627,7 +630,9 @@ struct TCP_Server_Info {
 #ifdef CONFIG_CIFS_SMB2
        unsigned int    max_read;
        unsigned int    max_write;
+       __u8            preauth_hash[512];
 #endif /* CONFIG_CIFS_SMB2 */
+       unsigned long echo_interval;
 };
 
 static inline unsigned int
@@ -809,7 +814,10 @@ struct cifs_ses {
        bool need_reconnect:1; /* connection reset, uid now invalid */
 #ifdef CONFIG_CIFS_SMB2
        __u16 session_flags;
-       char smb3signingkey[SMB3_SIGN_KEY_SIZE]; /* for signing smb3 packets */
+       __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
+       __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE];
+       __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
+       __u8 preauth_hash[512];
 #endif /* CONFIG_CIFS_SMB2 */
 };
 
index c63fd1d..eed7ff5 100644 (file)
@@ -102,7 +102,7 @@ extern int SendReceiveBlockingLock(const unsigned int xid,
                        struct smb_hdr *out_buf,
                        int *bytes_returned);
 extern int cifs_reconnect(struct TCP_Server_Info *server);
-extern int checkSMB(char *buf, unsigned int length);
+extern int checkSMB(char *buf, unsigned int len, struct TCP_Server_Info *srvr);
 extern bool is_valid_oplock_break(char *, struct TCP_Server_Info *);
 extern bool backup_cred(struct cifs_sb_info *);
 extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
@@ -439,7 +439,8 @@ extern int setup_ntlm_response(struct cifs_ses *, const struct nls_table *);
 extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *);
 extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
 extern int calc_seckey(struct cifs_ses *);
-extern int generate_smb3signingkey(struct cifs_ses *);
+extern int generate_smb30signingkey(struct cifs_ses *);
+extern int generate_smb311signingkey(struct cifs_ses *);
 
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 extern int calc_lanman_hash(const char *password, const char *cryptkey,
index ecb0803..4fbd92d 100644 (file)
@@ -95,6 +95,7 @@ enum {
        Opt_cruid, Opt_gid, Opt_file_mode,
        Opt_dirmode, Opt_port,
        Opt_rsize, Opt_wsize, Opt_actimeo,
+       Opt_echo_interval,
 
        /* Mount options which take string value */
        Opt_user, Opt_pass, Opt_ip,
@@ -188,6 +189,7 @@ static const match_table_t cifs_mount_option_tokens = {
        { Opt_rsize, "rsize=%s" },
        { Opt_wsize, "wsize=%s" },
        { Opt_actimeo, "actimeo=%s" },
+       { Opt_echo_interval, "echo_interval=%s" },
 
        { Opt_blank_user, "user=" },
        { Opt_blank_user, "username=" },
@@ -368,7 +370,6 @@ cifs_reconnect(struct TCP_Server_Info *server)
        server->session_key.response = NULL;
        server->session_key.len = 0;
        server->lstrp = jiffies;
-       mutex_unlock(&server->srv_mutex);
 
        /* mark submitted MIDs for retry and issue callback */
        INIT_LIST_HEAD(&retry_list);
@@ -381,6 +382,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
                list_move(&mid_entry->qhead, &retry_list);
        }
        spin_unlock(&GlobalMid_Lock);
+       mutex_unlock(&server->srv_mutex);
 
        cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__);
        list_for_each_safe(tmp, tmp2, &retry_list) {
@@ -418,6 +420,7 @@ cifs_echo_request(struct work_struct *work)
        int rc;
        struct TCP_Server_Info *server = container_of(work,
                                        struct TCP_Server_Info, echo.work);
+       unsigned long echo_interval = server->echo_interval;
 
        /*
         * We cannot send an echo if it is disabled or until the
@@ -427,7 +430,7 @@ cifs_echo_request(struct work_struct *work)
         */
        if (!server->ops->need_neg || server->ops->need_neg(server) ||
            (server->ops->can_echo && !server->ops->can_echo(server)) ||
-           time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ))
+           time_before(jiffies, server->lstrp + echo_interval - HZ))
                goto requeue_echo;
 
        rc = server->ops->echo ? server->ops->echo(server) : -ENOSYS;
@@ -436,7 +439,7 @@ cifs_echo_request(struct work_struct *work)
                         server->hostname);
 
 requeue_echo:
-       queue_delayed_work(cifsiod_wq, &server->echo, SMB_ECHO_INTERVAL);
+       queue_delayed_work(cifsiod_wq, &server->echo, echo_interval);
 }
 
 static bool
@@ -487,9 +490,9 @@ server_unresponsive(struct TCP_Server_Info *server)
         *     a response in >60s.
         */
        if (server->tcpStatus == CifsGood &&
-           time_after(jiffies, server->lstrp + 2 * SMB_ECHO_INTERVAL)) {
-               cifs_dbg(VFS, "Server %s has not responded in %d seconds. Reconnecting...\n",
-                        server->hostname, (2 * SMB_ECHO_INTERVAL) / HZ);
+           time_after(jiffies, server->lstrp + 2 * server->echo_interval)) {
+               cifs_dbg(VFS, "Server %s has not responded in %lu seconds. Reconnecting...\n",
+                        server->hostname, (2 * server->echo_interval) / HZ);
                cifs_reconnect(server);
                wake_up(&server->response_q);
                return true;
@@ -828,7 +831,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
         * 48 bytes is enough to display the header and a little bit
         * into the payload for debugging purposes.
         */
-       length = server->ops->check_message(buf, server->total_read);
+       length = server->ops->check_message(buf, server->total_read, server);
        if (length != 0)
                cifs_dump_mem("Bad SMB: ", buf,
                        min_t(unsigned int, server->total_read, 48));
@@ -1624,6 +1627,14 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
                                goto cifs_parse_mount_err;
                        }
                        break;
+               case Opt_echo_interval:
+                       if (get_option_ul(args, &option)) {
+                               cifs_dbg(VFS, "%s: Invalid echo interval value\n",
+                                        __func__);
+                               goto cifs_parse_mount_err;
+                       }
+                       vol->echo_interval = option;
+                       break;
 
                /* String Arguments */
 
@@ -2089,6 +2100,9 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol)
        if (!match_security(server, vol))
                return 0;
 
+       if (server->echo_interval != vol->echo_interval)
+               return 0;
+
        return 1;
 }
 
@@ -2208,6 +2222,12 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
        tcp_ses->tcpStatus = CifsNew;
        ++tcp_ses->srv_count;
 
+       if (volume_info->echo_interval >= SMB_ECHO_INTERVAL_MIN &&
+               volume_info->echo_interval <= SMB_ECHO_INTERVAL_MAX)
+               tcp_ses->echo_interval = volume_info->echo_interval * HZ;
+       else
+               tcp_ses->echo_interval = SMB_ECHO_INTERVAL_DEFAULT * HZ;
+
        rc = ip_connect(tcp_ses);
        if (rc < 0) {
                cifs_dbg(VFS, "Error connecting to socket. Aborting operation.\n");
@@ -2237,7 +2257,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
        cifs_fscache_get_client_cookie(tcp_ses);
 
        /* queue echo request delayed work */
-       queue_delayed_work(cifsiod_wq, &tcp_ses->echo, SMB_ECHO_INTERVAL);
+       queue_delayed_work(cifsiod_wq, &tcp_ses->echo, tcp_ses->echo_interval);
 
        return tcp_ses;
 
index 0a2752b..ff882ae 100644 (file)
@@ -2267,7 +2267,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (rc)
                return rc;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        xid = get_xid();
 
@@ -2292,7 +2292,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
        }
 
        free_xid(xid);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return rc;
 }
 
@@ -2309,7 +2309,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (rc)
                return rc;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        xid = get_xid();
 
@@ -2326,7 +2326,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        }
 
        free_xid(xid);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return rc;
 }
 
@@ -2672,7 +2672,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
         * with a brlock that prevents writing.
         */
        down_read(&cinode->lock_sem);
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        rc = generic_write_checks(iocb, from);
        if (rc <= 0)
@@ -2685,7 +2685,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
        else
                rc = -EACCES;
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (rc > 0) {
                ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc);
index a329f5b..aeb26db 100644 (file)
@@ -814,8 +814,21 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
                        }
                } else
                        fattr.cf_uniqueid = iunique(sb, ROOT_I);
-       } else
-               fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+       } else {
+               if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
+                   validinum == false && server->ops->get_srv_inum) {
+                       /*
+                        * Pass a NULL tcon to ensure we don't make a round
+                        * trip to the server. This only works for SMB2+.
+                        */
+                       tmprc = server->ops->get_srv_inum(xid,
+                               NULL, cifs_sb, full_path,
+                               &fattr.cf_uniqueid, data);
+                       if (tmprc)
+                               fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+               } else
+                       fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+       }
 
        /* query for SFU type info if supported and needed */
        if (fattr.cf_cifsattrs & ATTR_SYSTEM &&
@@ -856,6 +869,13 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
        } else {
                /* we already have inode, update it */
 
+               /* if uniqueid is different, return error */
+               if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM &&
+                   CIFS_I(*inode)->uniqueid != fattr.cf_uniqueid)) {
+                       rc = -ESTALE;
+                       goto cgii_exit;
+               }
+
                /* if filetype is different, return error */
                if (unlikely(((*inode)->i_mode & S_IFMT) !=
                    (fattr.cf_mode & S_IFMT))) {
index 8442b8b..813fe13 100644 (file)
@@ -310,7 +310,7 @@ check_smb_hdr(struct smb_hdr *smb)
 }
 
 int
-checkSMB(char *buf, unsigned int total_read)
+checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server)
 {
        struct smb_hdr *smb = (struct smb_hdr *)buf;
        __u32 rfclen = be32_to_cpu(smb->smb_buf_length);
index 0557c45..b30a4a6 100644 (file)
@@ -847,6 +847,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
                 * if buggy server returns . and .. late do we want to
                 * check for that here?
                 */
+               *tmp_buf = 0;
                rc = cifs_filldir(current_entry, file, ctx,
                                  tmp_buf, max_len);
                if (rc) {
index 1c59070..389fb9f 100644 (file)
@@ -38,7 +38,7 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
         * Make sure that this really is an SMB, that it is a response,
         * and that the message ids match.
         */
-       if ((*(__le32 *)hdr->ProtocolId == SMB2_PROTO_NUMBER) &&
+       if ((hdr->ProtocolId == SMB2_PROTO_NUMBER) &&
            (mid == wire_mid)) {
                if (hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR)
                        return 0;
@@ -50,9 +50,9 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
                                cifs_dbg(VFS, "Received Request not response\n");
                }
        } else { /* bad signature or mid */
-               if (*(__le32 *)hdr->ProtocolId != SMB2_PROTO_NUMBER)
+               if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
                        cifs_dbg(VFS, "Bad protocol string signature header %x\n",
-                                *(unsigned int *) hdr->ProtocolId);
+                                le32_to_cpu(hdr->ProtocolId));
                if (mid != wire_mid)
                        cifs_dbg(VFS, "Mids do not match: %llu and %llu\n",
                                 mid, wire_mid);
@@ -93,11 +93,11 @@ static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
 };
 
 int
-smb2_check_message(char *buf, unsigned int length)
+smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
 {
        struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
        struct smb2_pdu *pdu = (struct smb2_pdu *)hdr;
-       __u64 mid = le64_to_cpu(hdr->MessageId);
+       __u64 mid;
        __u32 len = get_rfc1002_length(buf);
        __u32 clc_len;  /* calculated length */
        int command;
@@ -111,6 +111,30 @@ smb2_check_message(char *buf, unsigned int length)
         * ie Validate the wct via smb2_struct_sizes table above
         */
 
+       if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
+               struct smb2_transform_hdr *thdr =
+                       (struct smb2_transform_hdr *)buf;
+               struct cifs_ses *ses = NULL;
+               struct list_head *tmp;
+
+               /* decrypt frame now that it is completely read in */
+               spin_lock(&cifs_tcp_ses_lock);
+               list_for_each(tmp, &srvr->smb_ses_list) {
+                       ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+                       if (ses->Suid == thdr->SessionId)
+                               break;
+
+                       ses = NULL;
+               }
+               spin_unlock(&cifs_tcp_ses_lock);
+               if (ses == NULL) {
+                       cifs_dbg(VFS, "no decryption - session id not found\n");
+                       return 1;
+               }
+       }
+
+
+       mid = le64_to_cpu(hdr->MessageId);
        if (length < sizeof(struct smb2_pdu)) {
                if ((length >= sizeof(struct smb2_hdr)) && (hdr->Status != 0)) {
                        pdu->StructureSize2 = 0;
@@ -322,7 +346,7 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
 
        /* return pointer to beginning of data area, ie offset from SMB start */
        if ((*off != 0) && (*len != 0))
-               return (char *)(&hdr->ProtocolId[0]) + *off;
+               return (char *)(&hdr->ProtocolId) + *off;
        else
                return NULL;
 }
index 53ccdde..3525ed7 100644 (file)
@@ -182,6 +182,11 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf)
        struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
        __u64 wire_mid = le64_to_cpu(hdr->MessageId);
 
+       if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
+               cifs_dbg(VFS, "encrypted frame parsing not supported yet");
+               return NULL;
+       }
+
        spin_lock(&GlobalMid_Lock);
        list_for_each_entry(mid, &server->pending_mid_q, qhead) {
                if ((mid->mid == wire_mid) &&
@@ -1692,7 +1697,7 @@ struct smb_version_operations smb30_operations = {
        .get_lease_key = smb2_get_lease_key,
        .set_lease_key = smb2_set_lease_key,
        .new_lease_key = smb2_new_lease_key,
-       .generate_signingkey = generate_smb3signingkey,
+       .generate_signingkey = generate_smb30signingkey,
        .calc_signature = smb3_calc_signature,
        .set_integrity  = smb3_set_integrity,
        .is_read_op = smb21_is_read_op,
@@ -1779,7 +1784,7 @@ struct smb_version_operations smb311_operations = {
        .get_lease_key = smb2_get_lease_key,
        .set_lease_key = smb2_set_lease_key,
        .new_lease_key = smb2_new_lease_key,
-       .generate_signingkey = generate_smb3signingkey,
+       .generate_signingkey = generate_smb311signingkey,
        .calc_signature = smb3_calc_signature,
        .set_integrity  = smb3_set_integrity,
        .is_read_op = smb21_is_read_op,
@@ -1838,7 +1843,7 @@ struct smb_version_values smb21_values = {
 struct smb_version_values smb30_values = {
        .version_string = SMB30_VERSION_STRING,
        .protocol_id = SMB30_PROT_ID,
-       .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES,
+       .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
        .large_lock_type = 0,
        .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
        .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
@@ -1858,7 +1863,7 @@ struct smb_version_values smb30_values = {
 struct smb_version_values smb302_values = {
        .version_string = SMB302_VERSION_STRING,
        .protocol_id = SMB302_PROT_ID,
-       .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES,
+       .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
        .large_lock_type = 0,
        .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
        .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
index 7675555..10f8d5c 100644 (file)
@@ -97,10 +97,7 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
        hdr->smb2_buf_length = cpu_to_be32(parmsize + sizeof(struct smb2_hdr)
                        - 4 /*  RFC 1001 length field itself not counted */);
 
-       hdr->ProtocolId[0] = 0xFE;
-       hdr->ProtocolId[1] = 'S';
-       hdr->ProtocolId[2] = 'M';
-       hdr->ProtocolId[3] = 'B';
+       hdr->ProtocolId = SMB2_PROTO_NUMBER;
        hdr->StructureSize = cpu_to_le16(64);
        hdr->Command = smb2_cmd;
        hdr->CreditRequest = cpu_to_le16(2); /* BB make this dynamic */
@@ -1573,7 +1570,8 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
                goto ioctl_exit;
        }
 
-       memcpy(*out_data, rsp->hdr.ProtocolId + le32_to_cpu(rsp->OutputOffset),
+       memcpy(*out_data,
+              (char *)&rsp->hdr.ProtocolId + le32_to_cpu(rsp->OutputOffset),
               *plen);
 ioctl_exit:
        free_rsp_buf(resp_buftype, rsp);
@@ -2093,7 +2091,7 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
        }
 
        if (*buf) {
-               memcpy(*buf, (char *)rsp->hdr.ProtocolId + rsp->DataOffset,
+               memcpy(*buf, (char *)&rsp->hdr.ProtocolId + rsp->DataOffset,
                       *nbytes);
                free_rsp_buf(resp_buftype, iov[0].iov_base);
        } else if (resp_buftype != CIFS_NO_BUFFER) {
index 4af5278..ff88d9f 100644 (file)
@@ -86,6 +86,7 @@
 #define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */
 
 #define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
+#define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd)
 
 /*
  * SMB2 Header Definition
@@ -102,7 +103,7 @@ struct smb2_hdr {
        __be32 smb2_buf_length; /* big endian on wire */
                                /* length is only two or three bytes - with
                                 one or two byte type preceding it that MBZ */
-       __u8   ProtocolId[4];   /* 0xFE 'S' 'M' 'B' */
+       __le32 ProtocolId;      /* 0xFE 'S' 'M' 'B' */
        __le16 StructureSize;   /* 64 */
        __le16 CreditCharge;    /* MBZ */
        __le32 Status;          /* Error from server */
@@ -128,11 +129,10 @@ struct smb2_transform_hdr {
                                 one or two byte type preceding it that MBZ */
        __u8   ProtocolId[4];   /* 0xFD 'S' 'M' 'B' */
        __u8   Signature[16];
-       __u8   Nonce[11];
-       __u8   Reserved[5];
+       __u8   Nonce[16];
        __le32 OriginalMessageSize;
        __u16  Reserved1;
-       __le16 EncryptionAlgorithm;
+       __le16 Flags; /* EncryptionAlgorithm */
        __u64  SessionId;
 } __packed;
 
index 79dc650..4f07dc9 100644 (file)
@@ -34,7 +34,8 @@ struct smb_rqst;
  *****************************************************************
  */
 extern int map_smb2_to_linux_error(char *buf, bool log_err);
-extern int smb2_check_message(char *buf, unsigned int length);
+extern int smb2_check_message(char *buf, unsigned int length,
+                             struct TCP_Server_Info *server);
 extern unsigned int smb2_calc_size(void *buf);
 extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr);
 extern __le16 *cifs_convert_path_to_utf16(const char *from,
index d4c5b6f..8732a43 100644 (file)
@@ -222,8 +222,8 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
        return rc;
 }
 
-int
-generate_smb3signingkey(struct cifs_ses *ses)
+static int generate_key(struct cifs_ses *ses, struct kvec label,
+                       struct kvec context, __u8 *key, unsigned int key_size)
 {
        unsigned char zero = 0x0;
        __u8 i[4] = {0, 0, 0, 1};
@@ -233,7 +233,7 @@ generate_smb3signingkey(struct cifs_ses *ses)
        unsigned char *hashptr = prfhash;
 
        memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE);
-       memset(ses->smb3signingkey, 0x0, SMB3_SIGNKEY_SIZE);
+       memset(key, 0x0, key_size);
 
        rc = smb3_crypto_shash_allocate(ses->server);
        if (rc) {
@@ -262,7 +262,7 @@ generate_smb3signingkey(struct cifs_ses *ses)
        }
 
        rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
-                               "SMB2AESCMAC", 12);
+                               label.iov_base, label.iov_len);
        if (rc) {
                cifs_dbg(VFS, "%s: Could not update with label\n", __func__);
                goto smb3signkey_ret;
@@ -276,7 +276,7 @@ generate_smb3signingkey(struct cifs_ses *ses)
        }
 
        rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
-                               "SmbSign", 8);
+                               context.iov_base, context.iov_len);
        if (rc) {
                cifs_dbg(VFS, "%s: Could not update with context\n", __func__);
                goto smb3signkey_ret;
@@ -296,12 +296,102 @@ generate_smb3signingkey(struct cifs_ses *ses)
                goto smb3signkey_ret;
        }
 
-       memcpy(ses->smb3signingkey, hashptr, SMB3_SIGNKEY_SIZE);
+       memcpy(key, hashptr, key_size);
 
 smb3signkey_ret:
        return rc;
 }
 
+/*
+ * Label/context pair handed to generate_key() when deriving one session key.
+ */
+struct derivation {
+       struct kvec label;
+       struct kvec context;
+};
+
+/*
+ * The three derivations consumed by generate_smb3signingkey(): one each for
+ * the signing, encryption and decryption keys of a session.
+ */
+struct derivation_triplet {
+       struct derivation signing;
+       struct derivation encryption;
+       struct derivation decryption;
+};
+
+/*
+ * Derive the signing, encryption and decryption keys of @ses, in that order,
+ * from the label/context pairs in @ptriplet.
+ *
+ * Stops at the first generate_key() failure and returns its error code;
+ * otherwise returns the result of the last (decryption) derivation.
+ */
+static int
+generate_smb3signingkey(struct cifs_ses *ses,
+                       const struct derivation_triplet *ptriplet)
+{
+       const struct derivation *sign = &ptriplet->signing;
+       const struct derivation *enc = &ptriplet->encryption;
+       const struct derivation *dec = &ptriplet->decryption;
+       int rc;
+
+       rc = generate_key(ses, sign->label, sign->context,
+                         ses->smb3signingkey, SMB3_SIGN_KEY_SIZE);
+       if (!rc)
+               rc = generate_key(ses, enc->label, enc->context,
+                                 ses->smb3encryptionkey, SMB3_SIGN_KEY_SIZE);
+       if (!rc)
+               rc = generate_key(ses, dec->label, dec->context,
+                                 ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE);
+
+       return rc;
+}
+
+/*
+ * Build the label/context triplet used for SMB 3.0 sessions and derive the
+ * signing, encryption and decryption keys of @ses from it.
+ *
+ * Returns whatever generate_smb3signingkey() returns.
+ */
+int
+generate_smb30signingkey(struct cifs_ses *ses)
+{
+       /* each iov_len below also counts the string's terminating NUL */
+       const struct derivation_triplet triplet = {
+               .signing = {
+                       .label = { .iov_base = "SMB2AESCMAC", .iov_len = 12 },
+                       .context = { .iov_base = "SmbSign", .iov_len = 8 },
+               },
+               .encryption = {
+                       .label = { .iov_base = "SMB2AESCCM", .iov_len = 11 },
+                       .context = { .iov_base = "ServerIn ", .iov_len = 10 },
+               },
+               .decryption = {
+                       .label = { .iov_base = "SMB2AESCCM", .iov_len = 11 },
+                       .context = { .iov_base = "ServerOut", .iov_len = 10 },
+               },
+       };
+
+       return generate_smb3signingkey(ses, &triplet);
+}
+
+/*
+ * Build the label/context triplet used for SMB 3.1.1 sessions and derive the
+ * signing, encryption and decryption keys of @ses from it.
+ *
+ * NOTE(review): the labels and contexts here are identical to the ones in
+ * generate_smb30signingkey(); MS-SMB2 appears to specify different
+ * derivation inputs for dialect 3.1.1 -- confirm whether this is a
+ * placeholder.
+ *
+ * Returns whatever generate_smb3signingkey() returns.
+ */
+int
+generate_smb311signingkey(struct cifs_ses *ses)
+{
+       /* each iov_len below also counts the string's terminating NUL */
+       const struct derivation_triplet triplet = {
+               .signing = {
+                       .label = { .iov_base = "SMB2AESCMAC", .iov_len = 12 },
+                       .context = { .iov_base = "SmbSign", .iov_len = 8 },
+               },
+               .encryption = {
+                       .label = { .iov_base = "SMB2AESCCM", .iov_len = 11 },
+                       .context = { .iov_base = "ServerIn ", .iov_len = 10 },
+               },
+               .decryption = {
+                       .label = { .iov_base = "SMB2AESCCM", .iov_len = 11 },
+                       .context = { .iov_base = "ServerOut", .iov_len = 10 },
+               },
+       };
+
+       return generate_smb3signingkey(ses, &triplet);
+}
+
 int
 smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 {
index 2a24c52..87abe8e 100644 (file)
@@ -576,14 +576,16 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
        cifs_in_send_dec(server);
        cifs_save_when_sent(mid);
 
-       if (rc < 0)
+       if (rc < 0) {
                server->sequence_number -= 2;
+               cifs_delete_mid(mid);
+       }
+
        mutex_unlock(&server->srv_mutex);
 
        if (rc == 0)
                return 0;
 
-       cifs_delete_mid(mid);
        add_credits_and_wake_if(server, credits, optype);
        return rc;
 }
index f829fe9..5104d84 100644 (file)
@@ -72,8 +72,7 @@ void coda_sysctl_clean(void);
 } while (0)
 
 
-#define CODA_FREE(ptr,size) \
-    do { if (size < PAGE_SIZE) kfree((ptr)); else vfree((ptr)); } while (0)
+#define CODA_FREE(ptr, size) kvfree((ptr))
 
 /* inode to cnode access functions */
 
index fda9f43..42e731b 100644 (file)
@@ -427,13 +427,13 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx)
        if (host_file->f_op->iterate) {
                struct inode *host_inode = file_inode(host_file);
 
-               mutex_lock(&host_inode->i_mutex);
+               inode_lock(host_inode);
                ret = -ENOENT;
                if (!IS_DEADDIR(host_inode)) {
                        ret = host_file->f_op->iterate(host_file, ctx);
                        file_accessed(host_file);
                }
-               mutex_unlock(&host_inode->i_mutex);
+               inode_unlock(host_inode);
                return ret;
        }
        /* Venus: we must read Venus dirents from a file */
index 1da3805..f47c748 100644 (file)
@@ -71,12 +71,12 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
 
        host_file = cfi->cfi_container;
        file_start_write(host_file);
-       mutex_lock(&coda_inode->i_mutex);
+       inode_lock(coda_inode);
        ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos);
        coda_inode->i_size = file_inode(host_file)->i_size;
        coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
        coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC;
-       mutex_unlock(&coda_inode->i_mutex);
+       inode_unlock(coda_inode);
        file_end_write(host_file);
        return ret;
 }
@@ -203,7 +203,7 @@ int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
        err = filemap_write_and_wait_range(coda_inode->i_mapping, start, end);
        if (err)
                return err;
-       mutex_lock(&coda_inode->i_mutex);
+       inode_lock(coda_inode);
 
        cfi = CODA_FTOC(coda_file);
        BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
@@ -212,7 +212,7 @@ int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
        err = vfs_fsync(host_file, datasync);
        if (!err && !datasync)
                err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode));
-       mutex_unlock(&coda_inode->i_mutex);
+       inode_unlock(coda_inode);
 
        return err;
 }
index a5b8eb6..6402eaf 100644 (file)
@@ -1261,6 +1261,9 @@ COMPATIBLE_IOCTL(HCIUNBLOCKADDR)
 COMPATIBLE_IOCTL(HCIINQUIRY)
 COMPATIBLE_IOCTL(HCIUARTSETPROTO)
 COMPATIBLE_IOCTL(HCIUARTGETPROTO)
+COMPATIBLE_IOCTL(HCIUARTGETDEVICE)
+COMPATIBLE_IOCTL(HCIUARTSETFLAGS)
+COMPATIBLE_IOCTL(HCIUARTGETFLAGS)
 COMPATIBLE_IOCTL(RFCOMMCREATEDEV)
 COMPATIBLE_IOCTL(RFCOMMRELEASEDEV)
 COMPATIBLE_IOCTL(RFCOMMGETDEVLIST)
index cab612b..f419519 100644 (file)
@@ -640,13 +640,13 @@ static void detach_groups(struct config_group *group)
 
                child = sd->s_dentry;
 
-               mutex_lock(&d_inode(child)->i_mutex);
+               inode_lock(d_inode(child));
 
                configfs_detach_group(sd->s_element);
                d_inode(child)->i_flags |= S_DEAD;
                dont_mount(child);
 
-               mutex_unlock(&d_inode(child)->i_mutex);
+               inode_unlock(d_inode(child));
 
                d_delete(child);
                dput(child);
@@ -834,11 +834,11 @@ static int configfs_attach_item(struct config_item *parent_item,
                         * the VFS may already have hit and used them. Thus,
                         * we must lock them as rmdir() would.
                         */
-                       mutex_lock(&d_inode(dentry)->i_mutex);
+                       inode_lock(d_inode(dentry));
                        configfs_remove_dir(item);
                        d_inode(dentry)->i_flags |= S_DEAD;
                        dont_mount(dentry);
-                       mutex_unlock(&d_inode(dentry)->i_mutex);
+                       inode_unlock(d_inode(dentry));
                        d_delete(dentry);
                }
        }
@@ -874,7 +874,7 @@ static int configfs_attach_group(struct config_item *parent_item,
                 * We must also lock the inode to remove it safely in case of
                 * error, as rmdir() would.
                 */
-               mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+               inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
                configfs_adjust_dir_dirent_depth_before_populate(sd);
                ret = populate_groups(to_config_group(item));
                if (ret) {
@@ -883,7 +883,7 @@ static int configfs_attach_group(struct config_item *parent_item,
                        dont_mount(dentry);
                }
                configfs_adjust_dir_dirent_depth_after_populate(sd);
-               mutex_unlock(&d_inode(dentry)->i_mutex);
+               inode_unlock(d_inode(dentry));
                if (ret)
                        d_delete(dentry);
        }
@@ -1135,7 +1135,7 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
         * subsystem is really registered, and so we need to lock out
         * configfs_[un]register_subsystem().
         */
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
 
        subsys_sd = configfs_find_subsys_dentry(root->d_fsdata, s_item);
        if (!subsys_sd) {
@@ -1147,7 +1147,7 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
        ret = configfs_do_depend_item(subsys_sd->s_dentry, target);
 
 out_unlock_fs:
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
 
        /*
         * If we succeeded, the fs is pinned via other methods.  If not,
@@ -1230,7 +1230,7 @@ int configfs_depend_item_unlocked(struct configfs_subsystem *caller_subsys,
                 * additional locking to prevent other subsystem from being
                 * unregistered
                 */
-               mutex_lock(&d_inode(root->cg_item.ci_dentry)->i_mutex);
+               inode_lock(d_inode(root->cg_item.ci_dentry));
 
                /*
                 * As we are trying to depend item from other subsystem
@@ -1254,7 +1254,7 @@ out_root_unlock:
                 * We were called from subsystem other than our target so we
                 * took some locks so now it's time to release them
                 */
-               mutex_unlock(&d_inode(root->cg_item.ci_dentry)->i_mutex);
+               inode_unlock(d_inode(root->cg_item.ci_dentry));
 
        return ret;
 }
@@ -1561,7 +1561,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
        down_write(&configfs_rename_sem);
        parent = item->parent->dentry;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
 
        new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
        if (!IS_ERR(new_dentry)) {
@@ -1577,7 +1577,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
                        error = -EEXIST;
                dput(new_dentry);
        }
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        up_write(&configfs_rename_sem);
 
        return error;
@@ -1590,7 +1590,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
        struct configfs_dirent * parent_sd = dentry->d_fsdata;
        int err;
 
-       mutex_lock(&d_inode(dentry)->i_mutex);
+       inode_lock(d_inode(dentry));
        /*
         * Fake invisibility if dir belongs to a group/default groups hierarchy
         * being attached
@@ -1603,7 +1603,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
                else
                        err = 0;
        }
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
 
        return err;
 }
@@ -1613,11 +1613,11 @@ static int configfs_dir_close(struct inode *inode, struct file *file)
        struct dentry * dentry = file->f_path.dentry;
        struct configfs_dirent * cursor = file->private_data;
 
-       mutex_lock(&d_inode(dentry)->i_mutex);
+       inode_lock(d_inode(dentry));
        spin_lock(&configfs_dirent_lock);
        list_del_init(&cursor->s_sibling);
        spin_unlock(&configfs_dirent_lock);
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
 
        release_configfs_dirent(cursor);
 
@@ -1698,7 +1698,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
 {
        struct dentry * dentry = file->f_path.dentry;
 
-       mutex_lock(&d_inode(dentry)->i_mutex);
+       inode_lock(d_inode(dentry));
        switch (whence) {
                case 1:
                        offset += file->f_pos;
@@ -1706,7 +1706,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
                        if (offset >= 0)
                                break;
                default:
-                       mutex_unlock(&d_inode(dentry)->i_mutex);
+                       inode_unlock(d_inode(dentry));
                        return -EINVAL;
        }
        if (offset != file->f_pos) {
@@ -1732,7 +1732,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
                        spin_unlock(&configfs_dirent_lock);
                }
        }
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
        return offset;
 }
 
@@ -1767,14 +1767,14 @@ int configfs_register_group(struct config_group *parent_group,
 
        parent = parent_group->cg_item.ci_dentry;
 
-       mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
        ret = create_default_group(parent_group, group);
        if (!ret) {
                spin_lock(&configfs_dirent_lock);
                configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata);
                spin_unlock(&configfs_dirent_lock);
        }
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        return ret;
 }
 EXPORT_SYMBOL(configfs_register_group);
@@ -1791,7 +1791,7 @@ void configfs_unregister_group(struct config_group *group)
        struct dentry *dentry = group->cg_item.ci_dentry;
        struct dentry *parent = group->cg_item.ci_parent->ci_dentry;
 
-       mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
        spin_lock(&configfs_dirent_lock);
        configfs_detach_prep(dentry, NULL);
        spin_unlock(&configfs_dirent_lock);
@@ -1800,7 +1800,7 @@ void configfs_unregister_group(struct config_group *group)
        d_inode(dentry)->i_flags |= S_DEAD;
        dont_mount(dentry);
        d_delete(dentry);
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
 
        dput(dentry);
 
@@ -1872,7 +1872,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
        sd = root->d_fsdata;
        link_group(to_config_group(sd->s_element), group);
 
-       mutex_lock_nested(&d_inode(root)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(root), I_MUTEX_PARENT);
 
        err = -ENOMEM;
        dentry = d_alloc_name(root, group->cg_item.ci_name);
@@ -1892,7 +1892,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
                }
        }
 
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
 
        if (err) {
                unlink_group(group);
@@ -1913,9 +1913,9 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
                return;
        }
 
-       mutex_lock_nested(&d_inode(root)->i_mutex,
+       inode_lock_nested(d_inode(root),
                          I_MUTEX_PARENT);
-       mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
        mutex_lock(&configfs_symlink_mutex);
        spin_lock(&configfs_dirent_lock);
        if (configfs_detach_prep(dentry, NULL)) {
@@ -1926,11 +1926,11 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
        configfs_detach_group(&group->cg_item);
        d_inode(dentry)->i_flags |= S_DEAD;
        dont_mount(dentry);
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
 
        d_delete(dentry);
 
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
 
        dput(dentry);
 
index 3687187..33b7ee3 100644 (file)
@@ -540,10 +540,10 @@ int configfs_create_file(struct config_item * item, const struct configfs_attrib
        umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG;
        int error = 0;
 
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_NORMAL);
+       inode_lock_nested(d_inode(dir), I_MUTEX_NORMAL);
        error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode,
                                     CONFIGFS_ITEM_ATTR);
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 
        return error;
 }
@@ -562,10 +562,10 @@ int configfs_create_bin_file(struct config_item *item,
        umode_t mode = (bin_attr->cb_attr.ca_mode & S_IALLUGO) | S_IFREG;
        int error = 0;
 
-       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_NORMAL);
+       inode_lock_nested(dir->d_inode, I_MUTEX_NORMAL);
        error = configfs_make_dirent(parent_sd, NULL, (void *) bin_attr, mode,
                                     CONFIGFS_ITEM_BIN_ATTR);
-       mutex_unlock(&dir->d_inode->i_mutex);
+       inode_unlock(dir->d_inode);
 
        return error;
 }
index 0cc810e..cee087d 100644 (file)
@@ -255,7 +255,7 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
                /* no inode means this hasn't been made visible yet */
                return;
 
-       mutex_lock(&d_inode(dir)->i_mutex);
+       inode_lock(d_inode(dir));
        list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
                if (!sd->s_element)
                        continue;
@@ -268,5 +268,5 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
                        break;
                }
        }
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 }
index 7af8797..fc2e314 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -24,6 +24,7 @@
 #include <linux/memcontrol.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
+#include <linux/pagevec.h>
 #include <linux/pmem.h>
 #include <linux/sched.h>
 #include <linux/uio.h>
@@ -57,6 +58,26 @@ static void dax_unmap_atomic(struct block_device *bdev,
        blk_queue_exit(bdev->bd_queue);
 }
 
+/*
+ * read_dax_sector - copy the page that contains sector @n into a new page
+ * @bdev: block device handed to dax_map_atomic()/dax_unmap_atomic()
+ * @n: sector number; rounded down to the first sector of its page
+ *
+ * Returns the newly allocated page holding PAGE_SIZE bytes copied from the
+ * device, ERR_PTR(-ENOMEM) if no page could be allocated, or ERR_PTR() of
+ * the negative value returned by dax_map_atomic().  The page is handed back
+ * to the caller without being freed here.
+ */
+struct page *read_dax_sector(struct block_device *bdev, sector_t n)
+{
+       struct page *page = alloc_pages(GFP_KERNEL, 0);
+       struct blk_dax_ctl dax = {
+               .size = PAGE_SIZE,
+               /* align down to a page worth of 512-byte sectors */
+               .sector = n & ~((((int) PAGE_SIZE) / 512) - 1),
+       };
+       long rc;
+
+       if (!page)
+               return ERR_PTR(-ENOMEM);
+
+       rc = dax_map_atomic(bdev, &dax);
+       if (rc < 0) {
+               /* nothing was copied: do not leak the page allocated above */
+               __free_page(page);
+               return ERR_PTR(rc);
+       }
+       memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE);
+       dax_unmap_atomic(bdev, &dax);
+       return page;
+}
+
 /*
  * dax_clear_blocks() is called from within transaction context from XFS,
  * and hence this means the stack from this point must follow GFP_NOFS
@@ -245,13 +266,14 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
        loff_t end = pos + iov_iter_count(iter);
 
        memset(&bh, 0, sizeof(bh));
+       bh.b_bdev = inode->i_sb->s_bdev;
 
        if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) {
                struct address_space *mapping = inode->i_mapping;
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                retval = filemap_write_and_wait_range(mapping, pos, end - 1);
                if (retval) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        goto out;
                }
        }
@@ -263,7 +285,7 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
        retval = dax_io(inode, iter, pos, end, get_block, &bh);
 
        if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
        if ((retval > 0) && end_io)
                end_io(iocb, pos, retval, bh.b_private);
@@ -324,6 +346,200 @@ static int copy_user_bh(struct page *to, struct inode *inode,
        return 0;
 }
 
+/* Passed as the sector when only an existing entry should be dirtied. */
+#define NO_SECTOR (-1)
+/*
+ * Mask a page index with PMD_MASK (converted to page units) to get the
+ * index under which a PMD entry covering that page is looked up.
+ */
+#define DAX_PMD_INDEX(page_index) \
+       ((page_index) & (PMD_MASK >> PAGE_CACHE_SHIFT))
+
+/*
+ * dax_radix_entry - create or dirty the radix tree entry for a DAX page
+ * @mapping: address space whose page_tree holds the entries
+ * @index: page offset the entry is stored under
+ * @sector: sector recorded in a newly inserted entry, or NO_SECTOR when the
+ *          caller only wants an already existing entry tagged dirty
+ * @pmd_entry: true to record a PMD entry, false to record a PTE entry
+ * @dirty: true to mark the inode dirty and tag the entry PAGECACHE_TAG_DIRTY
+ *
+ * Returns 0 on success (including the NO_SECTOR case where no entry is
+ * found and nothing is inserted), -EIO if an existing entry has an
+ * unexpected type, or the error returned by radix_tree_insert().
+ */
+static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
+               sector_t sector, bool pmd_entry, bool dirty)
+{
+       struct radix_tree_root *page_tree = &mapping->page_tree;
+       pgoff_t pmd_index = DAX_PMD_INDEX(index);
+       int type, error = 0;
+       void *entry;
+
+       WARN_ON_ONCE(pmd_entry && !dirty);
+       if (dirty)
+               __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+
+       spin_lock_irq(&mapping->tree_lock);
+
+       /* a PMD entry already covering @index is reused: just dirty it */
+       entry = radix_tree_lookup(page_tree, pmd_index);
+       if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD) {
+               index = pmd_index;
+               goto dirty;
+       }
+
+       entry = radix_tree_lookup(page_tree, index);
+       if (entry) {
+               type = RADIX_DAX_TYPE(entry);
+               if (WARN_ON_ONCE(type != RADIX_DAX_PTE &&
+                                       type != RADIX_DAX_PMD)) {
+                       error = -EIO;
+                       goto unlock;
+               }
+
+               /* the existing entry is kept unless a PTE must become a PMD */
+               if (!pmd_entry || type == RADIX_DAX_PMD)
+                       goto dirty;
+
+               /*
+                * We only insert dirty PMD entries into the radix tree.  This
+                * means we don't need to worry about removing a dirty PTE
+                * entry and inserting a clean PMD entry, thus reducing the
+                * range we would flush with a follow-up fsync/msync call.
+                */
+               radix_tree_delete(&mapping->page_tree, index);
+               mapping->nrexceptional--;
+       }
+
+       if (sector == NO_SECTOR) {
+               /*
+                * This can happen during correct operation if our pfn_mkwrite
+                * fault raced against a hole punch operation.  If this
+                * happens the pte that was hole punched will have been
+                * unmapped and the radix tree entry will have been removed by
+                * the time we are called, but the call will still happen.  We
+                * will return all the way up to wp_pfn_shared(), where the
+                * pte_same() check will fail, eventually causing page fault
+                * to be retried by the CPU.
+                */
+               goto unlock;
+       }
+
+       error = radix_tree_insert(page_tree, index,
+                       RADIX_DAX_ENTRY(sector, pmd_entry));
+       if (error)
+               goto unlock;
+
+       /* nrexceptional counts the entries this function inserts/deletes */
+       mapping->nrexceptional++;
+ dirty:
+       if (dirty)
+               radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
+ unlock:
+       spin_unlock_irq(&mapping->tree_lock);
+       return error;
+}
+
+/*
+ * dax_writeback_one - flush the range described by one radix tree entry
+ * @bdev: block device handed to dax_map_atomic()/dax_unmap_atomic()
+ * @mapping: address space whose page_tree holds @entry
+ * @index: radix tree index at which the caller found @entry
+ * @entry: the entry as seen by the caller, re-validated here under tree_lock
+ *
+ * Maps the sector range recorded in @entry, writes it back with
+ * wb_cache_pmem() and clears the PAGECACHE_TAG_TOWRITE tag at @index.
+ *
+ * Returns 0 when there is nothing to do (entry gone, replaced, or no longer
+ * tagged TOWRITE), a negative errno on failure, or the non-negative value
+ * returned by dax_map_atomic() after a successful flush.
+ */
+static int dax_writeback_one(struct block_device *bdev,
+               struct address_space *mapping, pgoff_t index, void *entry)
+{
+       struct radix_tree_root *page_tree = &mapping->page_tree;
+       int type = RADIX_DAX_TYPE(entry);
+       struct radix_tree_node *node;
+       struct blk_dax_ctl dax;
+       void **slot;
+       int ret = 0;
+
+       spin_lock_irq(&mapping->tree_lock);
+       /*
+        * Regular page slots are stabilized by the page lock even
+        * without the tree itself locked.  These unlocked entries
+        * need verification under the tree lock.
+        */
+       if (!__radix_tree_lookup(page_tree, index, &node, &slot))
+               goto unlock;
+       if (*slot != entry)
+               goto unlock;
+
+       /* another fsync thread may have already written back this entry */
+       if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+               goto unlock;
+
+       if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)) {
+               ret = -EIO;
+               goto unlock;
+       }
+
+       /* capture what to flush while the entry is still known to be valid */
+       dax.sector = RADIX_DAX_SECTOR(entry);
+       dax.size = (type == RADIX_DAX_PMD ? PMD_SIZE : PAGE_SIZE);
+       spin_unlock_irq(&mapping->tree_lock);
+
+       /*
+        * We cannot hold tree_lock while calling dax_map_atomic() because it
+        * eventually calls cond_resched().
+        */
+       ret = dax_map_atomic(bdev, &dax);
+       if (ret < 0)
+               return ret;
+
+       /* a mapping shorter than the entry's size is treated as an I/O error */
+       if (WARN_ON_ONCE(ret < dax.size)) {
+               ret = -EIO;
+               goto unmap;
+       }
+
+       wb_cache_pmem(dax.addr, dax.size);
+
+       spin_lock_irq(&mapping->tree_lock);
+       radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
+       spin_unlock_irq(&mapping->tree_lock);
+ unmap:
+       dax_unmap_atomic(bdev, &dax);
+       return ret;
+
+ unlock:
+       spin_unlock_irq(&mapping->tree_lock);
+       return ret;
+}
+
+/*
+ * Flush the mapping to the persistent domain within the byte range of [start,
+ * end]. This is required by data integrity operations to ensure file data is
+ * on persistent storage prior to completion of the operation.
+ *
+ * Returns 0 on success, -EIO if inode->i_blkbits differs from PAGE_SHIFT, or
+ * the first negative value returned by dax_writeback_one().
+ */
+int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
+               loff_t end)
+{
+       struct inode *inode = mapping->host;
+       struct block_device *bdev = inode->i_sb->s_bdev;
+       pgoff_t start_index, end_index, pmd_index;
+       pgoff_t indices[PAGEVEC_SIZE];
+       struct pagevec pvec;
+       bool done = false;
+       int i, ret = 0;
+       void *entry;
+
+       if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
+               return -EIO;
+
+       start_index = start >> PAGE_CACHE_SHIFT;
+       end_index = end >> PAGE_CACHE_SHIFT;
+       pmd_index = DAX_PMD_INDEX(start_index);
+
+       rcu_read_lock();
+       entry = radix_tree_lookup(&mapping->page_tree, pmd_index);
+       rcu_read_unlock();
+
+       /* see if the start of our range is covered by a PMD entry */
+       if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
+               start_index = pmd_index;
+
+       tag_pages_for_writeback(mapping, start_index, end_index);
+
+       pagevec_init(&pvec, 0);
+       while (!done) {
+               pvec.nr = find_get_entries_tag(mapping, start_index,
+                               PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
+                               pvec.pages, indices);
+
+               if (pvec.nr == 0)
+                       break;
+
+               for (i = 0; i < pvec.nr; i++) {
+                       if (indices[i] > end_index) {
+                               done = true;
+                               break;
+                       }
+
+                       ret = dax_writeback_one(bdev, mapping, indices[i],
+                                       pvec.pages[i]);
+                       if (ret < 0)
+                               return ret;
+               }
+
+               /*
+                * Continue the scan after the last index returned so that
+                * the walk always makes forward progress instead of
+                * depending on every TOWRITE tag having been cleared.
+                */
+               start_index = indices[pvec.nr - 1] + 1;
+       }
+       wmb_pmem();
+       return 0;
+}
+EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
+
 static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
                        struct vm_area_struct *vma, struct vm_fault *vmf)
 {
@@ -363,6 +579,11 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
        }
        dax_unmap_atomic(bdev, &dax);
 
+       error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false,
+                       vmf->flags & FAULT_FLAG_WRITE);
+       if (error)
+               goto out;
+
        error = vm_insert_mixed(vma, vaddr, dax.pfn);
 
  out:
@@ -408,6 +629,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 
        memset(&bh, 0, sizeof(bh));
        block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
+       bh.b_bdev = inode->i_sb->s_bdev;
        bh.b_size = PAGE_SIZE;
 
  repeat:
@@ -487,6 +709,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                delete_from_page_cache(page);
                unlock_page(page);
                page_cache_release(page);
+               page = NULL;
        }
 
        /*
@@ -590,7 +813,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
        struct block_device *bdev;
        pgoff_t size, pgoff;
        sector_t block;
-       int result = 0;
+       int error, result = 0;
+       bool alloc = false;
 
        /* dax pmd mappings require pfn_t_devmap() */
        if (!IS_ENABLED(CONFIG_FS_DAX_PMD))
@@ -624,13 +848,21 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
        }
 
        memset(&bh, 0, sizeof(bh));
+       bh.b_bdev = inode->i_sb->s_bdev;
        block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
 
        bh.b_size = PMD_SIZE;
-       if (get_block(inode, block, &bh, write) != 0)
+
+       if (get_block(inode, block, &bh, 0) != 0)
                return VM_FAULT_SIGBUS;
+
+       if (!buffer_mapped(&bh) && write) {
+               if (get_block(inode, block, &bh, 1) != 0)
+                       return VM_FAULT_SIGBUS;
+               alloc = true;
+       }
+
        bdev = bh.b_bdev;
-       i_mmap_lock_read(mapping);
 
        /*
         * If the filesystem isn't willing to tell us the length of a hole,
@@ -639,19 +871,22 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
         */
        if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE) {
                dax_pmd_dbg(&bh, address, "allocated block too small");
-               goto fallback;
+               return VM_FAULT_FALLBACK;
        }
 
        /*
         * If we allocated new storage, make sure no process has any
         * zero pages covering this hole
         */
-       if (buffer_new(&bh)) {
-               i_mmap_unlock_read(mapping);
-               unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0);
-               i_mmap_lock_read(mapping);
+       if (alloc) {
+               loff_t lstart = pgoff << PAGE_SHIFT;
+               loff_t lend = lstart + PMD_SIZE - 1; /* inclusive */
+
+               truncate_pagecache_range(inode, lstart, lend);
        }
 
+       i_mmap_lock_read(mapping);
+
        /*
         * If a truncate happened while we were allocating blocks, we may
         * leave blocks allocated to the file that are beyond EOF.  We can't
@@ -664,7 +899,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                goto out;
        }
        if ((pgoff | PG_PMD_COLOUR) >= size) {
-               dax_pmd_dbg(&bh, address, "pgoff unaligned");
+               dax_pmd_dbg(&bh, address,
+                               "offset + huge page size > file size");
                goto fallback;
        }
 
@@ -732,6 +968,31 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                }
                dax_unmap_atomic(bdev, &dax);
 
+               /*
+                * For PTE faults we insert a radix tree entry for reads, and
+                * leave it clean.  Then on the first write we dirty the radix
+                * tree entry via the dax_pfn_mkwrite() path.  This sequence
+                * allows the dax_pfn_mkwrite() call to be simpler and avoid a
+                * call into get_block() to translate the pgoff to a sector in
+                * order to be able to create a new radix tree entry.
+                *
+                * The PMD path doesn't have an equivalent to
+                * dax_pfn_mkwrite(), though, so for a read followed by a
+                * write we traverse all the way through __dax_pmd_fault()
+                * twice.  This means we can just skip inserting a radix tree
+                * entry completely on the initial read and just wait until
+                * the write to insert a dirty entry.
+                */
+               if (write) {
+                       error = dax_radix_entry(mapping, pgoff, dax.sector,
+                                       true, true);
+                       if (error) {
+                               dax_pmd_dbg(&bh, address,
+                                               "PMD radix insertion failed");
+                               goto fallback;
+                       }
+               }
+
                dev_dbg(part_to_dev(bdev->bd_part),
                                "%s: %s addr: %lx pfn: %lx sect: %llx\n",
                                __func__, current->comm, address,
@@ -790,15 +1051,20 @@ EXPORT_SYMBOL_GPL(dax_pmd_fault);
  * dax_pfn_mkwrite - handle first write to DAX page
  * @vma: The virtual memory area where the fault occurred
  * @vmf: The description of the fault
- *
  */
 int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-       struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+       struct file *file = vma->vm_file;
 
-       sb_start_pagefault(sb);
-       file_update_time(vma->vm_file);
-       sb_end_pagefault(sb);
+       /*
+        * We pass NO_SECTOR to dax_radix_entry() because we expect that a
+        * RADIX_DAX_PTE entry already exists in the radix tree from a
+        * previous call to __dax_fault().  We just want to look up that PTE
+        * entry using vmf->pgoff and make sure the dirty tag is set.  This
+        * saves us from having to make a call to get_block() here to look
+        * up the sector.
+        */
+       dax_radix_entry(file->f_mapping, vmf->pgoff, NO_SECTOR, false, true);
        return VM_FAULT_NOPAGE;
 }
 EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
@@ -835,6 +1101,7 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
        BUG_ON((offset + length) > PAGE_CACHE_SIZE);
 
        memset(&bh, 0, sizeof(bh));
+       bh.b_bdev = inode->i_sb->s_bdev;
        bh.b_size = PAGE_CACHE_SIZE;
        err = get_block(inode, index, &bh, 0);
        if (err < 0)
index b4539e8..92d5140 100644 (file)
@@ -2462,7 +2462,7 @@ EXPORT_SYMBOL(d_rehash);
  */
 void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
 {
-       BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
+       BUG_ON(!inode_is_locked(dentry->d_parent->d_inode));
        BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
 
        spin_lock(&dentry->d_lock);
@@ -2738,7 +2738,7 @@ static int __d_unalias(struct inode *inode,
        if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
                goto out_err;
        m1 = &dentry->d_sb->s_vfs_rename_mutex;
-       if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
+       if (!inode_trylock(alias->d_parent->d_inode))
                goto out_err;
        m2 = &alias->d_parent->d_inode->i_mutex;
 out_unalias:
index b7fcc0d..bece948 100644 (file)
@@ -265,7 +265,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
        if (!parent)
                parent = debugfs_mount->mnt_root;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        dentry = lookup_one_len(name, parent, strlen(name));
        if (!IS_ERR(dentry) && d_really_is_positive(dentry)) {
                dput(dentry);
@@ -273,7 +273,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
        }
 
        if (IS_ERR(dentry)) {
-               mutex_unlock(&d_inode(parent)->i_mutex);
+               inode_unlock(d_inode(parent));
                simple_release_fs(&debugfs_mount, &debugfs_mount_count);
        }
 
@@ -282,7 +282,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
 
 static struct dentry *failed_creating(struct dentry *dentry)
 {
-       mutex_unlock(&d_inode(dentry->d_parent)->i_mutex);
+       inode_unlock(d_inode(dentry->d_parent));
        dput(dentry);
        simple_release_fs(&debugfs_mount, &debugfs_mount_count);
        return NULL;
@@ -290,7 +290,7 @@ static struct dentry *failed_creating(struct dentry *dentry)
 
 static struct dentry *end_creating(struct dentry *dentry)
 {
-       mutex_unlock(&d_inode(dentry->d_parent)->i_mutex);
+       inode_unlock(d_inode(dentry->d_parent));
        return dentry;
 }
 
@@ -560,9 +560,9 @@ void debugfs_remove(struct dentry *dentry)
        if (!parent || d_really_is_negative(parent))
                return;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        ret = __debugfs_remove(dentry, parent);
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        if (!ret)
                simple_release_fs(&debugfs_mount, &debugfs_mount_count);
 }
@@ -594,7 +594,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
 
        parent = dentry;
  down:
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
  loop:
        /*
         * The parent->d_subdirs is protected by the d_lock. Outside that
@@ -609,7 +609,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
                /* perhaps simple_empty(child) makes more sense */
                if (!list_empty(&child->d_subdirs)) {
                        spin_unlock(&parent->d_lock);
-                       mutex_unlock(&d_inode(parent)->i_mutex);
+                       inode_unlock(d_inode(parent));
                        parent = child;
                        goto down;
                }
@@ -630,10 +630,10 @@ void debugfs_remove_recursive(struct dentry *dentry)
        }
        spin_unlock(&parent->d_lock);
 
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        child = parent;
        parent = parent->d_parent;
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
 
        if (child != dentry)
                /* go up */
@@ -641,7 +641,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
 
        if (!__debugfs_remove(child, parent))
                simple_release_fs(&debugfs_mount, &debugfs_mount_count);
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
 }
 EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
 
index c35ffdc..1f107fd 100644 (file)
@@ -255,7 +255,7 @@ static int mknod_ptmx(struct super_block *sb)
        if (!uid_valid(root_uid) || !gid_valid(root_gid))
                return -EINVAL;
 
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
 
        /* If we have already created ptmx node, return */
        if (fsi->ptmx_dentry) {
@@ -292,7 +292,7 @@ static int mknod_ptmx(struct super_block *sb)
        fsi->ptmx_dentry = dentry;
        rc = 0;
 out:
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
        return rc;
 }
 
@@ -615,7 +615,7 @@ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
 
        sprintf(s, "%d", index);
 
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
 
        dentry = d_alloc_name(root, s);
        if (dentry) {
@@ -626,7 +626,7 @@ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
                inode = ERR_PTR(-ENOMEM);
        }
 
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
 
        return inode;
 }
@@ -671,7 +671,7 @@ void devpts_pty_kill(struct inode *inode)
 
        BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));
 
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
 
        dentry = d_find_alias(inode);
 
@@ -680,7 +680,7 @@ void devpts_pty_kill(struct inode *inode)
        dput(dentry);   /* d_alloc_name() in devpts_pty_new() */
        dput(dentry);           /* d_find_alias above */
 
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
 }
 
 static int __init init_devpts_fs(void)
index 602e844..1b2f7ff 100644 (file)
@@ -1157,12 +1157,12 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
                                        iocb->ki_filp->f_mapping;
 
                        /* will be released by direct_io_worker */
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
 
                        retval = filemap_write_and_wait_range(mapping, offset,
                                                              end - 1);
                        if (retval) {
-                               mutex_unlock(&inode->i_mutex);
+                               inode_unlock(inode);
                                kmem_cache_free(dio_cache, dio);
                                goto out;
                        }
@@ -1173,7 +1173,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
        dio->i_size = i_size_read(inode);
        if (iov_iter_rw(iter) == READ && offset >= dio->i_size) {
                if (dio->flags & DIO_LOCKING)
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                kmem_cache_free(dio_cache, dio);
                retval = 0;
                goto out;
@@ -1295,7 +1295,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
         * of protecting us from looking up uninitialized blocks.
         */
        if (iov_iter_rw(iter) == READ && (dio->flags & DIO_LOCKING))
-               mutex_unlock(&dio->inode->i_mutex);
+               inode_unlock(dio->inode);
 
        /*
         * The only time we want to leave bios in flight is when a successful
index 1925d6d..58c2f4a 100644 (file)
@@ -516,7 +516,7 @@ static ssize_t device_write(struct file *file, const char __user *buf,
                return -EINVAL;
 
        kbuf = memdup_user_nul(buf, count);
-       if (!IS_ERR(kbuf))
+       if (IS_ERR(kbuf))
                return PTR_ERR(kbuf);
 
        if (check_version(kbuf)) {
index 040aa87..4e685ac 100644 (file)
@@ -41,13 +41,13 @@ static struct dentry *lock_parent(struct dentry *dentry)
        struct dentry *dir;
 
        dir = dget_parent(dentry);
-       mutex_lock_nested(&(d_inode(dir)->i_mutex), I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
        return dir;
 }
 
 static void unlock_dir(struct dentry *dir)
 {
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        dput(dir);
 }
 
@@ -397,11 +397,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
        int rc = 0;
 
        lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
-       mutex_lock(&d_inode(lower_dir_dentry)->i_mutex);
+       inode_lock(d_inode(lower_dir_dentry));
        lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
                                      lower_dir_dentry,
                                      ecryptfs_dentry->d_name.len);
-       mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex);
+       inode_unlock(d_inode(lower_dir_dentry));
        if (IS_ERR(lower_dentry)) {
                rc = PTR_ERR(lower_dentry);
                ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
@@ -426,11 +426,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
                       "filename; rc = [%d]\n", __func__, rc);
                goto out;
        }
-       mutex_lock(&d_inode(lower_dir_dentry)->i_mutex);
+       inode_lock(d_inode(lower_dir_dentry));
        lower_dentry = lookup_one_len(encrypted_and_encoded_name,
                                      lower_dir_dentry,
                                      encrypted_and_encoded_name_size);
-       mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex);
+       inode_unlock(d_inode(lower_dir_dentry));
        if (IS_ERR(lower_dentry)) {
                rc = PTR_ERR(lower_dentry);
                ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
@@ -869,9 +869,9 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
        if (!rc && lower_ia.ia_valid & ATTR_SIZE) {
                struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
 
-               mutex_lock(&d_inode(lower_dentry)->i_mutex);
+               inode_lock(d_inode(lower_dentry));
                rc = notify_change(lower_dentry, &lower_ia, NULL);
-               mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+               inode_unlock(d_inode(lower_dentry));
        }
        return rc;
 }
@@ -970,9 +970,9 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
        if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
                lower_ia.ia_valid &= ~ATTR_MODE;
 
-       mutex_lock(&d_inode(lower_dentry)->i_mutex);
+       inode_lock(d_inode(lower_dentry));
        rc = notify_change(lower_dentry, &lower_ia, NULL);
-       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+       inode_unlock(d_inode(lower_dentry));
 out:
        fsstack_copy_attr_all(inode, lower_inode);
        return rc;
@@ -1048,10 +1048,10 @@ ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name,
                rc = -EOPNOTSUPP;
                goto out;
        }
-       mutex_lock(&d_inode(lower_dentry)->i_mutex);
+       inode_lock(d_inode(lower_dentry));
        rc = d_inode(lower_dentry)->i_op->getxattr(lower_dentry, name, value,
                                                   size);
-       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+       inode_unlock(d_inode(lower_dentry));
 out:
        return rc;
 }
@@ -1075,9 +1075,9 @@ ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size)
                rc = -EOPNOTSUPP;
                goto out;
        }
-       mutex_lock(&d_inode(lower_dentry)->i_mutex);
+       inode_lock(d_inode(lower_dentry));
        rc = d_inode(lower_dentry)->i_op->listxattr(lower_dentry, list, size);
-       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+       inode_unlock(d_inode(lower_dentry));
 out:
        return rc;
 }
@@ -1092,9 +1092,9 @@ static int ecryptfs_removexattr(struct dentry *dentry, const char *name)
                rc = -EOPNOTSUPP;
                goto out;
        }
-       mutex_lock(&d_inode(lower_dentry)->i_mutex);
+       inode_lock(d_inode(lower_dentry));
        rc = d_inode(lower_dentry)->i_op->removexattr(lower_dentry, name);
-       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+       inode_unlock(d_inode(lower_dentry));
 out:
        return rc;
 }
index caba848..c6ced4c 100644 (file)
@@ -436,7 +436,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
                rc = -ENOMEM;
                goto out;
        }
-       mutex_lock(&lower_inode->i_mutex);
+       inode_lock(lower_inode);
        size = lower_inode->i_op->getxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
                                           xattr_virt, PAGE_CACHE_SIZE);
        if (size < 0)
@@ -444,7 +444,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
        put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt);
        rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
                                         xattr_virt, size, 0);
-       mutex_unlock(&lower_inode->i_mutex);
+       inode_unlock(lower_inode);
        if (rc)
                printk(KERN_ERR "Error whilst attempting to write inode size "
                       "to lower file xattr; rc = [%d]\n", rc);
index 90001da..c424e48 100644 (file)
@@ -50,9 +50,9 @@ static ssize_t efivarfs_file_write(struct file *file,
                d_delete(file->f_path.dentry);
                dput(file->f_path.dentry);
        } else {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                i_size_write(inode, datasize + sizeof(attributes));
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
        bytes = count;
index 86a2121..b8a564f 100644 (file)
@@ -160,10 +160,10 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
        efivar_entry_size(entry, &size);
        efivar_entry_add(entry, &efivarfs_list);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        inode->i_private = entry;
        i_size_write(inode, size + sizeof(entry->var.Attributes));
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        d_add(dentry, inode);
 
        return 0;
index ae1dbcf..cde6074 100644 (file)
 /* Epoll private bits inside the event mask */
 #define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE)
 
+#define EPOLLINOUT_BITS (POLLIN | POLLOUT)
+
+#define EPOLLEXCLUSIVE_OK_BITS (EPOLLINOUT_BITS | POLLERR | POLLHUP | \
+                               EPOLLWAKEUP | EPOLLET | EPOLLEXCLUSIVE)
+
 /* Maximum number of nesting allowed inside epoll sets */
 #define EP_MAX_NESTS 4
 
@@ -1068,7 +1073,22 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
         * wait list.
         */
        if (waitqueue_active(&ep->wq)) {
-               ewake = 1;
+               if ((epi->event.events & EPOLLEXCLUSIVE) &&
+                                       !((unsigned long)key & POLLFREE)) {
+                       switch ((unsigned long)key & EPOLLINOUT_BITS) {
+                       case POLLIN:
+                               if (epi->event.events & POLLIN)
+                                       ewake = 1;
+                               break;
+                       case POLLOUT:
+                               if (epi->event.events & POLLOUT)
+                                       ewake = 1;
+                               break;
+                       case 0:
+                               ewake = 1;
+                               break;
+                       }
+               }
                wake_up_locked(&ep->wq);
        }
        if (waitqueue_active(&ep->poll_wait))
@@ -1875,9 +1895,13 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
         * so EPOLLEXCLUSIVE is not allowed for a EPOLL_CTL_MOD operation.
         * Also, we do not currently supported nested exclusive wakeups.
         */
-       if ((epds.events & EPOLLEXCLUSIVE) && (op == EPOLL_CTL_MOD ||
-               (op == EPOLL_CTL_ADD && is_file_epoll(tf.file))))
-               goto error_tgt_fput;
+       if (epds.events & EPOLLEXCLUSIVE) {
+               if (op == EPOLL_CTL_MOD)
+                       goto error_tgt_fput;
+               if (op == EPOLL_CTL_ADD && (is_file_epoll(tf.file) ||
+                               (epds.events & ~EPOLLEXCLUSIVE_OK_BITS)))
+                       goto error_tgt_fput;
+       }
 
        /*
         * At this point it is safe to assume that the "private_data" contains
@@ -1950,8 +1974,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
                break;
        case EPOLL_CTL_MOD:
                if (epi) {
-                       epds.events |= POLLERR | POLLHUP;
-                       error = ep_modify(ep, epi, &epds);
+                       if (!(epi->event.events & EPOLLEXCLUSIVE)) {
+                               epds.events |= POLLERR | POLLHUP;
+                               error = ep_modify(ep, epi, &epds);
+                       }
                } else
                        error = -ENOENT;
                break;
index 828ec5f..dcd4ac7 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1307,13 +1307,13 @@ static void bprm_fill_uid(struct linux_binprm *bprm)
                return;
 
        /* Be careful if suid/sgid is set */
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* reload atomically mode/uid/gid now that lock held */
        mode = inode->i_mode;
        uid = inode->i_uid;
        gid = inode->i_gid;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        /* We ignore suid/sgid if there are no mappings for them in the ns */
        if (!kuid_has_mapping(bprm->cred->user_ns, uid) ||
index 906de66..28645f0 100644 (file)
@@ -52,9 +52,9 @@ static int exofs_file_fsync(struct file *filp, loff_t start, loff_t end,
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = sync_inode_metadata(filp->f_mapping->host, 1);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index 714cd37..c46f1a1 100644 (file)
@@ -124,10 +124,10 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
        int err;
 
        parent = ERR_PTR(-EACCES);
-       mutex_lock(&dentry->d_inode->i_mutex);
+       inode_lock(dentry->d_inode);
        if (mnt->mnt_sb->s_export_op->get_parent)
                parent = mnt->mnt_sb->s_export_op->get_parent(dentry);
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       inode_unlock(dentry->d_inode);
 
        if (IS_ERR(parent)) {
                dprintk("%s: get_parent of %ld failed, err %d\n",
@@ -143,9 +143,9 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
        if (err)
                goto out_err;
        dprintk("%s: found name: %s\n", __func__, nbuf);
-       mutex_lock(&parent->d_inode->i_mutex);
+       inode_lock(parent->d_inode);
        tmp = lookup_one_len(nbuf, parent, strlen(nbuf));
-       mutex_unlock(&parent->d_inode->i_mutex);
+       inode_unlock(parent->d_inode);
        if (IS_ERR(tmp)) {
                dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
                goto out_err;
@@ -503,10 +503,10 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
                 */
                err = exportfs_get_name(mnt, target_dir, nbuf, result);
                if (!err) {
-                       mutex_lock(&target_dir->d_inode->i_mutex);
+                       inode_lock(target_dir->d_inode);
                        nresult = lookup_one_len(nbuf, target_dir,
                                                 strlen(nbuf));
-                       mutex_unlock(&target_dir->d_inode->i_mutex);
+                       inode_unlock(target_dir->d_inode);
                        if (!IS_ERR(nresult)) {
                                if (nresult->d_inode) {
                                        dput(result);
index 11a42c5..2c88d68 100644 (file)
@@ -102,8 +102,8 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
 {
        struct inode *inode = file_inode(vma->vm_file);
        struct ext2_inode_info *ei = EXT2_I(inode);
-       int ret = VM_FAULT_NOPAGE;
        loff_t size;
+       int ret;
 
        sb_start_pagefault(inode->i_sb);
        file_update_time(vma->vm_file);
@@ -113,6 +113,8 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
        size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
        if (vmf->pgoff >= size)
                ret = VM_FAULT_SIGBUS;
+       else
+               ret = dax_pfn_mkwrite(vma, vmf);
 
        up_read(&ei->dax_sem);
        sb_end_pagefault(inode->i_sb);
index 5d46c09..b386af2 100644 (file)
@@ -51,10 +51,10 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
                flags = ext2_mask_flags(inode->i_mode, flags);
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                /* Is it quota file? Do not allow user to mess with it */
                if (IS_NOQUOTA(inode)) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        ret = -EPERM;
                        goto setflags_out;
                }
@@ -68,7 +68,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                 */
                if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
                        if (!capable(CAP_LINUX_IMMUTABLE)) {
-                               mutex_unlock(&inode->i_mutex);
+                               inode_unlock(inode);
                                ret = -EPERM;
                                goto setflags_out;
                        }
@@ -80,7 +80,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
                ext2_set_inode_flags(inode);
                inode->i_ctime = CURRENT_TIME_SEC;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
                mark_inode_dirty(inode);
 setflags_out:
@@ -102,10 +102,10 @@ setflags_out:
                        goto setversion_out;
                }
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                inode->i_ctime = CURRENT_TIME_SEC;
                inode->i_generation = generation;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
                mark_inode_dirty(inode);
 setversion_out:
index 1a08350..c802120 100644 (file)
@@ -384,14 +384,12 @@ int ext4_decrypt(struct page *page)
                                EXT4_DECRYPT, page->index, page, page);
 }
 
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+                          ext4_fsblk_t pblk, ext4_lblk_t len)
 {
        struct ext4_crypto_ctx  *ctx;
        struct page             *ciphertext_page = NULL;
        struct bio              *bio;
-       ext4_lblk_t             lblk = le32_to_cpu(ex->ee_block);
-       ext4_fsblk_t            pblk = ext4_ext_pblock(ex);
-       unsigned int            len = ext4_ext_get_actual_len(ex);
        int                     ret, err = 0;
 
 #if 0
index c5882b3..9a16d1e 100644 (file)
@@ -213,9 +213,11 @@ retry:
                res = -ENOKEY;
                goto out;
        }
+       down_read(&keyring_key->sem);
        ukp = user_key_payload(keyring_key);
        if (ukp->datalen != sizeof(struct ext4_encryption_key)) {
                res = -EINVAL;
+               up_read(&keyring_key->sem);
                goto out;
        }
        master_key = (struct ext4_encryption_key *)ukp->data;
@@ -226,10 +228,12 @@ retry:
                            "ext4: key size incorrect: %d\n",
                            master_key->size);
                res = -ENOKEY;
+               up_read(&keyring_key->sem);
                goto out;
        }
        res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
                                  raw_key);
+       up_read(&keyring_key->sem);
        if (res)
                goto out;
 got_key:
index cc7ca4e..0662b28 100644 (file)
@@ -378,14 +378,22 @@ struct flex_groups {
 #define EXT4_PROJINHERIT_FL            0x20000000 /* Create with parents projid */
 #define EXT4_RESERVED_FL               0x80000000 /* reserved for ext4 lib */
 
-#define EXT4_FL_USER_VISIBLE           0x004BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE                0x004380FF /* User modifiable flags */
+#define EXT4_FL_USER_VISIBLE           0x304BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE                0x204380FF /* User modifiable flags */
+
+#define EXT4_FL_XFLAG_VISIBLE          (EXT4_SYNC_FL | \
+                                        EXT4_IMMUTABLE_FL | \
+                                        EXT4_APPEND_FL | \
+                                        EXT4_NODUMP_FL | \
+                                        EXT4_NOATIME_FL | \
+                                        EXT4_PROJINHERIT_FL)
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
                           EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
                           EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
-                          EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
+                          EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
+                          EXT4_PROJINHERIT_FL)
 
 /* Flags that are appropriate for regular files (all but dir-specific ones). */
 #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
@@ -555,10 +563,12 @@ enum {
 #define EXT4_GET_BLOCKS_NO_NORMALIZE           0x0040
        /* Request will not result in inode size update (user for fallocate) */
 #define EXT4_GET_BLOCKS_KEEP_SIZE              0x0080
-       /* Do not take i_data_sem locking in ext4_map_blocks */
-#define EXT4_GET_BLOCKS_NO_LOCK                        0x0100
        /* Convert written extents to unwritten */
-#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN      0x0200
+#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN      0x0100
+       /* Write zeros to newly created written extents */
+#define EXT4_GET_BLOCKS_ZERO                   0x0200
+#define EXT4_GET_BLOCKS_CREATE_ZERO            (EXT4_GET_BLOCKS_CREATE |\
+                                       EXT4_GET_BLOCKS_ZERO)
 
 /*
  * The bit position of these flags must not overlap with any of the
@@ -616,6 +626,46 @@ enum {
 #define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
 #define EXT4_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct ext4_encryption_policy)
 
+#ifndef FS_IOC_FSGETXATTR
+/* Until the uapi changes get merged for project quota... */
+
+#define FS_IOC_FSGETXATTR              _IOR('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR              _IOW('X', 32, struct fsxattr)
+
+/*
+ * Structure for FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR.
+ */
+struct fsxattr {
+       __u32           fsx_xflags;     /* xflags field value (get/set) */
+       __u32           fsx_extsize;    /* extsize field value (get/set)*/
+       __u32           fsx_nextents;   /* nextents field value (get)   */
+       __u32           fsx_projid;     /* project identifier (get/set) */
+       unsigned char   fsx_pad[12];
+};
+
+/*
+ * Flags for the fsx_xflags field
+ */
+#define FS_XFLAG_REALTIME      0x00000001      /* data in realtime volume */
+#define FS_XFLAG_PREALLOC      0x00000002      /* preallocated file extents */
+#define FS_XFLAG_IMMUTABLE     0x00000008      /* file cannot be modified */
+#define FS_XFLAG_APPEND                0x00000010      /* all writes append */
+#define FS_XFLAG_SYNC          0x00000020      /* all writes synchronous */
+#define FS_XFLAG_NOATIME       0x00000040      /* do not update access time */
+#define FS_XFLAG_NODUMP                0x00000080      /* do not include in backups */
+#define FS_XFLAG_RTINHERIT     0x00000100      /* create with rt bit set */
+#define FS_XFLAG_PROJINHERIT   0x00000200      /* create with parents projid */
+#define FS_XFLAG_NOSYMLINKS    0x00000400      /* disallow symlink creation */
+#define FS_XFLAG_EXTSIZE       0x00000800      /* extent size allocator hint */
+#define FS_XFLAG_EXTSZINHERIT  0x00001000      /* inherit inode extent size */
+#define FS_XFLAG_NODEFRAG      0x00002000      /* do not defragment */
+#define FS_XFLAG_FILESTREAM    0x00004000      /* use filestream allocator */
+#define FS_XFLAG_HASATTR       0x80000000      /* no DIFLAG for this */
+#endif /* !defined(FS_IOC_FSGETXATTR) */
+
+#define EXT4_IOC_FSGETXATTR            FS_IOC_FSGETXATTR
+#define EXT4_IOC_FSSETXATTR            FS_IOC_FSSETXATTR
+
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /*
  * ioctl commands in 32 bit emulation
@@ -910,6 +960,15 @@ struct ext4_inode_info {
         * by other means, so we have i_data_sem.
         */
        struct rw_semaphore i_data_sem;
+       /*
+        * i_mmap_sem is for serializing page faults with truncate / punch hole
+        * operations. We have to make sure that new page cannot be faulted in
+        * a section of the inode that is being punched. We cannot easily use
+        * i_data_sem for this since we need protection for the whole punch
+        * operation and i_data_sem ranks below transaction start so we have
+        * to occasionally drop it.
+        */
+       struct rw_semaphore i_mmap_sem;
        struct inode vfs_inode;
        struct jbd2_inode *jinode;
 
@@ -993,6 +1052,7 @@ struct ext4_inode_info {
        /* Encryption params */
        struct ext4_crypt_info *i_crypt_info;
 #endif
+       kprojid_t i_projid;
 };
 
 /*
@@ -1248,7 +1308,7 @@ struct ext4_super_block {
 #endif
 
 /* Number of quota types we support */
-#define EXT4_MAXQUOTAS 2
+#define EXT4_MAXQUOTAS 3
 
 /*
  * fourth extended-fs super-block data in memory
@@ -1754,7 +1814,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt,              ENCRYPT)
                                         EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
                                         EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
                                         EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
-                                        EXT4_FEATURE_RO_COMPAT_QUOTA)
+                                        EXT4_FEATURE_RO_COMPAT_QUOTA |\
+                                        EXT4_FEATURE_RO_COMPAT_PROJECT)
 
 #define EXTN_FEATURE_FUNCS(ver) \
 static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \
@@ -1796,6 +1857,11 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
 #define        EXT4_DEF_RESUID         0
 #define        EXT4_DEF_RESGID         0
 
+/*
+ * Default project ID
+ */
+#define        EXT4_DEF_PROJID         0
+
 #define EXT4_DEF_INODE_READAHEAD_BLKS  32
 
 /*
@@ -2234,7 +2300,8 @@ void ext4_restore_control_page(struct page *data_page);
 struct page *ext4_encrypt(struct inode *inode,
                          struct page *plaintext_page);
 int ext4_decrypt(struct page *page);
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex);
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+                          ext4_fsblk_t pblk, ext4_lblk_t len);
 
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
 int ext4_init_crypto(void);
@@ -2440,8 +2507,8 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
 struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
 int ext4_get_block_write(struct inode *inode, sector_t iblock,
                         struct buffer_head *bh_result, int create);
-int ext4_get_block_dax(struct inode *inode, sector_t iblock,
-                        struct buffer_head *bh_result, int create);
+int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
+                           struct buffer_head *bh_result, int create);
 int ext4_get_block(struct inode *inode, sector_t iblock,
                                struct buffer_head *bh_result, int create);
 int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
@@ -2484,9 +2551,13 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
                             loff_t lstart, loff_t lend);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
+extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
 extern void ext4_da_update_reserve_space(struct inode *inode,
                                        int used, int quota_claim);
+extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
+                             ext4_fsblk_t pblk, ext4_lblk_t len);
 
 /* indirect.c */
 extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
@@ -2825,7 +2896,7 @@ do {                                                              \
 static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
 {
        WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
-                    !mutex_is_locked(&inode->i_mutex));
+                    !inode_is_locked(inode));
        down_write(&EXT4_I(inode)->i_data_sem);
        if (newsize > EXT4_I(inode)->i_disksize)
                EXT4_I(inode)->i_disksize = newsize;
@@ -2848,6 +2919,9 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
        return changed;
 }
 
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+                                     loff_t len);
+
 struct ext4_group_info {
        unsigned long   bb_state;
        struct rb_root  bb_free_root;
@@ -2986,8 +3060,7 @@ extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
                                         struct page *page);
 extern int ext4_try_add_inline_entry(handle_t *handle,
                                     struct ext4_filename *fname,
-                                    struct dentry *dentry,
-                                    struct inode *inode);
+                                    struct inode *dir, struct inode *inode);
 extern int ext4_try_create_inline_dir(handle_t *handle,
                                      struct inode *parent,
                                      struct inode *inode);
index 551353b..0ffabaf 100644 (file)
@@ -3119,19 +3119,11 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
 {
        ext4_fsblk_t ee_pblock;
        unsigned int ee_len;
-       int ret;
 
        ee_len    = ext4_ext_get_actual_len(ex);
        ee_pblock = ext4_ext_pblock(ex);
-
-       if (ext4_encrypted_inode(inode))
-               return ext4_encrypted_zeroout(inode, ex);
-
-       ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
-       if (ret > 0)
-               ret = 0;
-
-       return ret;
+       return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
+                                 ee_len);
 }
 
 /*
@@ -4052,6 +4044,14 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
        }
        /* IO end_io complete, convert the filled extent to written */
        if (flags & EXT4_GET_BLOCKS_CONVERT) {
+               if (flags & EXT4_GET_BLOCKS_ZERO) {
+                       if (allocated > map->m_len)
+                               allocated = map->m_len;
+                       err = ext4_issue_zeroout(inode, map->m_lblk, newblock,
+                                                allocated);
+                       if (err < 0)
+                               goto out2;
+               }
                ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
                                                           ppath);
                if (ret >= 0) {
@@ -4685,10 +4685,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
        if (len <= EXT_UNWRITTEN_MAX_LEN)
                flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
 
-       /* Wait all existing dio workers, newcomers will block on i_mutex */
-       ext4_inode_block_unlocked_dio(inode);
-       inode_dio_wait(inode);
-
        /*
         * credits to insert 1 extent into extent tree
         */
@@ -4752,8 +4748,6 @@ retry:
                goto retry;
        }
 
-       ext4_inode_resume_unlocked_dio(inode);
-
        return ret > 0 ? ret2 : ret;
 }
 
@@ -4770,7 +4764,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        int partial_begin, partial_end;
        loff_t start, end;
        ext4_lblk_t lblk;
-       struct address_space *mapping = inode->i_mapping;
        unsigned int blkbits = inode->i_blkbits;
 
        trace_ext4_zero_range(inode, offset, len, mode);
@@ -4785,17 +4778,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                        return ret;
        }
 
-       /*
-        * Write out all dirty pages to avoid race conditions
-        * Then release them.
-        */
-       if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-               ret = filemap_write_and_wait_range(mapping, offset,
-                                                  offset + len - 1);
-               if (ret)
-                       return ret;
-       }
-
        /*
         * Round up offset. This is not fallocate, we neet to zero out
         * blocks, so convert interior block aligned part of the range to
@@ -4817,7 +4799,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        else
                max_blocks -= lblk;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * Indirect files do not support unwritten extnets
@@ -4839,6 +4821,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        if (mode & FALLOC_FL_KEEP_SIZE)
                flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
 
+       /* Wait all existing dio workers, newcomers will block on i_mutex */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+
        /* Preallocate the range including the unaligned edges */
        if (partial_begin || partial_end) {
                ret = ext4_alloc_file_blocks(file,
@@ -4847,7 +4833,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                                 round_down(offset, 1 << blkbits)) >> blkbits,
                                new_size, flags, mode);
                if (ret)
-                       goto out_mutex;
+                       goto out_dio;
 
        }
 
@@ -4856,16 +4842,23 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
                          EXT4_EX_NOCACHE);
 
-               /* Now release the pages and zero block aligned part of pages*/
+               /*
+                * Prevent page faults from reinstantiating pages we have
+                * released from page cache.
+                */
+               down_write(&EXT4_I(inode)->i_mmap_sem);
+               ret = ext4_update_disksize_before_punch(inode, offset, len);
+               if (ret) {
+                       up_write(&EXT4_I(inode)->i_mmap_sem);
+                       goto out_dio;
+               }
+               /* Now release the pages and zero block aligned part of pages */
                truncate_pagecache_range(inode, start, end - 1);
                inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 
-               /* Wait all existing dio workers, newcomers will block on i_mutex */
-               ext4_inode_block_unlocked_dio(inode);
-               inode_dio_wait(inode);
-
                ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
                                             flags, mode);
+               up_write(&EXT4_I(inode)->i_mmap_sem);
                if (ret)
                        goto out_dio;
        }
@@ -4909,7 +4902,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 out_dio:
        ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
@@ -4980,7 +4973,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (mode & FALLOC_FL_KEEP_SIZE)
                flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * We only support preallocation for extent-based files only
@@ -4998,8 +4991,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                        goto out;
        }
 
+       /* Wait all existing dio workers, newcomers will block on i_mutex */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+
        ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
                                     flags, mode);
+       ext4_inode_resume_unlocked_dio(inode);
        if (ret)
                goto out;
 
@@ -5008,7 +5006,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                                                EXT4_I(inode)->i_sync_tid);
        }
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
        return ret;
 }
@@ -5494,21 +5492,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
                        return ret;
        }
 
-       /*
-        * Need to round down offset to be aligned with page size boundary
-        * for page size > block size.
-        */
-       ioffset = round_down(offset, PAGE_SIZE);
-
-       /* Write out all dirty pages */
-       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
-                                          LLONG_MAX);
-       if (ret)
-               return ret;
-
-       /* Take mutex lock */
-       mutex_lock(&inode->i_mutex);
-
+       inode_lock(inode);
        /*
         * There is no need to overlap collapse range with EOF, in which case
         * it is effectively a truncate operation
@@ -5524,17 +5508,43 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
                goto out_mutex;
        }
 
-       truncate_pagecache(inode, ioffset);
-
        /* Wait for existing dio to complete */
        ext4_inode_block_unlocked_dio(inode);
        inode_dio_wait(inode);
 
+       /*
+        * Prevent page faults from reinstantiating pages we have released from
+        * page cache.
+        */
+       down_write(&EXT4_I(inode)->i_mmap_sem);
+       /*
+        * Need to round down offset to be aligned with page size boundary
+        * for page size > block size.
+        */
+       ioffset = round_down(offset, PAGE_SIZE);
+       /*
+        * Write tail of the last page before removed range since it will get
+        * removed from the page cache below.
+        */
+       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
+       if (ret)
+               goto out_mmap;
+       /*
+        * Write data that will be shifted to preserve them when discarding
+        * page cache below. We are also protected from pages becoming dirty
+        * by i_mmap_sem.
+        */
+       ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
+                                          LLONG_MAX);
+       if (ret)
+               goto out_mmap;
+       truncate_pagecache(inode, ioffset);
+
        credits = ext4_writepage_trans_blocks(inode);
        handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
-               goto out_dio;
+               goto out_mmap;
        }
 
        down_write(&EXT4_I(inode)->i_data_sem);
@@ -5573,10 +5583,11 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 
 out_stop:
        ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+       up_write(&EXT4_I(inode)->i_mmap_sem);
        ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
@@ -5627,21 +5638,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
                        return ret;
        }
 
-       /*
-        * Need to round down to align start offset to page size boundary
-        * for page size > block size.
-        */
-       ioffset = round_down(offset, PAGE_SIZE);
-
-       /* Write out all dirty pages */
-       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
-                       LLONG_MAX);
-       if (ret)
-               return ret;
-
-       /* Take mutex lock */
-       mutex_lock(&inode->i_mutex);
-
+       inode_lock(inode);
        /* Currently just for extent based files */
        if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                ret = -EOPNOTSUPP;
@@ -5660,17 +5657,32 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
                goto out_mutex;
        }
 
-       truncate_pagecache(inode, ioffset);
-
        /* Wait for existing dio to complete */
        ext4_inode_block_unlocked_dio(inode);
        inode_dio_wait(inode);
 
+       /*
+        * Prevent page faults from reinstantiating pages we have released from
+        * page cache.
+        */
+       down_write(&EXT4_I(inode)->i_mmap_sem);
+       /*
+        * Need to round down to align start offset to page size boundary
+        * for page size > block size.
+        */
+       ioffset = round_down(offset, PAGE_SIZE);
+       /* Write out all dirty pages */
+       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+                       LLONG_MAX);
+       if (ret)
+               goto out_mmap;
+       truncate_pagecache(inode, ioffset);
+
        credits = ext4_writepage_trans_blocks(inode);
        handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
-               goto out_dio;
+               goto out_mmap;
        }
 
        /* Expand file to avoid data loss if there is error while shifting */
@@ -5741,10 +5753,11 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 
 out_stop:
        ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+       up_write(&EXT4_I(inode)->i_mmap_sem);
        ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
@@ -5779,8 +5792,8 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
 
        BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
        BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
-       BUG_ON(!mutex_is_locked(&inode1->i_mutex));
-       BUG_ON(!mutex_is_locked(&inode2->i_mutex));
+       BUG_ON(!inode_is_locked(inode1));
+       BUG_ON(!inode_is_locked(inode2));
 
        *erp = ext4_es_remove_extent(inode1, lblk1, count);
        if (unlikely(*erp))
index 113837e..1126436 100644 (file)
@@ -113,7 +113,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                ext4_unwritten_wait(inode);
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = generic_write_checks(iocb, from);
        if (ret <= 0)
                goto out;
@@ -169,7 +169,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        }
 
        ret = __generic_file_write_iter(iocb, from);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (ret > 0) {
                ssize_t err;
@@ -186,50 +186,42 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        return ret;
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (aio_mutex)
                mutex_unlock(aio_mutex);
        return ret;
 }
 
 #ifdef CONFIG_FS_DAX
-static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
-{
-       struct inode *inode = bh->b_assoc_map->host;
-       /* XXX: breaks on 32-bit > 16TB. Is that even supported? */
-       loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
-       int err;
-       if (!uptodate)
-               return;
-       WARN_ON(!buffer_unwritten(bh));
-       err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
-}
-
 static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
        int result;
        handle_t *handle = NULL;
-       struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+       struct inode *inode = file_inode(vma->vm_file);
+       struct super_block *sb = inode->i_sb;
        bool write = vmf->flags & FAULT_FLAG_WRITE;
 
        if (write) {
                sb_start_pagefault(sb);
                file_update_time(vma->vm_file);
+               down_read(&EXT4_I(inode)->i_mmap_sem);
                handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
                                                EXT4_DATA_TRANS_BLOCKS(sb));
-       }
+       } else
+               down_read(&EXT4_I(inode)->i_mmap_sem);
 
        if (IS_ERR(handle))
                result = VM_FAULT_SIGBUS;
        else
-               result = __dax_fault(vma, vmf, ext4_get_block_dax,
-                                               ext4_end_io_unwritten);
+               result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL);
 
        if (write) {
                if (!IS_ERR(handle))
                        ext4_journal_stop(handle);
+               up_read(&EXT4_I(inode)->i_mmap_sem);
                sb_end_pagefault(sb);
-       }
+       } else
+               up_read(&EXT4_I(inode)->i_mmap_sem);
 
        return result;
 }
@@ -246,44 +238,88 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
        if (write) {
                sb_start_pagefault(sb);
                file_update_time(vma->vm_file);
+               down_read(&EXT4_I(inode)->i_mmap_sem);
                handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
                                ext4_chunk_trans_blocks(inode,
                                                        PMD_SIZE / PAGE_SIZE));
-       }
+       } else
+               down_read(&EXT4_I(inode)->i_mmap_sem);
 
        if (IS_ERR(handle))
                result = VM_FAULT_SIGBUS;
        else
                result = __dax_pmd_fault(vma, addr, pmd, flags,
-                               ext4_get_block_dax, ext4_end_io_unwritten);
+                               ext4_dax_mmap_get_block, NULL);
 
        if (write) {
                if (!IS_ERR(handle))
                        ext4_journal_stop(handle);
+               up_read(&EXT4_I(inode)->i_mmap_sem);
                sb_end_pagefault(sb);
-       }
+       } else
+               up_read(&EXT4_I(inode)->i_mmap_sem);
 
        return result;
 }
 
 static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-       return dax_mkwrite(vma, vmf, ext4_get_block_dax,
-                               ext4_end_io_unwritten);
+       int err;
+       struct inode *inode = file_inode(vma->vm_file);
+
+       sb_start_pagefault(inode->i_sb);
+       file_update_time(vma->vm_file);
+       down_read(&EXT4_I(inode)->i_mmap_sem);
+       err = __dax_mkwrite(vma, vmf, ext4_dax_mmap_get_block, NULL);
+       up_read(&EXT4_I(inode)->i_mmap_sem);
+       sb_end_pagefault(inode->i_sb);
+
+       return err;
+}
+
+/*
+ * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite()
+ * handler we check for races against truncate. Note that since we cycle through
+ * i_mmap_sem, we are sure that also any hole punching that began before we
+ * were called is finished by now and so if it included part of the file we
+ * are working on, our pte will get unmapped and the check for pte_same() in
+ * wp_pfn_shared() fails. Thus fault gets retried and things work out as
+ * desired.
+ */
+static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
+                               struct vm_fault *vmf)
+{
+       struct inode *inode = file_inode(vma->vm_file);
+       struct super_block *sb = inode->i_sb;
+       loff_t size;
+       int ret;
+
+       sb_start_pagefault(sb);
+       file_update_time(vma->vm_file);
+       down_read(&EXT4_I(inode)->i_mmap_sem);
+       size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       if (vmf->pgoff >= size)
+               ret = VM_FAULT_SIGBUS;
+       else
+               ret = dax_pfn_mkwrite(vma, vmf);
+       up_read(&EXT4_I(inode)->i_mmap_sem);
+       sb_end_pagefault(sb);
+
+       return ret;
 }
 
 static const struct vm_operations_struct ext4_dax_vm_ops = {
        .fault          = ext4_dax_fault,
        .pmd_fault      = ext4_dax_pmd_fault,
        .page_mkwrite   = ext4_dax_mkwrite,
-       .pfn_mkwrite    = dax_pfn_mkwrite,
+       .pfn_mkwrite    = ext4_dax_pfn_mkwrite,
 };
 #else
 #define ext4_dax_vm_ops        ext4_file_vm_ops
 #endif
 
 static const struct vm_operations_struct ext4_file_vm_ops = {
-       .fault          = filemap_fault,
+       .fault          = ext4_filemap_fault,
        .map_pages      = filemap_map_pages,
        .page_mkwrite   = ext4_page_mkwrite,
 };
@@ -527,11 +563,11 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
        int blkbits;
        int ret = 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        isize = i_size_read(inode);
        if (offset >= isize) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                return -ENXIO;
        }
 
@@ -579,7 +615,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
                dataoff = (loff_t)last << blkbits;
        } while (last <= end);
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (dataoff > isize)
                return -ENXIO;
@@ -600,11 +636,11 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
        int blkbits;
        int ret = 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        isize = i_size_read(inode);
        if (offset >= isize) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                return -ENXIO;
        }
 
@@ -655,7 +691,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
                break;
        } while (last <= end);
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (holeoff > isize)
                holeoff = isize;
index 1b8024d..3fcfd50 100644 (file)
@@ -799,6 +799,13 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
                inode->i_gid = dir->i_gid;
        } else
                inode_init_owner(inode, dir, mode);
+
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+           ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT))
+               ei->i_projid = EXT4_I(dir)->i_projid;
+       else
+               ei->i_projid = make_kprojid(&init_user_ns, EXT4_DEF_PROJID);
+
        err = dquot_initialize(inode);
        if (err)
                goto out;
index d884989..dfe3b9b 100644 (file)
@@ -995,12 +995,11 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
  */
 static int ext4_add_dirent_to_inline(handle_t *handle,
                                     struct ext4_filename *fname,
-                                    struct dentry *dentry,
+                                    struct inode *dir,
                                     struct inode *inode,
                                     struct ext4_iloc *iloc,
                                     void *inline_start, int inline_size)
 {
-       struct inode    *dir = d_inode(dentry->d_parent);
        int             err;
        struct ext4_dir_entry_2 *de;
 
@@ -1245,12 +1244,11 @@ out:
  * the new created block.
  */
 int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
-                             struct dentry *dentry, struct inode *inode)
+                             struct inode *dir, struct inode *inode)
 {
        int ret, inline_size;
        void *inline_start;
        struct ext4_iloc iloc;
-       struct inode *dir = d_inode(dentry->d_parent);
 
        ret = ext4_get_inode_loc(dir, &iloc);
        if (ret)
@@ -1264,7 +1262,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
                                                 EXT4_INLINE_DOTDOT_SIZE;
        inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
 
-       ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc,
+       ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc,
                                        inline_start, inline_size);
        if (ret != -ENOSPC)
                goto out;
@@ -1285,7 +1283,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
        if (inline_size) {
                inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
 
-               ret = ext4_add_dirent_to_inline(handle, fname, dentry,
+               ret = ext4_add_dirent_to_inline(handle, fname, dir,
                                                inode, &iloc, inline_start,
                                                inline_size);
 
index b3bd912..83bc8bf 100644 (file)
@@ -383,6 +383,21 @@ static int __check_block_validity(struct inode *inode, const char *func,
        return 0;
 }
 
+int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
+                      ext4_lblk_t len)
+{
+       int ret;
+
+       if (ext4_encrypted_inode(inode))
+               return ext4_encrypted_zeroout(inode, lblk, pblk, len);
+
+       ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS);
+       if (ret > 0)
+               ret = 0;
+
+       return ret;
+}
+
 #define check_block_validity(inode, map)       \
        __check_block_validity((inode), __func__, __LINE__, (map))
 
@@ -403,8 +418,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
         * out taking i_data_sem.  So at the time the unwritten extent
         * could be converted.
         */
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               down_read(&EXT4_I(inode)->i_data_sem);
+       down_read(&EXT4_I(inode)->i_data_sem);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                retval = ext4_ext_map_blocks(handle, inode, map, flags &
                                             EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -412,8 +426,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
                retval = ext4_ind_map_blocks(handle, inode, map, flags &
                                             EXT4_GET_BLOCKS_KEEP_SIZE);
        }
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               up_read((&EXT4_I(inode)->i_data_sem));
+       up_read((&EXT4_I(inode)->i_data_sem));
 
        /*
         * We don't check m_len because extent will be collpased in status
@@ -509,8 +522,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
         * Try to see if we can get the block without requesting a new
         * file system block.
         */
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               down_read(&EXT4_I(inode)->i_data_sem);
+       down_read(&EXT4_I(inode)->i_data_sem);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                retval = ext4_ext_map_blocks(handle, inode, map, flags &
                                             EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -541,8 +553,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
                if (ret < 0)
                        retval = ret;
        }
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               up_read((&EXT4_I(inode)->i_data_sem));
+       up_read((&EXT4_I(inode)->i_data_sem));
 
 found:
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
@@ -625,6 +636,22 @@ found:
                        WARN_ON(1);
                }
 
+               /*
+                * We have to zeroout blocks before inserting them into extent
+                * status tree. Otherwise someone could look them up there and
+                * use them before they are really zeroed.
+                */
+               if (flags & EXT4_GET_BLOCKS_ZERO &&
+                   map->m_flags & EXT4_MAP_MAPPED &&
+                   map->m_flags & EXT4_MAP_NEW) {
+                       ret = ext4_issue_zeroout(inode, map->m_lblk,
+                                                map->m_pblk, map->m_len);
+                       if (ret) {
+                               retval = ret;
+                               goto out_sem;
+                       }
+               }
+
                /*
                 * If the extent has been zeroed out, we don't need to update
                 * extent status tree.
@@ -632,7 +659,7 @@ found:
                if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
                    ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
                        if (ext4_es_is_written(&es))
-                               goto has_zeroout;
+                               goto out_sem;
                }
                status = map->m_flags & EXT4_MAP_UNWRITTEN ?
                                EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
@@ -643,11 +670,13 @@ found:
                        status |= EXTENT_STATUS_DELAYED;
                ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
                                            map->m_pblk, status);
-               if (ret < 0)
+               if (ret < 0) {
                        retval = ret;
+                       goto out_sem;
+               }
        }
 
-has_zeroout:
+out_sem:
        up_write((&EXT4_I(inode)->i_data_sem));
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
                ret = check_block_validity(inode, map);
@@ -674,7 +703,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
        map.m_lblk = iblock;
        map.m_len = bh->b_size >> inode->i_blkbits;
 
-       if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) {
+       if (flags && !handle) {
                /* Direct IO write... */
                if (map.m_len > DIO_MAX_BLOCKS)
                        map.m_len = DIO_MAX_BLOCKS;
@@ -694,16 +723,6 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
 
                map_bh(bh, inode->i_sb, map.m_pblk);
                bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
-               if (IS_DAX(inode) && buffer_unwritten(bh)) {
-                       /*
-                        * dgc: I suspect unwritten conversion on ext4+DAX is
-                        * fundamentally broken here when there are concurrent
-                        * read/write in progress on this inode.
-                        */
-                       WARN_ON_ONCE(io_end);
-                       bh->b_assoc_map = inode->i_mapping;
-                       bh->b_private = (void *)(unsigned long)iblock;
-               }
                if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
                        set_buffer_defer_completion(bh);
                bh->b_size = inode->i_sb->s_blocksize * map.m_len;
@@ -879,9 +898,6 @@ int do_journal_get_write_access(handle_t *handle,
        return ret;
 }
 
-static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
-                  struct buffer_head *bh_result, int create);
-
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
 static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
                                  get_block_t *get_block)
@@ -3054,25 +3070,96 @@ int ext4_get_block_write(struct inode *inode, sector_t iblock,
                               EXT4_GET_BLOCKS_IO_CREATE_EXT);
 }
 
-static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
+static int ext4_get_block_overwrite(struct inode *inode, sector_t iblock,
                   struct buffer_head *bh_result, int create)
 {
-       ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n",
+       int ret;
+
+       ext4_debug("ext4_get_block_overwrite: inode %lu, create flag %d\n",
                   inode->i_ino, create);
-       return _ext4_get_block(inode, iblock, bh_result,
-                              EXT4_GET_BLOCKS_NO_LOCK);
+       ret = _ext4_get_block(inode, iblock, bh_result, 0);
+       /*
+        * Blocks should have been preallocated! ext4_file_write_iter() checks
+        * that.
+        */
+       WARN_ON_ONCE(!buffer_mapped(bh_result));
+
+       return ret;
 }
 
-int ext4_get_block_dax(struct inode *inode, sector_t iblock,
-                  struct buffer_head *bh_result, int create)
+#ifdef CONFIG_FS_DAX
+int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
+                           struct buffer_head *bh_result, int create)
 {
-       int flags = EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_UNWRIT_EXT;
-       if (create)
-               flags |= EXT4_GET_BLOCKS_CREATE;
-       ext4_debug("ext4_get_block_dax: inode %lu, create flag %d\n",
+       int ret, err;
+       int credits;
+       struct ext4_map_blocks map;
+       handle_t *handle = NULL;
+       int flags = 0;
+
+       ext4_debug("ext4_dax_mmap_get_block: inode %lu, create flag %d\n",
                   inode->i_ino, create);
-       return _ext4_get_block(inode, iblock, bh_result, flags);
+       map.m_lblk = iblock;
+       map.m_len = bh_result->b_size >> inode->i_blkbits;
+       credits = ext4_chunk_trans_blocks(inode, map.m_len);
+       if (create) {
+               flags |= EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_CREATE_ZERO;
+               handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
+               if (IS_ERR(handle)) {
+                       ret = PTR_ERR(handle);
+                       return ret;
+               }
+       }
+
+       ret = ext4_map_blocks(handle, inode, &map, flags);
+       if (create) {
+               err = ext4_journal_stop(handle);
+               if (ret >= 0 && err < 0)
+                       ret = err;
+       }
+       if (ret <= 0)
+               goto out;
+       if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+               int err2;
+
+               /*
+                * We are protected by i_mmap_sem so we know block cannot go
+                * away from under us even though we dropped i_data_sem.
+                * Convert extent to written and write zeros there.
+                *
+                * Note: We may get here even when create == 0.
+                */
+               handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
+               if (IS_ERR(handle)) {
+                       ret = PTR_ERR(handle);
+                       goto out;
+               }
+
+               err = ext4_map_blocks(handle, inode, &map,
+                     EXT4_GET_BLOCKS_CONVERT | EXT4_GET_BLOCKS_CREATE_ZERO);
+               if (err < 0)
+                       ret = err;
+               err2 = ext4_journal_stop(handle);
+               if (err2 < 0 && ret > 0)
+                       ret = err2;
+       }
+out:
+       WARN_ON_ONCE(ret == 0 && create);
+       if (ret > 0) {
+               map_bh(bh_result, inode->i_sb, map.m_pblk);
+               bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
+                                       map.m_flags;
+               /*
+                * At least for now we have to clear BH_New so that DAX code
+                * doesn't attempt to zero blocks again in a racy way.
+                */
+               bh_result->b_state &= ~(1 << BH_New);
+               bh_result->b_size = map.m_len << inode->i_blkbits;
+               ret = 0;
+       }
+       return ret;
 }
+#endif
 
 static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
                            ssize_t size, void *private)
@@ -3143,10 +3230,8 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
        /* If we do a overwrite dio, i_mutex locking can be released */
        overwrite = *((int *)iocb->private);
 
-       if (overwrite) {
-               down_read(&EXT4_I(inode)->i_data_sem);
-               mutex_unlock(&inode->i_mutex);
-       }
+       if (overwrite)
+               inode_unlock(inode);
 
        /*
         * We could direct write to holes and fallocate.
@@ -3189,7 +3274,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
        }
 
        if (overwrite) {
-               get_block_func = ext4_get_block_write_nolock;
+               get_block_func = ext4_get_block_overwrite;
        } else {
                get_block_func = ext4_get_block_write;
                dio_flags = DIO_LOCKING;
@@ -3245,10 +3330,8 @@ retake_lock:
        if (iov_iter_rw(iter) == WRITE)
                inode_dio_end(inode);
        /* take i_mutex locking again if we do a ovewrite dio */
-       if (overwrite) {
-               up_read(&EXT4_I(inode)->i_data_sem);
-               mutex_lock(&inode->i_mutex);
-       }
+       if (overwrite)
+               inode_lock(inode);
 
        return ret;
 }
@@ -3558,6 +3641,35 @@ int ext4_can_truncate(struct inode *inode)
        return 0;
 }
 
+/*
+ * We have to make sure i_disksize gets properly updated before we truncate
+ * page cache due to hole punching or zero range. Otherwise i_disksize update
+ * can get lost as it may have been postponed to submission of writeback but
+ * that will never happen after we truncate page cache.
+ */
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+                                     loff_t len)
+{
+       handle_t *handle;
+       loff_t size = i_size_read(inode);
+
+       WARN_ON(!inode_is_locked(inode));
+       if (offset > size || offset + len < size)
+               return 0;
+
+       if (EXT4_I(inode)->i_disksize >= size)
+               return 0;
+
+       handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+       ext4_update_i_disksize(inode, size);
+       ext4_mark_inode_dirty(handle, inode);
+       ext4_journal_stop(handle);
+
+       return 0;
+}
+
 /*
  * ext4_punch_hole: punches a hole in a file by releaseing the blocks
  * associated with the given offset and length
@@ -3595,7 +3707,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
                        return ret;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* No need to punch hole beyond i_size */
        if (offset >= inode->i_size)
@@ -3623,17 +3735,26 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 
        }
 
+       /* Wait for all existing dio workers; newcomers will block on i_mutex */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+
+       /*
+        * Prevent page faults from reinstantiating pages we have released from
+        * page cache.
+        */
+       down_write(&EXT4_I(inode)->i_mmap_sem);
        first_block_offset = round_up(offset, sb->s_blocksize);
        last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
 
        /* Now release the pages and zero block aligned part of pages*/
-       if (last_block_offset > first_block_offset)
+       if (last_block_offset > first_block_offset) {
+               ret = ext4_update_disksize_before_punch(inode, offset, length);
+               if (ret)
+                       goto out_dio;
                truncate_pagecache_range(inode, first_block_offset,
                                         last_block_offset);
-
-       /* Wait all existing dio workers, newcomers will block on i_mutex */
-       ext4_inode_block_unlocked_dio(inode);
-       inode_dio_wait(inode);
+       }
 
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                credits = ext4_writepage_trans_blocks(inode);
@@ -3680,19 +3801,15 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
 
-       /* Now release the pages again to reduce race window */
-       if (last_block_offset > first_block_offset)
-               truncate_pagecache_range(inode, first_block_offset,
-                                        last_block_offset);
-
        inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
 out_stop:
        ext4_journal_stop(handle);
 out_dio:
+       up_write(&EXT4_I(inode)->i_mmap_sem);
        ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
@@ -3762,7 +3879,7 @@ void ext4_truncate(struct inode *inode)
         * have i_mutex locked because it's not necessary.
         */
        if (!(inode->i_state & (I_NEW|I_FREEING)))
-               WARN_ON(!mutex_is_locked(&inode->i_mutex));
+               WARN_ON(!inode_is_locked(inode));
        trace_ext4_truncate_enter(inode);
 
        if (!ext4_can_truncate(inode))
@@ -4076,6 +4193,14 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
                EXT4_I(inode)->i_inline_off = 0;
 }
 
+int ext4_get_projid(struct inode *inode, kprojid_t *projid)
+{
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_RO_COMPAT_PROJECT))
+               return -EOPNOTSUPP;
+       *projid = EXT4_I(inode)->i_projid;
+       return 0;
+}
+
 struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 {
        struct ext4_iloc iloc;
@@ -4087,6 +4212,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        int block;
        uid_t i_uid;
        gid_t i_gid;
+       projid_t i_projid;
 
        inode = iget_locked(sb, ino);
        if (!inode)
@@ -4136,12 +4262,20 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        inode->i_mode = le16_to_cpu(raw_inode->i_mode);
        i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
        i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+           EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+           EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+               i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid);
+       else
+               i_projid = EXT4_DEF_PROJID;
+
        if (!(test_opt(inode->i_sb, NO_UID32))) {
                i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
                i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
        }
        i_uid_write(inode, i_uid);
        i_gid_write(inode, i_gid);
+       ei->i_projid = make_kprojid(&init_user_ns, i_projid);
        set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
 
        ext4_clear_state_flags(ei);     /* Only relevant on 32-bit archs */
@@ -4440,6 +4574,7 @@ static int ext4_do_update_inode(handle_t *handle,
        int need_datasync = 0, set_large_file = 0;
        uid_t i_uid;
        gid_t i_gid;
+       projid_t i_projid;
 
        spin_lock(&ei->i_raw_lock);
 
@@ -4452,6 +4587,7 @@ static int ext4_do_update_inode(handle_t *handle,
        raw_inode->i_mode = cpu_to_le16(inode->i_mode);
        i_uid = i_uid_read(inode);
        i_gid = i_gid_read(inode);
+       i_projid = from_kprojid(&init_user_ns, ei->i_projid);
        if (!(test_opt(inode->i_sb, NO_UID32))) {
                raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
                raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
@@ -4529,6 +4665,15 @@ static int ext4_do_update_inode(handle_t *handle,
                                cpu_to_le16(ei->i_extra_isize);
                }
        }
+
+       BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                       EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+              i_projid != EXT4_DEF_PROJID);
+
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+           EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+               raw_inode->i_projid = cpu_to_le32(i_projid);
+
        ext4_inode_csum_set(inode, raw_inode, ei);
        spin_unlock(&ei->i_raw_lock);
        if (inode->i_sb->s_flags & MS_LAZYTIME)
@@ -4824,6 +4969,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                        } else
                                ext4_wait_for_tail_page_commit(inode);
                }
+               down_write(&EXT4_I(inode)->i_mmap_sem);
                /*
                 * Truncate pagecache after we've waited for commit
                 * in data=journal mode to make pages freeable.
@@ -4831,6 +4977,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                truncate_pagecache(inode, inode->i_size);
                if (shrink)
                        ext4_truncate(inode);
+               up_write(&EXT4_I(inode)->i_mmap_sem);
        }
 
        if (!rc) {
@@ -5279,6 +5426,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
        sb_start_pagefault(inode->i_sb);
        file_update_time(vma->vm_file);
+
+       down_read(&EXT4_I(inode)->i_mmap_sem);
        /* Delalloc case is easy... */
        if (test_opt(inode->i_sb, DELALLOC) &&
            !ext4_should_journal_data(inode) &&
@@ -5348,6 +5497,19 @@ retry_alloc:
 out_ret:
        ret = block_page_mkwrite_return(ret);
 out:
+       up_read(&EXT4_I(inode)->i_mmap_sem);
        sb_end_pagefault(inode->i_sb);
        return ret;
 }
+
+int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+       struct inode *inode = file_inode(vma->vm_file);
+       int err;
+
+       down_read(&EXT4_I(inode)->i_mmap_sem);
+       err = filemap_fault(vma, vmf);
+       up_read(&EXT4_I(inode)->i_mmap_sem);
+
+       return err;
+}
index 5e872fd..0f6c369 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/mount.h>
 #include <linux/file.h>
 #include <linux/random.h>
+#include <linux/quotaops.h>
 #include <asm/uaccess.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
@@ -202,6 +203,238 @@ static int uuid_is_zero(__u8 u[16])
        return 1;
 }
 
+/*
+ * Apply a new set of EXT4_*_FL flags to @inode.
+ *
+ * Both callers in ext4_ioctl() take the inode lock and mount write access
+ * around this call.
+ *
+ * Returns 0 on success or a negative errno: -EPERM for a quota file or when
+ * the caller lacks the capability required for the requested change,
+ * -EOPNOTSUPP when asked to add EXT4_EOFBLOCKS_FL, otherwise whatever the
+ * journalling or migration helpers report.
+ */
+static int ext4_ioctl_setflags(struct inode *inode,
+                              unsigned int flags)
+{
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       handle_t *handle = NULL;
+       /*
+        * Default for the permission checks below.  It must be negative:
+        * this value is returned straight to the ioctl caller.
+        */
+       int err = -EPERM, migrate = 0;
+       struct ext4_iloc iloc;
+       unsigned int oldflags, mask, i;
+       unsigned int jflag;
+
+       /* Is it quota file? Do not allow user to mess with it */
+       if (IS_NOQUOTA(inode))
+               goto flags_out;
+
+       oldflags = ei->i_flags;
+
+       /* The JOURNAL_DATA flag is modifiable only by root */
+       jflag = flags & EXT4_JOURNAL_DATA_FL;
+
+       /*
+        * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+        * the relevant capability.
+        *
+        * This test looks nicer. Thanks to Pauline Middelink
+        */
+       if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
+               if (!capable(CAP_LINUX_IMMUTABLE))
+                       goto flags_out;
+       }
+
+       /*
+        * The JOURNAL_DATA flag can only be changed by
+        * the relevant capability.
+        */
+       if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
+               if (!capable(CAP_SYS_RESOURCE))
+                       goto flags_out;
+       }
+       /* Toggling EXTENTS means converting the block map after the update */
+       if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
+               migrate = 1;
+
+       if (flags & EXT4_EOFBLOCKS_FL) {
+               /* we don't support adding EOFBLOCKS flag */
+               if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
+                       err = -EOPNOTSUPP;
+                       goto flags_out;
+               }
+       } else if (oldflags & EXT4_EOFBLOCKS_FL)
+               ext4_truncate(inode);
+
+       handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
+       if (IS_ERR(handle)) {
+               err = PTR_ERR(handle);
+               goto flags_out;
+       }
+       if (IS_SYNC(inode))
+               ext4_handle_sync(handle);
+       err = ext4_reserve_inode_write(handle, inode, &iloc);
+       if (err)
+               goto flags_err;
+
+       /* Only bits listed in EXT4_FL_USER_MODIFIABLE are copied from @flags */
+       for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
+               if (!(mask & EXT4_FL_USER_MODIFIABLE))
+                       continue;
+               if (mask & flags)
+                       ext4_set_inode_flag(inode, i);
+               else
+                       ext4_clear_inode_flag(inode, i);
+       }
+
+       ext4_set_inode_flags(inode);
+       inode->i_ctime = ext4_current_time(inode);
+
+       err = ext4_mark_iloc_dirty(handle, inode, &iloc);
+flags_err:
+       ext4_journal_stop(handle);
+       if (err)
+               goto flags_out;
+
+       /* Journal-mode switch and migration run after the handle is stopped */
+       if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
+               err = ext4_change_inode_journal_flag(inode, jflag);
+       if (err)
+               goto flags_out;
+       if (migrate) {
+               if (flags & EXT4_EXTENTS_FL)
+                       err = ext4_ext_migrate(inode);
+               else
+                       err = ext4_ind_migrate(inode);
+       }
+
+flags_out:
+       return err;
+}
+
+#ifdef CONFIG_QUOTA
+/*
+ * Set the project ID of the inode behind @filp to @projid.
+ *
+ * Takes mount write access and the inode lock itself.  Returns 0 on success
+ * (including when the ID is already @projid) or a negative errno:
+ * -EOPNOTSUPP when the filesystem lacks the project feature (and @projid is
+ * not EXT4_DEF_PROJID) or uses old-size inodes, -EPERM for a quota file,
+ * -EOVERFLOW when i_projid does not fit in the on-disk inode, otherwise
+ * whatever the quota or journalling helpers report.
+ */
+static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
+{
+       struct inode *inode = file_inode(filp);
+       struct super_block *sb = inode->i_sb;
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       int err, rc;
+       handle_t *handle;
+       kprojid_t kprojid;
+       struct ext4_iloc iloc;
+       struct ext4_inode *raw_inode;
+
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                       EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+               if (projid != EXT4_DEF_PROJID)
+                       return -EOPNOTSUPP;
+               else
+                       return 0;
+       }
+
+       if (EXT4_INODE_SIZE(sb) <= EXT4_GOOD_OLD_INODE_SIZE)
+               return -EOPNOTSUPP;
+
+       kprojid = make_kprojid(&init_user_ns, (projid_t)projid);
+
+       /* Nothing to do if the inode already carries this project ID */
+       if (projid_eq(kprojid, EXT4_I(inode)->i_projid))
+               return 0;
+
+       err = mnt_want_write_file(filp);
+       if (err)
+               return err;
+
+       err = -EPERM;
+       inode_lock(inode);
+       /* Is it quota file? Do not allow user to mess with it */
+       if (IS_NOQUOTA(inode))
+               goto out_unlock;
+
+       err = ext4_get_inode_loc(inode, &iloc);
+       if (err)
+               goto out_unlock;
+
+       /* Only peek at the raw inode to check that the field exists on disk */
+       raw_inode = ext4_raw_inode(&iloc);
+       if (!EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) {
+               err = -EOVERFLOW;
+               brelse(iloc.bh);
+               goto out_unlock;
+       }
+       brelse(iloc.bh);
+
+       /* dquot_initialize() can fail; ext4_link() checks it the same way */
+       err = dquot_initialize(inode);
+       if (err)
+               goto out_unlock;
+
+       handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
+               EXT4_QUOTA_INIT_BLOCKS(sb) +
+               EXT4_QUOTA_DEL_BLOCKS(sb) + 3);
+       if (IS_ERR(handle)) {
+               err = PTR_ERR(handle);
+               goto out_unlock;
+       }
+
+       err = ext4_reserve_inode_write(handle, inode, &iloc);
+       if (err)
+               goto out_stop;
+
+       if (sb_has_quota_limits_enabled(sb, PRJQUOTA)) {
+               struct dquot *transfer_to[MAXQUOTAS] = { };
+
+               transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
+               /*
+                * A failed dqget() is tested as NULL here but with IS_ERR()
+                * in ext4_statfs_project(); reject both forms so a failed
+                * lookup can never be handed on as a dquot.  As before, a
+                * failed lookup skips the transfer without failing the call.
+                */
+               if (!IS_ERR_OR_NULL(transfer_to[PRJQUOTA])) {
+                       err = __dquot_transfer(inode, transfer_to);
+                       dqput(transfer_to[PRJQUOTA]);
+                       if (err)
+                               goto out_dirty;
+               }
+       }
+       EXT4_I(inode)->i_projid = kprojid;
+       inode->i_ctime = ext4_current_time(inode);
+out_dirty:
+       /* Always pair the reserve above; keep the first error seen */
+       rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
+       if (!err)
+               err = rc;
+out_stop:
+       ext4_journal_stop(handle);
+out_unlock:
+       inode_unlock(inode);
+       mnt_drop_write_file(filp);
+       return err;
+}
+#else
+/* Without CONFIG_QUOTA only the default project ID is accepted. */
+static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
+{
+       if (projid != EXT4_DEF_PROJID)
+               return -EOPNOTSUPP;
+       return 0;
+}
+#endif
+
+/*
+ * Map the EXT4_*_FL bits set in @iflags onto their FS_XFLAG_* counterparts.
+ * Bits without a counterpart listed here are dropped.
+ */
+static inline __u32 ext4_iflags_to_xflags(unsigned long iflags)
+{
+       __u32 out = 0;
+
+       out |= (iflags & EXT4_SYNC_FL) ? FS_XFLAG_SYNC : 0;
+       out |= (iflags & EXT4_IMMUTABLE_FL) ? FS_XFLAG_IMMUTABLE : 0;
+       out |= (iflags & EXT4_APPEND_FL) ? FS_XFLAG_APPEND : 0;
+       out |= (iflags & EXT4_NODUMP_FL) ? FS_XFLAG_NODUMP : 0;
+       out |= (iflags & EXT4_NOATIME_FL) ? FS_XFLAG_NOATIME : 0;
+       out |= (iflags & EXT4_PROJINHERIT_FL) ? FS_XFLAG_PROJINHERIT : 0;
+
+       return out;
+}
+
+/*
+ * Map the FS_XFLAG_* bits set in @xflags onto their EXT4_*_FL counterparts.
+ * Bits without a counterpart listed here are dropped.
+ */
+static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
+{
+       unsigned long out = 0;
+
+       out |= (xflags & FS_XFLAG_SYNC) ? EXT4_SYNC_FL : 0;
+       out |= (xflags & FS_XFLAG_IMMUTABLE) ? EXT4_IMMUTABLE_FL : 0;
+       out |= (xflags & FS_XFLAG_APPEND) ? EXT4_APPEND_FL : 0;
+       out |= (xflags & FS_XFLAG_NODUMP) ? EXT4_NODUMP_FL : 0;
+       out |= (xflags & FS_XFLAG_NOATIME) ? EXT4_NOATIME_FL : 0;
+       out |= (xflags & FS_XFLAG_PROJINHERIT) ? EXT4_PROJINHERIT_FL : 0;
+
+       return out;
+}
+
 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
        struct inode *inode = file_inode(filp);
@@ -217,11 +450,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
                return put_user(flags, (int __user *) arg);
        case EXT4_IOC_SETFLAGS: {
-               handle_t *handle = NULL;
-               int err, migrate = 0;
-               struct ext4_iloc iloc;
-               unsigned int oldflags, mask, i;
-               unsigned int jflag;
+               int err;
 
                if (!inode_owner_or_capable(inode))
                        return -EACCES;
@@ -235,90 +464,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
                flags = ext4_mask_flags(inode->i_mode, flags);
 
-               err = -EPERM;
-               mutex_lock(&inode->i_mutex);
-               /* Is it quota file? Do not allow user to mess with it */
-               if (IS_NOQUOTA(inode))
-                       goto flags_out;
-
-               oldflags = ei->i_flags;
-
-               /* The JOURNAL_DATA flag is modifiable only by root */
-               jflag = flags & EXT4_JOURNAL_DATA_FL;
-
-               /*
-                * The IMMUTABLE and APPEND_ONLY flags can only be changed by
-                * the relevant capability.
-                *
-                * This test looks nicer. Thanks to Pauline Middelink
-                */
-               if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
-                       if (!capable(CAP_LINUX_IMMUTABLE))
-                               goto flags_out;
-               }
-
-               /*
-                * The JOURNAL_DATA flag can only be changed by
-                * the relevant capability.
-                */
-               if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
-                       if (!capable(CAP_SYS_RESOURCE))
-                               goto flags_out;
-               }
-               if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
-                       migrate = 1;
-
-               if (flags & EXT4_EOFBLOCKS_FL) {
-                       /* we don't support adding EOFBLOCKS flag */
-                       if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
-                               err = -EOPNOTSUPP;
-                               goto flags_out;
-                       }
-               } else if (oldflags & EXT4_EOFBLOCKS_FL)
-                       ext4_truncate(inode);
-
-               handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
-               if (IS_ERR(handle)) {
-                       err = PTR_ERR(handle);
-                       goto flags_out;
-               }
-               if (IS_SYNC(inode))
-                       ext4_handle_sync(handle);
-               err = ext4_reserve_inode_write(handle, inode, &iloc);
-               if (err)
-                       goto flags_err;
-
-               for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
-                       if (!(mask & EXT4_FL_USER_MODIFIABLE))
-                               continue;
-                       if (mask & flags)
-                               ext4_set_inode_flag(inode, i);
-                       else
-                               ext4_clear_inode_flag(inode, i);
-               }
-
-               ext4_set_inode_flags(inode);
-               inode->i_ctime = ext4_current_time(inode);
-
-               err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-flags_err:
-               ext4_journal_stop(handle);
-               if (err)
-                       goto flags_out;
-
-               if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
-                       err = ext4_change_inode_journal_flag(inode, jflag);
-               if (err)
-                       goto flags_out;
-               if (migrate) {
-                       if (flags & EXT4_EXTENTS_FL)
-                               err = ext4_ext_migrate(inode);
-                       else
-                               err = ext4_ind_migrate(inode);
-               }
-
-flags_out:
-               mutex_unlock(&inode->i_mutex);
+               inode_lock(inode);
+               err = ext4_ioctl_setflags(inode, flags);
+               inode_unlock(inode);
                mnt_drop_write_file(filp);
                return err;
        }
@@ -349,7 +497,7 @@ flags_out:
                        goto setversion_out;
                }
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
                if (IS_ERR(handle)) {
                        err = PTR_ERR(handle);
@@ -364,7 +512,7 @@ flags_out:
                ext4_journal_stop(handle);
 
 unlock_out:
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 setversion_out:
                mnt_drop_write_file(filp);
                return err;
@@ -510,9 +658,9 @@ group_add_out:
                 * ext4_ext_swap_inode_data before we switch the
                 * inode format to prevent read.
                 */
-               mutex_lock(&(inode->i_mutex));
+               inode_lock((inode));
                err = ext4_ext_migrate(inode);
-               mutex_unlock(&(inode->i_mutex));
+               inode_unlock((inode));
                mnt_drop_write_file(filp);
                return err;
        }
@@ -689,6 +837,60 @@ encryption_policy_out:
                return -EOPNOTSUPP;
 #endif
        }
+       case EXT4_IOC_FSGETXATTR:
+       {
+               struct fsxattr fa;
+
+               memset(&fa, 0, sizeof(struct fsxattr));
+               ext4_get_inode_flags(ei);
+               fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE);
+
+               if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                               EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+                       fa.fsx_projid = (__u32)from_kprojid(&init_user_ns,
+                               EXT4_I(inode)->i_projid);
+               }
+
+               if (copy_to_user((struct fsxattr __user *)arg,
+                                &fa, sizeof(fa)))
+                       return -EFAULT;
+               return 0;
+       }
+       case EXT4_IOC_FSSETXATTR:
+       {
+               struct fsxattr fa;
+               int err;
+
+               if (copy_from_user(&fa, (struct fsxattr __user *)arg,
+                                  sizeof(fa)))
+                       return -EFAULT;
+
+               /* Make sure caller has proper permission */
+               if (!inode_owner_or_capable(inode))
+                       return -EACCES;
+
+               err = mnt_want_write_file(filp);
+               if (err)
+                       return err;
+
+               flags = ext4_xflags_to_iflags(fa.fsx_xflags);
+               flags = ext4_mask_flags(inode->i_mode, flags);
+
+               inode_lock(inode);
+               flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) |
+                        (flags & EXT4_FL_XFLAG_VISIBLE);
+               err = ext4_ioctl_setflags(inode, flags);
+               inode_unlock(inode);
+               mnt_drop_write_file(filp);
+               if (err)
+                       return err;
+
+               err = ext4_ioctl_setproject(filp, fa.fsx_projid);
+               if (err)
+                       return err;
+
+               return 0;
+       }
        default:
                return -ENOTTY;
        }
index f27e0c2..06574dd 100644 (file)
@@ -273,7 +273,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
                struct ext4_filename *fname,
                struct ext4_dir_entry_2 **res_dir);
 static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
-                            struct dentry *dentry, struct inode *inode);
+                            struct inode *dir, struct inode *inode);
 
 /* checksumming functions */
 void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
@@ -1928,10 +1928,9 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
  * directory, and adds the dentry to the indexed directory.
  */
 static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
-                           struct dentry *dentry,
+                           struct inode *dir,
                            struct inode *inode, struct buffer_head *bh)
 {
-       struct inode    *dir = d_inode(dentry->d_parent);
        struct buffer_head *bh2;
        struct dx_root  *root;
        struct dx_frame frames[2], *frame;
@@ -2086,8 +2085,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
                return retval;
 
        if (ext4_has_inline_data(dir)) {
-               retval = ext4_try_add_inline_entry(handle, &fname,
-                                                  dentry, inode);
+               retval = ext4_try_add_inline_entry(handle, &fname, dir, inode);
                if (retval < 0)
                        goto out;
                if (retval == 1) {
@@ -2097,7 +2095,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
        }
 
        if (is_dx(dir)) {
-               retval = ext4_dx_add_entry(handle, &fname, dentry, inode);
+               retval = ext4_dx_add_entry(handle, &fname, dir, inode);
                if (!retval || (retval != ERR_BAD_DX_DIR))
                        goto out;
                ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
@@ -2119,7 +2117,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
 
                if (blocks == 1 && !dx_fallback &&
                    ext4_has_feature_dir_index(sb)) {
-                       retval = make_indexed_dir(handle, &fname, dentry,
+                       retval = make_indexed_dir(handle, &fname, dir,
                                                  inode, bh);
                        bh = NULL; /* make_indexed_dir releases bh */
                        goto out;
@@ -2154,12 +2152,11 @@ out:
  * Returns 0 for success, or a negative error value
  */
 static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
-                            struct dentry *dentry, struct inode *inode)
+                            struct inode *dir, struct inode *inode)
 {
        struct dx_frame frames[2], *frame;
        struct dx_entry *entries, *at;
        struct buffer_head *bh;
-       struct inode *dir = d_inode(dentry->d_parent);
        struct super_block *sb = dir->i_sb;
        struct ext4_dir_entry_2 *de;
        int err;
@@ -2756,7 +2753,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
                return 0;
 
        WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
-                    !mutex_is_locked(&inode->i_mutex));
+                    !inode_is_locked(inode));
        /*
         * Exit early if inode already is on orphan list. This is a big speedup
         * since we don't have to contend on the global s_orphan_lock.
@@ -2838,7 +2835,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
                return 0;
 
        WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
-                    !mutex_is_locked(&inode->i_mutex));
+                    !inode_is_locked(inode));
        /* Do this quick check before taking global s_orphan_lock. */
        if (list_empty(&ei->i_orphan))
                return 0;
@@ -3212,6 +3209,12 @@ static int ext4_link(struct dentry *old_dentry,
        if (ext4_encrypted_inode(dir) &&
            !ext4_is_child_context_consistent_with_parent(dir, inode))
                return -EPERM;
+
+       if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
+          (!projid_eq(EXT4_I(dir)->i_projid,
+                      EXT4_I(old_dentry->d_inode)->i_projid)))
+               return -EXDEV;
+
        err = dquot_initialize(dir);
        if (err)
                return err;
@@ -3492,6 +3495,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
        int credits;
        u8 old_file_type;
 
+       if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) &&
+           (!projid_eq(EXT4_I(new_dir)->i_projid,
+                       EXT4_I(old_dentry->d_inode)->i_projid)))
+               return -EXDEV;
+
        retval = dquot_initialize(old.dir);
        if (retval)
                return retval;
@@ -3701,6 +3709,14 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
                                                           new.inode)))
                return -EPERM;
 
+       if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
+            !projid_eq(EXT4_I(new_dir)->i_projid,
+                       EXT4_I(old_dentry->d_inode)->i_projid)) ||
+           (ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) &&
+            !projid_eq(EXT4_I(old_dir)->i_projid,
+                       EXT4_I(new_dentry->d_inode)->i_projid)))
+               return -EXDEV;
+
        retval = dquot_initialize(old.dir);
        if (retval)
                return retval;
index f1b56ff..3ed01ec 100644 (file)
@@ -80,6 +80,36 @@ static void ext4_destroy_lazyinit_thread(void);
 static void ext4_unregister_li_request(struct super_block *sb);
 static void ext4_clear_request_list(void);
 
+/*
+ * Lock ordering
+ *
+ * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
+ * i_mmap_rwsem (inode->i_mmap_rwsem)!
+ *
+ * page fault path:
+ * mmap_sem -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
+ *   page lock -> i_data_sem (rw)
+ *
+ * buffered write path:
+ * sb_start_write -> i_mutex -> mmap_sem
+ * sb_start_write -> i_mutex -> transaction start -> page lock ->
+ *   i_data_sem (rw)
+ *
+ * truncate:
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
+ *   i_mmap_rwsem (w) -> page lock
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
+ *   transaction start -> i_data_sem (rw)
+ *
+ * direct IO:
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) ->
+ *   transaction start -> i_data_sem (rw)
+ *
+ * writepages:
+ * transaction start -> page lock(s) -> i_data_sem (rw)
+ */
+
 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
 static struct file_system_type ext2_fs_type = {
        .owner          = THIS_MODULE,
@@ -958,6 +988,7 @@ static void init_once(void *foo)
        INIT_LIST_HEAD(&ei->i_orphan);
        init_rwsem(&ei->xattr_sem);
        init_rwsem(&ei->i_data_sem);
+       init_rwsem(&ei->i_mmap_sem);
        inode_init_once(&ei->vfs_inode);
 }
 
@@ -1066,8 +1097,8 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
 }
 
 #ifdef CONFIG_QUOTA
-#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
-#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
+static char *quotatypes[] = INITQFNAMES;
+#define QTYPE2NAME(t) (quotatypes[t])
 
 static int ext4_write_dquot(struct dquot *dquot);
 static int ext4_acquire_dquot(struct dquot *dquot);
@@ -1100,6 +1131,7 @@ static const struct dquot_operations ext4_quota_operations = {
        .write_info     = ext4_write_info,
        .alloc_dquot    = dquot_alloc,
        .destroy_dquot  = dquot_destroy,
+       .get_projid     = ext4_get_projid,
 };
 
 static const struct quotactl_ops ext4_qctl_operations = {
@@ -2254,10 +2286,10 @@ static void ext4_orphan_cleanup(struct super_block *sb,
                                        __func__, inode->i_ino, inode->i_size);
                        jbd_debug(2, "truncating inode %lu to %lld bytes\n",
                                  inode->i_ino, inode->i_size);
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
                        truncate_inode_pages(inode->i_mapping, inode->i_size);
                        ext4_truncate(inode);
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        nr_truncates++;
                } else {
                        if (test_opt(sb, DEBUG))
@@ -2526,6 +2558,12 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
                         "without CONFIG_QUOTA");
                return 0;
        }
+       if (ext4_has_feature_project(sb) && !readonly) {
+               ext4_msg(sb, KERN_ERR,
+                        "Filesystem with project quota feature cannot be mounted RDWR "
+                        "without CONFIG_QUOTA");
+               return 0;
+       }
 #endif  /* CONFIG_QUOTA */
        return 1;
 }
@@ -3654,7 +3692,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                sb->s_qcop = &dquot_quotactl_sysfile_ops;
        else
                sb->s_qcop = &ext4_qctl_operations;
-       sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
+       sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
 #endif
        memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
 
@@ -4790,6 +4828,48 @@ restore_opts:
        return err;
 }
 
+#ifdef CONFIG_QUOTA
+/*
+ * Clamp the statfs figures in @buf to the quota limits of project @projid.
+ *
+ * For blocks and for inodes alike, the soft limit is used when it is set and
+ * the hard limit otherwise.  A limit only takes effect when it is non-zero
+ * and smaller than the total already stored in @buf.
+ *
+ * Returns 0, or the error carried by the pointer dqget() returned.
+ */
+static int ext4_statfs_project(struct super_block *sb,
+                              kprojid_t projid, struct kstatfs *buf)
+{
+       struct dquot *dquot;
+       u64 limit;
+       u64 used_blocks;
+
+       dquot = dqget(sb, make_kqid_projid(projid));
+       if (IS_ERR(dquot))
+               return PTR_ERR(dquot);
+
+       spin_lock(&dq_data_lock);
+
+       /* Block limit, scaled down by the filesystem's block-size shift */
+       limit = (dquot->dq_dqb.dqb_bsoftlimit ?
+                dquot->dq_dqb.dqb_bsoftlimit :
+                dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
+       if (limit && buf->f_blocks > limit) {
+               used_blocks = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
+               buf->f_blocks = limit;
+               if (buf->f_blocks > used_blocks)
+                       buf->f_bavail = buf->f_blocks - used_blocks;
+               else
+                       buf->f_bavail = 0;
+               buf->f_bfree = buf->f_bavail;
+       }
+
+       /* Inode limit */
+       limit = dquot->dq_dqb.dqb_isoftlimit;
+       if (!limit)
+               limit = dquot->dq_dqb.dqb_ihardlimit;
+       if (limit && buf->f_files > limit) {
+               buf->f_files = limit;
+               if (buf->f_files > dquot->dq_dqb.dqb_curinodes)
+                       buf->f_ffree = buf->f_files - dquot->dq_dqb.dqb_curinodes;
+               else
+                       buf->f_ffree = 0;
+       }
+
+       spin_unlock(&dq_data_lock);
+       dqput(dquot);
+       return 0;
+}
+#endif
+
 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        struct super_block *sb = dentry->d_sb;
@@ -4822,6 +4902,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
        buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
        buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
 
+#ifdef CONFIG_QUOTA
+       if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
+           sb_has_quota_limits_enabled(sb, PRJQUOTA))
+               ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
+#endif
        return 0;
 }
 
@@ -4986,7 +5071,8 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
        struct inode *qf_inode;
        unsigned long qf_inums[EXT4_MAXQUOTAS] = {
                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
-               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
        };
 
        BUG_ON(!ext4_has_feature_quota(sb));
@@ -5014,7 +5100,8 @@ static int ext4_enable_quotas(struct super_block *sb)
        int type, err = 0;
        unsigned long qf_inums[EXT4_MAXQUOTAS] = {
                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
-               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
        };
 
        sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
index 011ba66..c70d06a 100644 (file)
  */
 static inline void ext4_truncate_failed_write(struct inode *inode)
 {
+       down_write(&EXT4_I(inode)->i_mmap_sem); /* write side; ext4's fault handlers take i_mmap_sem for read */
         truncate_inode_pages(inode->i_mapping, inode->i_size); /* drop cached pages from i_size onward */
         ext4_truncate(inode);
+       up_write(&EXT4_I(inode)->i_mmap_sem); /* released only after both truncation steps */
 }
 
 /*
index ac9e7c6..5c06db1 100644 (file)
@@ -794,7 +794,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        return ret;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        isize = i_size_read(inode);
        if (start >= isize)
@@ -860,7 +860,7 @@ out:
        if (ret == 1)
                ret = 0;
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index 18ddb1e..ea272be 100644 (file)
@@ -333,7 +333,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
        loff_t isize;
        int err = 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        isize = i_size_read(inode);
        if (offset >= isize)
@@ -388,10 +388,10 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
 found:
        if (whence == SEEK_HOLE && data_ofs > isize)
                data_ofs = isize;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return vfs_setpos(file, data_ofs, maxbytes);
 fail:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return -ENXIO;
 }
 
@@ -1219,7 +1219,7 @@ static long f2fs_fallocate(struct file *file, int mode,
                        FALLOC_FL_INSERT_RANGE))
                return -EOPNOTSUPP;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (mode & FALLOC_FL_PUNCH_HOLE) {
                if (offset >= inode->i_size)
@@ -1243,7 +1243,7 @@ static long f2fs_fallocate(struct file *file, int mode,
        }
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        trace_f2fs_fallocate(inode, mode, offset, len, ret);
        return ret;
@@ -1307,13 +1307,13 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
 
        flags = f2fs_mask_flags(inode->i_mode, flags);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        oldflags = fi->i_flags;
 
        if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
                if (!capable(CAP_LINUX_IMMUTABLE)) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        ret = -EPERM;
                        goto out;
                }
@@ -1322,7 +1322,7 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
        flags = flags & FS_FL_USER_MODIFIABLE;
        flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
        fi->i_flags = flags;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        f2fs_set_inode_flags(inode);
        inode->i_ctime = CURRENT_TIME;
@@ -1667,7 +1667,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
 
        f2fs_balance_fs(sbi, true);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* writeback all dirty pages in the range */
        err = filemap_write_and_wait_range(inode->i_mapping, range->start,
@@ -1778,7 +1778,7 @@ do_map:
 clear_out:
        clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (!err)
                range->len = (u64)total << PAGE_CACHE_SHIFT;
        return err;
index 7def96c..d0b95c9 100644 (file)
@@ -769,7 +769,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *file,
 
        buf.dirent = dirent;
        buf.result = 0;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        buf.ctx.pos = file->f_pos;
        ret = -ENOENT;
        if (!IS_DEADDIR(inode)) {
@@ -777,7 +777,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *file,
                                    short_only, both ? &buf : NULL);
                file->f_pos = buf.ctx.pos;
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (ret >= 0)
                ret = buf.result;
        return ret;
index 43d3475..f701856 100644 (file)
@@ -24,9 +24,9 @@ static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
 {
        u32 attr;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        attr = fat_make_attrs(inode);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return put_user(attr, user_attr);
 }
@@ -47,7 +47,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
        err = mnt_want_write_file(file);
        if (err)
                goto out;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * ATTR_VOLUME and ATTR_DIR cannot be changed; this also
@@ -109,7 +109,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
        fat_save_attrs(inode, attr);
        mark_inode_dirty(inode);
 out_unlock_inode:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        mnt_drop_write_file(file);
 out:
        return err;
@@ -246,7 +246,7 @@ static long fat_fallocate(struct file *file, int mode,
        if (!S_ISREG(inode->i_mode))
                return -EOPNOTSUPP;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        if (mode & FALLOC_FL_KEEP_SIZE) {
                ondisksize = inode->i_blocks << 9;
                if ((offset + len) <= ondisksize)
@@ -272,7 +272,7 @@ static long fat_fallocate(struct file *file, int mode,
        }
 
 error:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
index 5797d45..c5618db 100644 (file)
@@ -46,9 +46,9 @@ void put_filesystem(struct file_system_type *fs)
 static struct file_system_type **find_filesystem(const char *name, unsigned len)
 {
        struct file_system_type **p;
-       for (p=&file_systems; *p; p=&(*p)->next)
-               if (strlen((*p)->name) == len &&
-                   strncmp((*p)->name, name, len) == 0)
+       for (p = &file_systems; *p; p = &(*p)->next)
+               if (strncmp((*p)->name, name, len) == 0 &&
+                   !(*p)->name[len])
                        break;
        return p;
 }
index 712601f..4b855b6 100644 (file)
@@ -944,7 +944,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
        if (!parent)
                return -ENOENT;
 
-       mutex_lock(&parent->i_mutex);
+       inode_lock(parent);
        if (!S_ISDIR(parent->i_mode))
                goto unlock;
 
@@ -962,7 +962,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
        fuse_invalidate_entry(entry);
 
        if (child_nodeid != 0 && d_really_is_positive(entry)) {
-               mutex_lock(&d_inode(entry)->i_mutex);
+               inode_lock(d_inode(entry));
                if (get_node_id(d_inode(entry)) != child_nodeid) {
                        err = -ENOENT;
                        goto badentry;
@@ -983,7 +983,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
                clear_nlink(d_inode(entry));
                err = 0;
  badentry:
-               mutex_unlock(&d_inode(entry)->i_mutex);
+               inode_unlock(d_inode(entry));
                if (!err)
                        d_delete(entry);
        } else {
@@ -992,7 +992,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
        dput(entry);
 
  unlock:
-       mutex_unlock(&parent->i_mutex);
+       inode_unlock(parent);
        iput(parent);
        return err;
 }
@@ -1504,7 +1504,7 @@ void fuse_set_nowrite(struct inode *inode)
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_inode *fi = get_fuse_inode(inode);
 
-       BUG_ON(!mutex_is_locked(&inode->i_mutex));
+       BUG_ON(!inode_is_locked(inode));
 
        spin_lock(&fc->lock);
        BUG_ON(fi->writectr < 0);
index aa03aab..b03d253 100644 (file)
@@ -207,7 +207,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
                return err;
 
        if (lock_inode)
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
 
        err = fuse_do_open(fc, get_node_id(inode), file, isdir);
 
@@ -215,7 +215,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
                fuse_finish_open(inode, file);
 
        if (lock_inode)
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
        return err;
 }
@@ -413,9 +413,9 @@ static int fuse_flush(struct file *file, fl_owner_t id)
        if (err)
                return err;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        fuse_sync_writes(inode);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        req = fuse_get_req_nofail_nopages(fc, file);
        memset(&inarg, 0, sizeof(inarg));
@@ -450,7 +450,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
        if (is_bad_inode(inode))
                return -EIO;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * Start writeback against all dirty pages of the inode, then
@@ -486,7 +486,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
                err = 0;
        }
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
@@ -1160,7 +1160,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                return generic_file_write_iter(iocb, from);
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = inode_to_bdi(inode);
@@ -1210,7 +1210,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        }
 out:
        current->backing_dev_info = NULL;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return written ? written : err;
 }
@@ -1322,10 +1322,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 
        if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
                if (!write)
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
                fuse_sync_writes(inode);
                if (!write)
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
        }
 
        while (count) {
@@ -1413,14 +1413,14 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
                return -EIO;
 
        /* Don't allow parallel writes to the same file */
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        res = generic_write_checks(iocb, from);
        if (res > 0)
                res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);
        fuse_invalidate_attr(inode);
        if (res > 0)
                fuse_write_update_size(inode, iocb->ki_pos);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return res;
 }
@@ -2287,17 +2287,17 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence)
                retval = generic_file_llseek(file, offset, whence);
                break;
        case SEEK_END:
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                retval = fuse_update_attributes(inode, NULL, file, NULL);
                if (!retval)
                        retval = generic_file_llseek(file, offset, whence);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                break;
        case SEEK_HOLE:
        case SEEK_DATA:
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                retval = fuse_lseek(file, offset, whence);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                break;
        default:
                retval = -EINVAL;
@@ -2944,7 +2944,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
                return -EOPNOTSUPP;
 
        if (lock_inode) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if (mode & FALLOC_FL_PUNCH_HOLE) {
                        loff_t endbyte = offset + length - 1;
                        err = filemap_write_and_wait_range(inode->i_mapping,
@@ -2990,7 +2990,7 @@ out:
                clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
 
        if (lock_inode)
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
        return err;
 }
index 7412863..c9384f9 100644 (file)
@@ -914,7 +914,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le
        if ((mode & ~FALLOC_FL_KEEP_SIZE) || gfs2_is_jdata(ip))
                return -EOPNOTSUPP;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
        ret = gfs2_glock_nq(&gh);
@@ -946,7 +946,7 @@ out_unlock:
        gfs2_glock_dq(&gh);
 out_uninit:
        gfs2_holder_uninit(&gh);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index 3e94400..352f958 100644 (file)
@@ -2067,7 +2067,7 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
        if (ret)
@@ -2094,7 +2094,7 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 
        gfs2_glock_dq_uninit(&gh);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index be6d9c4..a398913 100644 (file)
@@ -888,7 +888,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
                return -ENOMEM;
 
        sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
-       mutex_lock(&ip->i_inode.i_mutex);
+       inode_lock(&ip->i_inode);
        for (qx = 0; qx < num_qd; qx++) {
                error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE,
                                           GL_NOCACHE, &ghs[qx]);
@@ -953,7 +953,7 @@ out_alloc:
 out:
        while (qx--)
                gfs2_glock_dq_uninit(&ghs[qx]);
-       mutex_unlock(&ip->i_inode.i_mutex);
+       inode_unlock(&ip->i_inode);
        kfree(ghs);
        gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl, NORMAL_FLUSH);
        return error;
@@ -1674,7 +1674,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
        if (error)
                goto out_put;
 
-       mutex_lock(&ip->i_inode.i_mutex);
+       inode_lock(&ip->i_inode);
        error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh);
        if (error)
                goto out_unlockput;
@@ -1739,7 +1739,7 @@ out_i:
 out_q:
        gfs2_glock_dq_uninit(&q_gh);
 out_unlockput:
-       mutex_unlock(&ip->i_inode.i_mutex);
+       inode_unlock(&ip->i_inode);
 out_put:
        qd_put(qd);
        return error;
index 70788e0..e9f2b85 100644 (file)
@@ -173,9 +173,9 @@ static int hfs_dir_release(struct inode *inode, struct file *file)
 {
        struct hfs_readdir_data *rd = file->private_data;
        if (rd) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                list_del(&rd->list);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                kfree(rd);
        }
        return 0;
index b99ebdd..6686bf3 100644 (file)
@@ -570,13 +570,13 @@ static int hfs_file_release(struct inode *inode, struct file *file)
        if (HFS_IS_RSRC(inode))
                inode = HFS_I(inode)->rsrc_inode;
        if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                hfs_file_truncate(inode);
                //if (inode->i_flags & S_DEAD) {
                //      hfs_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL);
                //      hfs_delete_inode(inode);
                //}
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        return 0;
 }
@@ -656,7 +656,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
        ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (ret)
                return ret;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* sync the inode to buffers */
        ret = write_inode_now(inode, 0);
@@ -668,7 +668,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
        err = sync_blockdev(sb->s_bdev);
        if (!ret)
                ret = err;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index d0f39dc..a4e867e 100644 (file)
@@ -284,9 +284,9 @@ static int hfsplus_dir_release(struct inode *inode, struct file *file)
 {
        struct hfsplus_readdir_data *rd = file->private_data;
        if (rd) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                list_del(&rd->list);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                kfree(rd);
        }
        return 0;
index 19b33f8..1a6394c 100644 (file)
@@ -229,14 +229,14 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
        if (HFSPLUS_IS_RSRC(inode))
                inode = HFSPLUS_I(inode)->rsrc_inode;
        if (atomic_dec_and_test(&HFSPLUS_I(inode)->opencnt)) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                hfsplus_file_truncate(inode);
                if (inode->i_flags & S_DEAD) {
                        hfsplus_delete_cat(inode->i_ino,
                                           HFSPLUS_SB(sb)->hidden_dir, NULL);
                        hfsplus_delete_inode(inode);
                }
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        return 0;
 }
@@ -286,7 +286,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
        error = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (error)
                return error;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * Sync inode metadata into the catalog and extent trees.
@@ -327,7 +327,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
        if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
                blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return error;
 }
index 0624ce4..32a49e2 100644 (file)
@@ -93,7 +93,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
                goto out_drop_write;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if ((flags & (FS_IMMUTABLE_FL|FS_APPEND_FL)) ||
            inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
@@ -126,7 +126,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
        mark_inode_dirty(inode);
 
 out_unlock_inode:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 out_drop_write:
        mnt_drop_write_file(file);
 out:
index cfaa18c..d1abbee 100644 (file)
@@ -378,9 +378,9 @@ static int hostfs_fsync(struct file *file, loff_t start, loff_t end,
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = fsync_file(HOSTFS_I(inode)->fd, datasync);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return ret;
 }
index dc540bf..e57a53c 100644 (file)
@@ -33,7 +33,7 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
        if (whence == SEEK_DATA || whence == SEEK_HOLE)
                return -EINVAL;
 
-       mutex_lock(&i->i_mutex);
+       inode_lock(i);
        hpfs_lock(s);
 
        /*pr_info("dir lseek\n");*/
@@ -48,12 +48,12 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
 ok:
        filp->f_pos = new_off;
        hpfs_unlock(s);
-       mutex_unlock(&i->i_mutex);
+       inode_unlock(i);
        return new_off;
 fail:
        /*pr_warn("illegal lseek: %016llx\n", new_off);*/
        hpfs_unlock(s);
-       mutex_unlock(&i->i_mutex);
+       inode_unlock(i);
        return -ESPIPE;
 }
 
index 8bbf7f3..e1f465a 100644 (file)
@@ -141,7 +141,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 
        vma_len = (loff_t)(vma->vm_end - vma->vm_start);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        file_accessed(file);
 
        ret = -ENOMEM;
@@ -157,7 +157,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
        if (vma->vm_flags & VM_WRITE && inode->i_size < len)
                inode->i_size = len;
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return ret;
 }
@@ -530,7 +530,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
        if (hole_end > hole_start) {
                struct address_space *mapping = inode->i_mapping;
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                i_mmap_lock_write(mapping);
                if (!RB_EMPTY_ROOT(&mapping->i_mmap))
                        hugetlb_vmdelete_list(&mapping->i_mmap,
@@ -538,7 +538,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                                                hole_end  >> PAGE_SHIFT);
                i_mmap_unlock_write(mapping);
                remove_inode_hugepages(inode, hole_start, hole_end);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
        return 0;
@@ -572,7 +572,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
        start = offset >> hpage_shift;
        end = (offset + len + hpage_size - 1) >> hpage_shift;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
        error = inode_newsize_ok(inode, offset + len);
@@ -659,7 +659,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
                i_size_write(inode, offset + len);
        inode->i_ctime = CURRENT_TIME;
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return error;
 }
 
index e491e54..9f62db3 100644 (file)
@@ -495,7 +495,7 @@ void clear_inode(struct inode *inode)
         */
        spin_lock_irq(&inode->i_data.tree_lock);
        BUG_ON(inode->i_data.nrpages);
-       BUG_ON(inode->i_data.nrshadows);
+       BUG_ON(inode->i_data.nrexceptional);
        spin_unlock_irq(&inode->i_data.tree_lock);
        BUG_ON(!list_empty(&inode->i_data.private_list));
        BUG_ON(!(inode->i_state & I_FREEING));
@@ -966,9 +966,9 @@ void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
                swap(inode1, inode2);
 
        if (inode1 && !S_ISDIR(inode1->i_mode))
-               mutex_lock(&inode1->i_mutex);
+               inode_lock(inode1);
        if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
-               mutex_lock_nested(&inode2->i_mutex, I_MUTEX_NONDIR2);
+               inode_lock_nested(inode2, I_MUTEX_NONDIR2);
 }
 EXPORT_SYMBOL(lock_two_nondirectories);
 
@@ -980,9 +980,9 @@ EXPORT_SYMBOL(lock_two_nondirectories);
 void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
 {
        if (inode1 && !S_ISDIR(inode1->i_mode))
-               mutex_unlock(&inode1->i_mutex);
+               inode_unlock(inode1);
        if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
-               mutex_unlock(&inode2->i_mutex);
+               inode_unlock(inode2);
 }
 EXPORT_SYMBOL(unlock_two_nondirectories);
 
index 29466c3..116a333 100644 (file)
@@ -434,9 +434,9 @@ int generic_block_fiemap(struct inode *inode,
                         u64 len, get_block_t *get_block)
 {
        int ret;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 EXPORT_SYMBOL(generic_block_fiemap);
index a3750f9..0ae91ad 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mm.h> /* kvfree() */
 #include "nodelist.h"
 
 static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *,
@@ -383,12 +384,7 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c)
        return 0;
 
  out_free:
-#ifndef __ECOS
-       if (jffs2_blocks_use_vmalloc(c))
-               vfree(c->blocks);
-       else
-#endif
-               kfree(c->blocks);
+       kvfree(c->blocks);
 
        return ret;
 }
index f509f62..c5ac594 100644 (file)
@@ -39,10 +39,10 @@ int jffs2_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        /* Trigger GC to flush any pending writes for this inode */
        jffs2_flush_wbuf_gc(c, inode->i_ino);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return 0;
 }
index 2caf168..bead25a 100644 (file)
@@ -596,10 +596,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
 out_root:
        jffs2_free_ino_caches(c);
        jffs2_free_raw_node_refs(c);
-       if (jffs2_blocks_use_vmalloc(c))
-               vfree(c->blocks);
-       else
-               kfree(c->blocks);
+       kvfree(c->blocks);
  out_inohash:
        jffs2_clear_xattr_subsystem(c);
        kfree(c->inocache_list);
index bb080c2..0a9a114 100644 (file)
@@ -331,10 +331,7 @@ static void jffs2_put_super (struct super_block *sb)
 
        jffs2_free_ino_caches(c);
        jffs2_free_raw_node_refs(c);
-       if (jffs2_blocks_use_vmalloc(c))
-               vfree(c->blocks);
-       else
-               kfree(c->blocks);
+       kvfree(c->blocks);
        jffs2_flash_cleanup(c);
        kfree(c->inocache_list);
        jffs2_clear_xattr_subsystem(c);
index 0e026a7..4ce7735 100644 (file)
@@ -38,17 +38,17 @@ int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        if (rc)
                return rc;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        if (!(inode->i_state & I_DIRTY_ALL) ||
            (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) {
                /* Make sure committed changes hit the disk */
                jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                return rc;
        }
 
        rc |= jfs_commit_inode(inode, 1);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return rc ? -EIO : 0;
 }
index 8db8b7d..8653cac 100644 (file)
@@ -96,7 +96,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                }
 
                /* Lock against other parallel changes of flags */
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
 
                jfs_get_inode_flags(jfs_inode);
                oldflags = jfs_inode->mode2;
@@ -109,7 +109,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                        ((flags ^ oldflags) &
                        (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
                        if (!capable(CAP_LINUX_IMMUTABLE)) {
-                               mutex_unlock(&inode->i_mutex);
+                               inode_unlock(inode);
                                err = -EPERM;
                                goto setflags_out;
                        }
@@ -120,7 +120,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                jfs_inode->mode2 = flags;
 
                jfs_set_inode_flags(inode);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                inode->i_ctime = CURRENT_TIME_SEC;
                mark_inode_dirty(inode);
 setflags_out:
index 900925b..4f5d85b 100644 (file)
@@ -792,7 +792,7 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type,
        struct buffer_head tmp_bh;
        struct buffer_head *bh;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        while (towrite > 0) {
                tocopy = sb->s_blocksize - offset < towrite ?
                                sb->s_blocksize - offset : towrite;
@@ -824,7 +824,7 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type,
        }
 out:
        if (len == towrite) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                return err;
        }
        if (inode->i_size < off+len-towrite)
@@ -832,7 +832,7 @@ out:
        inode->i_version++;
        inode->i_mtime = inode->i_ctime = CURRENT_TIME;
        mark_inode_dirty(inode);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return len - towrite;
 }
 
index 8219738..996b774 100644 (file)
@@ -1511,9 +1511,9 @@ static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset,
        struct inode *inode = file_inode(file);
        loff_t ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = generic_file_llseek(file, offset, whence);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return ret;
 }
index 0149129..0ca80b2 100644 (file)
@@ -89,7 +89,7 @@ EXPORT_SYMBOL(dcache_dir_close);
 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
 {
        struct dentry *dentry = file->f_path.dentry;
-       mutex_lock(&d_inode(dentry)->i_mutex);
+       inode_lock(d_inode(dentry));
        switch (whence) {
                case 1:
                        offset += file->f_pos;
@@ -97,7 +97,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
                        if (offset >= 0)
                                break;
                default:
-                       mutex_unlock(&d_inode(dentry)->i_mutex);
+                       inode_unlock(d_inode(dentry));
                        return -EINVAL;
        }
        if (offset != file->f_pos) {
@@ -124,7 +124,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
                        spin_unlock(&dentry->d_lock);
                }
        }
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
        return offset;
 }
 EXPORT_SYMBOL(dcache_dir_lseek);
@@ -941,7 +941,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
        if (err)
                return err;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = sync_mapping_buffers(inode->i_mapping);
        if (!(inode->i_state & I_DIRTY_ALL))
                goto out;
@@ -953,7 +953,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
                ret = err;
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 EXPORT_SYMBOL(__generic_file_fsync);
index af1ed74..7c5f91b 100644 (file)
@@ -1650,12 +1650,12 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
         * bother, maybe that's a sign this just isn't a good file to
         * hand out a delegation on.
         */
-       if (is_deleg && !mutex_trylock(&inode->i_mutex))
+       if (is_deleg && !inode_trylock(inode))
                return -EAGAIN;
 
        if (is_deleg && arg == F_WRLCK) {
                /* Write delegations are not currently supported: */
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                WARN_ON_ONCE(1);
                return -EINVAL;
        }
@@ -1732,7 +1732,7 @@ out:
        spin_unlock(&ctx->flc_lock);
        locks_dispose_list(&dispose);
        if (is_deleg)
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        if (!error && !my_fl)
                *flp = NULL;
        return error;
index 1a6f016..61eaeb1 100644 (file)
@@ -204,12 +204,12 @@ long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                if (err)
                        return err;
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                oldflags = li->li_flags;
                flags &= LOGFS_FL_USER_MODIFIABLE;
                flags |= oldflags & ~LOGFS_FL_USER_MODIFIABLE;
                li->li_flags = flags;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
                inode->i_ctime = CURRENT_TIME;
                mark_inode_dirty_sync(inode);
@@ -230,11 +230,11 @@ int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        logfs_get_wblocks(sb, NULL, WF_LOCK);
        logfs_write_anchor(sb);
        logfs_put_wblocks(sb, NULL, WF_LOCK);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return 0;
 }
index bceefd5..f624d13 100644 (file)
@@ -1629,9 +1629,9 @@ static int lookup_slow(struct nameidata *nd, struct path *path)
        parent = nd->path.dentry;
        BUG_ON(nd->inode != parent->d_inode);
 
-       mutex_lock(&parent->d_inode->i_mutex);
+       inode_lock(parent->d_inode);
        dentry = __lookup_hash(&nd->last, parent, nd->flags);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       inode_unlock(parent->d_inode);
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
        path->mnt = nd->path.mnt;
@@ -2229,10 +2229,10 @@ struct dentry *kern_path_locked(const char *name, struct path *path)
                putname(filename);
                return ERR_PTR(-EINVAL);
        }
-       mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
        d = __lookup_hash(&last, path->dentry, 0);
        if (IS_ERR(d)) {
-               mutex_unlock(&path->dentry->d_inode->i_mutex);
+               inode_unlock(path->dentry->d_inode);
                path_put(path);
        }
        putname(filename);
@@ -2282,7 +2282,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
        unsigned int c;
        int err;
 
-       WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
+       WARN_ON_ONCE(!inode_is_locked(base->d_inode));
 
        this.name = name;
        this.len = len;
@@ -2380,9 +2380,9 @@ struct dentry *lookup_one_len_unlocked(const char *name,
        if (ret)
                return ret;
 
-       mutex_lock(&base->d_inode->i_mutex);
+       inode_lock(base->d_inode);
        ret =  __lookup_hash(&this, base, 0);
-       mutex_unlock(&base->d_inode->i_mutex);
+       inode_unlock(base->d_inode);
        return ret;
 }
 EXPORT_SYMBOL(lookup_one_len_unlocked);
@@ -2463,7 +2463,7 @@ mountpoint_last(struct nameidata *nd, struct path *path)
                goto done;
        }
 
-       mutex_lock(&dir->d_inode->i_mutex);
+       inode_lock(dir->d_inode);
        dentry = d_lookup(dir, &nd->last);
        if (!dentry) {
                /*
@@ -2473,16 +2473,16 @@ mountpoint_last(struct nameidata *nd, struct path *path)
                 */
                dentry = d_alloc(dir, &nd->last);
                if (!dentry) {
-                       mutex_unlock(&dir->d_inode->i_mutex);
+                       inode_unlock(dir->d_inode);
                        return -ENOMEM;
                }
                dentry = lookup_real(dir->d_inode, dentry, nd->flags);
                if (IS_ERR(dentry)) {
-                       mutex_unlock(&dir->d_inode->i_mutex);
+                       inode_unlock(dir->d_inode);
                        return PTR_ERR(dentry);
                }
        }
-       mutex_unlock(&dir->d_inode->i_mutex);
+       inode_unlock(dir->d_inode);
 
 done:
        if (d_is_negative(dentry)) {
@@ -2672,7 +2672,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
        struct dentry *p;
 
        if (p1 == p2) {
-               mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
+               inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
                return NULL;
        }
 
@@ -2680,29 +2680,29 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 
        p = d_ancestor(p2, p1);
        if (p) {
-               mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT);
-               mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD);
+               inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
+               inode_lock_nested(p1->d_inode, I_MUTEX_CHILD);
                return p;
        }
 
        p = d_ancestor(p1, p2);
        if (p) {
-               mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
-               mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
+               inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+               inode_lock_nested(p2->d_inode, I_MUTEX_CHILD);
                return p;
        }
 
-       mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
-       mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT2);
+       inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+       inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
        return NULL;
 }
 EXPORT_SYMBOL(lock_rename);
 
 void unlock_rename(struct dentry *p1, struct dentry *p2)
 {
-       mutex_unlock(&p1->d_inode->i_mutex);
+       inode_unlock(p1->d_inode);
        if (p1 != p2) {
-               mutex_unlock(&p2->d_inode->i_mutex);
+               inode_unlock(p2->d_inode);
                mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
        }
 }
@@ -3141,9 +3141,9 @@ retry_lookup:
                 * dropping this one anyway.
                 */
        }
-       mutex_lock(&dir->d_inode->i_mutex);
+       inode_lock(dir->d_inode);
        error = lookup_open(nd, &path, file, op, got_write, opened);
-       mutex_unlock(&dir->d_inode->i_mutex);
+       inode_unlock(dir->d_inode);
 
        if (error <= 0) {
                if (error)
@@ -3489,7 +3489,7 @@ static struct dentry *filename_create(int dfd, struct filename *name,
         * Do the final lookup.
         */
        lookup_flags |= LOOKUP_CREATE | LOOKUP_EXCL;
-       mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
        dentry = __lookup_hash(&last, path->dentry, lookup_flags);
        if (IS_ERR(dentry))
                goto unlock;
@@ -3518,7 +3518,7 @@ fail:
        dput(dentry);
        dentry = ERR_PTR(error);
 unlock:
-       mutex_unlock(&path->dentry->d_inode->i_mutex);
+       inode_unlock(path->dentry->d_inode);
        if (!err2)
                mnt_drop_write(path->mnt);
 out:
@@ -3538,7 +3538,7 @@ EXPORT_SYMBOL(kern_path_create);
 void done_path_create(struct path *path, struct dentry *dentry)
 {
        dput(dentry);
-       mutex_unlock(&path->dentry->d_inode->i_mutex);
+       inode_unlock(path->dentry->d_inode);
        mnt_drop_write(path->mnt);
        path_put(path);
 }
@@ -3735,7 +3735,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
                return -EPERM;
 
        dget(dentry);
-       mutex_lock(&dentry->d_inode->i_mutex);
+       inode_lock(dentry->d_inode);
 
        error = -EBUSY;
        if (is_local_mountpoint(dentry))
@@ -3755,7 +3755,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
        detach_mounts(dentry);
 
 out:
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       inode_unlock(dentry->d_inode);
        dput(dentry);
        if (!error)
                d_delete(dentry);
@@ -3794,7 +3794,7 @@ retry:
        if (error)
                goto exit1;
 
-       mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
        dentry = __lookup_hash(&last, path.dentry, lookup_flags);
        error = PTR_ERR(dentry);
        if (IS_ERR(dentry))
@@ -3810,7 +3810,7 @@ retry:
 exit3:
        dput(dentry);
 exit2:
-       mutex_unlock(&path.dentry->d_inode->i_mutex);
+       inode_unlock(path.dentry->d_inode);
        mnt_drop_write(path.mnt);
 exit1:
        path_put(&path);
@@ -3856,7 +3856,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
        if (!dir->i_op->unlink)
                return -EPERM;
 
-       mutex_lock(&target->i_mutex);
+       inode_lock(target);
        if (is_local_mountpoint(dentry))
                error = -EBUSY;
        else {
@@ -3873,7 +3873,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
                }
        }
 out:
-       mutex_unlock(&target->i_mutex);
+       inode_unlock(target);
 
        /* We don't d_delete() NFS sillyrenamed files--they still exist. */
        if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
@@ -3916,7 +3916,7 @@ retry:
        if (error)
                goto exit1;
 retry_deleg:
-       mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
        dentry = __lookup_hash(&last, path.dentry, lookup_flags);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
@@ -3934,7 +3934,7 @@ retry_deleg:
 exit2:
                dput(dentry);
        }
-       mutex_unlock(&path.dentry->d_inode->i_mutex);
+       inode_unlock(path.dentry->d_inode);
        if (inode)
                iput(inode);    /* truncate the inode here */
        inode = NULL;
@@ -4086,7 +4086,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
        if (error)
                return error;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        /* Make sure we don't allow creating hardlink to an unlinked file */
        if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
                error =  -ENOENT;
@@ -4103,7 +4103,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
                inode->i_state &= ~I_LINKABLE;
                spin_unlock(&inode->i_lock);
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (!error)
                fsnotify_link(dir, inode, new_dentry);
        return error;
@@ -4303,7 +4303,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (!is_dir || (flags & RENAME_EXCHANGE))
                lock_two_nondirectories(source, target);
        else if (target)
-               mutex_lock(&target->i_mutex);
+               inode_lock(target);
 
        error = -EBUSY;
        if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
@@ -4356,7 +4356,7 @@ out:
        if (!is_dir || (flags & RENAME_EXCHANGE))
                unlock_two_nondirectories(source, target);
        else if (target)
-               mutex_unlock(&target->i_mutex);
+               inode_unlock(target);
        dput(new_dentry);
        if (!error) {
                fsnotify_move(old_dir, new_dir, old_name, is_dir,
index a830e14..4fb1691 100644 (file)
@@ -1961,9 +1961,9 @@ static struct mountpoint *lock_mount(struct path *path)
        struct vfsmount *mnt;
        struct dentry *dentry = path->dentry;
 retry:
-       mutex_lock(&dentry->d_inode->i_mutex);
+       inode_lock(dentry->d_inode);
        if (unlikely(cant_mount(dentry))) {
-               mutex_unlock(&dentry->d_inode->i_mutex);
+               inode_unlock(dentry->d_inode);
                return ERR_PTR(-ENOENT);
        }
        namespace_lock();
@@ -1974,13 +1974,13 @@ retry:
                        mp = new_mountpoint(dentry);
                if (IS_ERR(mp)) {
                        namespace_unlock();
-                       mutex_unlock(&dentry->d_inode->i_mutex);
+                       inode_unlock(dentry->d_inode);
                        return mp;
                }
                return mp;
        }
        namespace_unlock();
-       mutex_unlock(&path->dentry->d_inode->i_mutex);
+       inode_unlock(path->dentry->d_inode);
        path_put(path);
        path->mnt = mnt;
        dentry = path->dentry = dget(mnt->mnt_root);
@@ -1992,7 +1992,7 @@ static void unlock_mount(struct mountpoint *where)
        struct dentry *dentry = where->m_dentry;
        put_mountpoint(where);
        namespace_unlock();
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       inode_unlock(dentry->d_inode);
 }
 
 static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
index f0e3e9e..26c2de2 100644 (file)
@@ -369,7 +369,7 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
        if (!res) {
                struct inode *inode = d_inode(dentry);
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if (finfo.i.dirEntNum == NCP_FINFO(inode)->dirEntNum) {
                        ncp_new_dentry(dentry);
                        val=1;
@@ -377,7 +377,7 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
                        ncp_dbg(2, "found, but dirEntNum changed\n");
 
                ncp_update_inode2(inode, &finfo);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
 finished:
@@ -639,9 +639,9 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx,
        } else {
                struct inode *inode = d_inode(newdent);
 
-               mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+               inode_lock_nested(inode, I_MUTEX_CHILD);
                ncp_update_inode2(inode, entry);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
        if (ctl.idx >= NCP_DIRCACHE_SIZE) {
index 011324c..dd38ca1 100644 (file)
@@ -224,10 +224,10 @@ ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        iocb->ki_pos = pos;
 
        if (pos > i_size_read(inode)) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if (pos > i_size_read(inode))
                        i_size_write(inode, pos);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        ncp_dbg(1, "exit %pD2\n", file);
 outrel:
index c82a212..9cce670 100644 (file)
@@ -940,7 +940,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
        dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
                        filp, offset, whence);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        switch (whence) {
                case 1:
                        offset += filp->f_pos;
@@ -957,7 +957,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
                dir_ctx->duped = 0;
        }
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return offset;
 }
 
@@ -972,9 +972,9 @@ static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
 
        dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return 0;
 }
 
index 7ab7ec9..7a0cfd3 100644 (file)
@@ -580,7 +580,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
        if (!count)
                goto out;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        result = nfs_sync_mapping(mapping);
        if (result)
                goto out_unlock;
@@ -608,7 +608,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
        NFS_I(inode)->read_io += count;
        result = nfs_direct_read_schedule_iovec(dreq, iter, pos);
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (!result) {
                result = nfs_direct_wait(dreq);
@@ -622,7 +622,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
 out_release:
        nfs_direct_req_release(dreq);
 out_unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 out:
        return result;
 }
@@ -1005,7 +1005,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
        pos = iocb->ki_pos;
        end = (pos + iov_iter_count(iter) - 1) >> PAGE_CACHE_SHIFT;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        result = nfs_sync_mapping(mapping);
        if (result)
@@ -1045,7 +1045,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
                                              pos >> PAGE_CACHE_SHIFT, end);
        }
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (!result) {
                result = nfs_direct_wait(dreq);
@@ -1066,7 +1066,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
 out_release:
        nfs_direct_req_release(dreq);
 out_unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return result;
 }
 
index 4ef8f5a..748bb81 100644 (file)
@@ -278,9 +278,9 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
                ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
                if (ret != 0)
                        break;
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                ret = nfs_file_fsync_commit(file, start, end, datasync);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                /*
                 * If nfs_file_fsync_commit detected a server reboot, then
                 * resend all dirty pages that might have been covered by
index bb1f4e7..3384dc8 100644 (file)
@@ -971,7 +971,7 @@ filelayout_mark_request_commit(struct nfs_page *req,
        u32 i, j;
 
        if (fl->commit_through_mds) {
-               nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
+               nfs_request_add_commit_list(req, cinfo);
        } else {
                /* Note that we are calling nfs4_fl_calc_j_index on each page
                 * that ends up being committed to a data server.  An attractive
index 6594e9f..0cb1abd 100644 (file)
@@ -1215,7 +1215,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
                                        hdr->pgio_mirror_idx + 1,
                                        &hdr->pgio_mirror_idx))
                        goto out_eagain;
-               set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+               set_bit(NFS_LAYOUT_RETURN_REQUESTED,
                        &hdr->lseg->pls_layout->plh_flags);
                pnfs_read_resend_pnfs(hdr);
                return task->tk_status;
@@ -1948,11 +1948,9 @@ ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo,
        start = xdr_reserve_space(xdr, 4);
        BUG_ON(!start);
 
-       if (ff_layout_encode_ioerr(flo, xdr, args))
-               goto out;
-
+       ff_layout_encode_ioerr(flo, xdr, args);
        ff_layout_encode_iostats(flo, xdr, args);
-out:
+
        *start = cpu_to_be32((xdr->p - start - 1) * 4);
        dprintk("%s: Return\n", __func__);
 }
index bd03275..eb37046 100644 (file)
@@ -218,63 +218,55 @@ static void extend_ds_error(struct nfs4_ff_layout_ds_err *err,
        err->length = end - err->offset;
 }
 
-static bool ds_error_can_merge(struct nfs4_ff_layout_ds_err *err,  u64 offset,
-                              u64 length, int status, enum nfs_opnum4 opnum,
-                              nfs4_stateid *stateid,
-                              struct nfs4_deviceid *deviceid)
+static int
+ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1,
+               const struct nfs4_ff_layout_ds_err *e2)
 {
-       return err->status == status && err->opnum == opnum &&
-              nfs4_stateid_match(&err->stateid, stateid) &&
-              !memcmp(&err->deviceid, deviceid, sizeof(*deviceid)) &&
-              end_offset(err->offset, err->length) >= offset &&
-              err->offset <= end_offset(offset, length);
-}
-
-static bool merge_ds_error(struct nfs4_ff_layout_ds_err *old,
-                          struct nfs4_ff_layout_ds_err *new)
-{
-       if (!ds_error_can_merge(old, new->offset, new->length, new->status,
-                               new->opnum, &new->stateid, &new->deviceid))
-               return false;
-
-       extend_ds_error(old, new->offset, new->length);
-       return true;
+       int ret;
+
+       if (e1->opnum != e2->opnum)
+               return e1->opnum < e2->opnum ? -1 : 1;
+       if (e1->status != e2->status)
+               return e1->status < e2->status ? -1 : 1;
+       ret = memcmp(&e1->stateid, &e2->stateid, sizeof(e1->stateid));
+       if (ret != 0)
+               return ret;
+       ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid));
+       if (ret != 0)
+               return ret;
+       if (end_offset(e1->offset, e1->length) < e2->offset)
+               return -1;
+       if (e1->offset > end_offset(e2->offset, e2->length))
+               return 1;
+       /* If ranges overlap or are contiguous, they are the same */
+       return 0;
 }
 
-static bool
+static void
 ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo,
                              struct nfs4_ff_layout_ds_err *dserr)
 {
-       struct nfs4_ff_layout_ds_err *err;
-
-       list_for_each_entry(err, &flo->error_list, list) {
-               if (merge_ds_error(err, dserr)) {
-                       return true;
-               }
-       }
-
-       list_add(&dserr->list, &flo->error_list);
-       return false;
-}
-
-static bool
-ff_layout_update_ds_error(struct nfs4_flexfile_layout *flo, u64 offset,
-                         u64 length, int status, enum nfs_opnum4 opnum,
-                         nfs4_stateid *stateid, struct nfs4_deviceid *deviceid)
-{
-       bool found = false;
-       struct nfs4_ff_layout_ds_err *err;
-
-       list_for_each_entry(err, &flo->error_list, list) {
-               if (ds_error_can_merge(err, offset, length, status, opnum,
-                                      stateid, deviceid)) {
-                       found = true;
-                       extend_ds_error(err, offset, length);
+       struct nfs4_ff_layout_ds_err *err, *tmp;
+       struct list_head *head = &flo->error_list;
+       int match;
+
+       /* Do insertion sort w/ merges */
+       list_for_each_entry_safe(err, tmp, &flo->error_list, list) {
+               match = ff_ds_error_match(err, dserr);
+               if (match < 0)
+                       continue;
+               if (match > 0) {
+                       /* Add entry "dserr" _before_ entry "err" */
+                       head = &err->list;
                        break;
                }
+               /* Entries match, so merge "err" into "dserr" */
+               extend_ds_error(dserr, err->offset, err->length);
+               list_del(&err->list);
+               kfree(err);
        }
 
-       return found;
+       list_add_tail(&dserr->list, head);
 }
 
 int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
@@ -283,7 +275,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
                             gfp_t gfp_flags)
 {
        struct nfs4_ff_layout_ds_err *dserr;
-       bool needfree;
 
        if (status == 0)
                return 0;
@@ -291,14 +282,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
        if (mirror->mirror_ds == NULL)
                return -EINVAL;
 
-       spin_lock(&flo->generic_hdr.plh_inode->i_lock);
-       if (ff_layout_update_ds_error(flo, offset, length, status, opnum,
-                                     &mirror->stateid,
-                                     &mirror->mirror_ds->id_node.deviceid)) {
-               spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
-               return 0;
-       }
-       spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
        dserr = kmalloc(sizeof(*dserr), gfp_flags);
        if (!dserr)
                return -ENOMEM;
@@ -313,10 +296,8 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
               NFS4_DEVICEID4_SIZE);
 
        spin_lock(&flo->generic_hdr.plh_inode->i_lock);
-       needfree = ff_layout_add_ds_error_locked(flo, dserr);
+       ff_layout_add_ds_error_locked(flo, dserr);
        spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
-       if (needfree)
-               kfree(dserr);
 
        return 0;
 }
@@ -431,7 +412,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
                                         OP_ILLEGAL, GFP_NOIO);
                if (!fail_return) {
                        if (ff_layout_has_available_ds(lseg))
-                               set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+                               set_bit(NFS_LAYOUT_RETURN_REQUESTED,
                                        &lseg->pls_layout->plh_flags);
                        else
                                pnfs_error_mark_layout_for_return(ino, lseg);
index 8e24d88..86faecf 100644 (file)
@@ -661,9 +661,9 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
        trace_nfs_getattr_enter(inode);
        /* Flush out writes to the server in order to update c/mtime.  */
        if (S_ISREG(inode->i_mode)) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                err = nfs_sync_inode(inode);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                if (err)
                        goto out;
        }
@@ -1178,9 +1178,9 @@ static int __nfs_revalidate_mapping(struct inode *inode,
        spin_unlock(&inode->i_lock);
        trace_nfs_invalidate_mapping_enter(inode);
        if (may_lock) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                ret = nfs_invalidate_mapping(inode, mapping);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        } else
                ret = nfs_invalidate_mapping(inode, mapping);
        trace_nfs_invalidate_mapping_exit(inode, ret);
index 4e8cc94..9a547aa 100644 (file)
@@ -484,7 +484,7 @@ void nfs_retry_commit(struct list_head *page_list,
                      struct nfs_commit_info *cinfo,
                      u32 ds_commit_idx);
 void nfs_commitdata_release(struct nfs_commit_data *data);
-void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
+void nfs_request_add_commit_list(struct nfs_page *req,
                                 struct nfs_commit_info *cinfo);
 void nfs_request_add_commit_list_locked(struct nfs_page *req,
                struct list_head *dst,
index 6e81749..bd25dc7 100644 (file)
@@ -101,13 +101,13 @@ int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len)
        if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE))
                return -EOPNOTSUPP;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        err = nfs42_proc_fallocate(&msg, filep, offset, len);
        if (err == -EOPNOTSUPP)
                NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE;
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
@@ -123,7 +123,7 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
                return -EOPNOTSUPP;
 
        nfs_wb_all(inode);
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        err = nfs42_proc_fallocate(&msg, filep, offset, len);
        if (err == 0)
@@ -131,7 +131,7 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
        if (err == -EOPNOTSUPP)
                NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE;
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
index 26f9a23..57ca1c8 100644 (file)
@@ -141,11 +141,11 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
                ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
                if (ret != 0)
                        break;
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                ret = nfs_file_fsync_commit(file, start, end, datasync);
                if (!ret)
                        ret = pnfs_sync_inode(inode, !!datasync);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                /*
                 * If nfs_file_fsync_commit detected a server reboot, then
                 * resend all dirty pages that might have been covered by
@@ -219,13 +219,13 @@ static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
 
        /* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */
        if (same_inode) {
-               mutex_lock(&src_inode->i_mutex);
+               inode_lock(src_inode);
        } else if (dst_inode < src_inode) {
-               mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_PARENT);
-               mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD);
+               inode_lock_nested(dst_inode, I_MUTEX_PARENT);
+               inode_lock_nested(src_inode, I_MUTEX_CHILD);
        } else {
-               mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT);
-               mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_CHILD);
+               inode_lock_nested(src_inode, I_MUTEX_PARENT);
+               inode_lock_nested(dst_inode, I_MUTEX_CHILD);
        }
 
        /* flush all pending writes on both src and dst so that server
@@ -246,13 +246,13 @@ static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
 
 out_unlock:
        if (same_inode) {
-               mutex_unlock(&src_inode->i_mutex);
+               inode_unlock(src_inode);
        } else if (dst_inode < src_inode) {
-               mutex_unlock(&src_inode->i_mutex);
-               mutex_unlock(&dst_inode->i_mutex);
+               inode_unlock(src_inode);
+               inode_unlock(dst_inode);
        } else {
-               mutex_unlock(&dst_inode->i_mutex);
-               mutex_unlock(&src_inode->i_mutex);
+               inode_unlock(dst_inode);
+               inode_unlock(src_inode);
        }
 out:
        return ret;
index a3592cc..482b6e9 100644 (file)
@@ -52,9 +52,7 @@ static DEFINE_SPINLOCK(pnfs_spinlock);
  */
 static LIST_HEAD(pnfs_modules_tbl);
 
-static int
-pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
-                      enum pnfs_iomode iomode, bool sync);
+static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo);
 
 /* Return the registered pnfs layout driver module matching given id */
 static struct pnfs_layoutdriver_type *
@@ -243,6 +241,8 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
 {
        struct inode *inode = lo->plh_inode;
 
+       pnfs_layoutreturn_before_put_layout_hdr(lo);
+
        if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
                if (!list_empty(&lo->plh_segs))
                        WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
@@ -345,58 +345,6 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
        rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
 }
 
-/* Return true if layoutreturn is needed */
-static bool
-pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
-                       struct pnfs_layout_segment *lseg)
-{
-       struct pnfs_layout_segment *s;
-
-       if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
-               return false;
-
-       list_for_each_entry(s, &lo->plh_segs, pls_list)
-               if (s != lseg && test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
-                       return false;
-
-       return true;
-}
-
-static bool
-pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
-{
-       if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
-               return false;
-       lo->plh_return_iomode = 0;
-       pnfs_get_layout_hdr(lo);
-       clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
-       return true;
-}
-
-static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
-               struct pnfs_layout_hdr *lo, struct inode *inode)
-{
-       lo = lseg->pls_layout;
-       inode = lo->plh_inode;
-
-       spin_lock(&inode->i_lock);
-       if (pnfs_layout_need_return(lo, lseg)) {
-               nfs4_stateid stateid;
-               enum pnfs_iomode iomode;
-               bool send;
-
-               nfs4_stateid_copy(&stateid, &lo->plh_stateid);
-               iomode = lo->plh_return_iomode;
-               send = pnfs_prepare_layoutreturn(lo);
-               spin_unlock(&inode->i_lock);
-               if (send) {
-                       /* Send an async layoutreturn so we dont deadlock */
-                       pnfs_send_layoutreturn(lo, &stateid, iomode, false);
-               }
-       } else
-               spin_unlock(&inode->i_lock);
-}
-
 void
 pnfs_put_lseg(struct pnfs_layout_segment *lseg)
 {
@@ -410,15 +358,8 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
                atomic_read(&lseg->pls_refcount),
                test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
 
-       /* Handle the case where refcount != 1 */
-       if (atomic_add_unless(&lseg->pls_refcount, -1, 1))
-               return;
-
        lo = lseg->pls_layout;
        inode = lo->plh_inode;
-       /* Do we need a layoutreturn? */
-       if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
-               pnfs_layoutreturn_before_put_lseg(lseg, lo, inode);
 
        if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
                if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
@@ -937,6 +878,17 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
        rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
 }
 
+static bool
+pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
+{
+       if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+               return false;
+       lo->plh_return_iomode = 0;
+       pnfs_get_layout_hdr(lo);
+       clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
+       return true;
+}
+
 static int
 pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
                       enum pnfs_iomode iomode, bool sync)
@@ -971,6 +923,48 @@ out:
        return status;
 }
 
+/* Return true if layoutreturn is needed */
+static bool
+pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
+{
+       struct pnfs_layout_segment *s;
+
+       if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+               return false;
+
+       /* Defer layoutreturn until all lsegs are done */
+       list_for_each_entry(s, &lo->plh_segs, pls_list) {
+               if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
+                       return false;
+       }
+
+       return true;
+}
+
+static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+       struct inode *inode= lo->plh_inode;
+
+       if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+               return;
+       spin_lock(&inode->i_lock);
+       if (pnfs_layout_need_return(lo)) {
+               nfs4_stateid stateid;
+               enum pnfs_iomode iomode;
+               bool send;
+
+               nfs4_stateid_copy(&stateid, &lo->plh_stateid);
+               iomode = lo->plh_return_iomode;
+               send = pnfs_prepare_layoutreturn(lo);
+               spin_unlock(&inode->i_lock);
+               if (send) {
+                       /* Send an async layoutreturn so we don't deadlock */
+                       pnfs_send_layoutreturn(lo, &stateid, iomode, false);
+               }
+       } else
+               spin_unlock(&inode->i_lock);
+}
+
 /*
  * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
  * when the layout segment list is empty.
@@ -1091,7 +1085,7 @@ bool pnfs_roc(struct inode *ino)
 
        nfs4_stateid_copy(&stateid, &lo->plh_stateid);
        /* always send layoutreturn if being marked so */
-       if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+       if (test_and_clear_bit(NFS_LAYOUT_RETURN_REQUESTED,
                                   &lo->plh_flags))
                layoutreturn = pnfs_prepare_layoutreturn(lo);
 
@@ -1772,7 +1766,7 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
                        pnfs_set_plh_return_iomode(lo, return_range->iomode);
                        if (!mark_lseg_invalid(lseg, tmp_list))
                                remaining++;
-                       set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+                       set_bit(NFS_LAYOUT_RETURN_REQUESTED,
                                        &lo->plh_flags);
                }
        return remaining;
index 9f4e2a4..1ac1db5 100644 (file)
@@ -94,8 +94,8 @@ enum {
        NFS_LAYOUT_RO_FAILED = 0,       /* get ro layout failed stop trying */
        NFS_LAYOUT_RW_FAILED,           /* get rw layout failed stop trying */
        NFS_LAYOUT_BULK_RECALL,         /* bulk recall affecting layout */
-       NFS_LAYOUT_RETURN,              /* Return this layout ASAP */
-       NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */
+       NFS_LAYOUT_RETURN,              /* layoutreturn in progress */
+       NFS_LAYOUT_RETURN_REQUESTED,    /* Return this layout ASAP */
        NFS_LAYOUT_INVALID_STID,        /* layout stateid id is invalid */
        NFS_LAYOUT_FIRST_LAYOUTGET,     /* Serialize first layoutget */
 };
index ce43cd6..5754835 100644 (file)
@@ -830,11 +830,10 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);
  * holding the nfs_page lock.
  */
 void
-nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
-                           struct nfs_commit_info *cinfo)
+nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
 {
        spin_lock(cinfo->lock);
-       nfs_request_add_commit_list_locked(req, dst, cinfo);
+       nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
        spin_unlock(cinfo->lock);
        nfs_mark_page_unstable(req->wb_page, cinfo);
 }
@@ -892,7 +891,7 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
 {
        if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx))
                return;
-       nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
+       nfs_request_add_commit_list(req, cinfo);
 }
 
 static void
index 819ad81..4cba786 100644 (file)
@@ -55,10 +55,10 @@ nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u
        struct inode *inode = d_inode(resfh->fh_dentry);
        int status;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        status = security_inode_setsecctx(resfh->fh_dentry,
                label->data, label->len);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (status)
                /*
index 79f0307..dc8ebec 100644 (file)
@@ -192,7 +192,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 
        dir = nn->rec_file->f_path.dentry;
        /* lock the parent */
-       mutex_lock(&d_inode(dir)->i_mutex);
+       inode_lock(d_inode(dir));
 
        dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1);
        if (IS_ERR(dentry)) {
@@ -213,7 +213,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 out_put:
        dput(dentry);
 out_unlock:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        if (status == 0) {
                if (nn->in_grace) {
                        crp = nfs4_client_to_reclaim(dname, nn);
@@ -286,7 +286,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
        }
 
        status = iterate_dir(nn->rec_file, &ctx.ctx);
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
 
        list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
                if (!status) {
@@ -302,7 +302,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
                list_del(&entry->list);
                kfree(entry);
        }
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        nfs4_reset_creds(original_cred);
 
        list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
@@ -322,7 +322,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
        dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
 
        dir = nn->rec_file->f_path.dentry;
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
        dentry = lookup_one_len(name, dir, namlen);
        if (IS_ERR(dentry)) {
                status = PTR_ERR(dentry);
@@ -335,7 +335,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
 out:
        dput(dentry);
 out_unlock:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        return status;
 }
 
index 0770bcb..f84fe6b 100644 (file)
@@ -288,7 +288,7 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
        }
 
        inode = d_inode(dentry);
-       mutex_lock_nested(&inode->i_mutex, subclass);
+       inode_lock_nested(inode, subclass);
        fill_pre_wcc(fhp);
        fhp->fh_locked = true;
 }
@@ -307,7 +307,7 @@ fh_unlock(struct svc_fh *fhp)
 {
        if (fhp->fh_locked) {
                fill_post_wcc(fhp);
-               mutex_unlock(&d_inode(fhp->fh_dentry)->i_mutex);
+               inode_unlock(d_inode(fhp->fh_dentry));
                fhp->fh_locked = false;
        }
 }
index 6739077..5d2a57e 100644 (file)
@@ -493,9 +493,9 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
 
        dentry = fhp->fh_dentry;
 
-       mutex_lock(&d_inode(dentry)->i_mutex);
+       inode_lock(d_inode(dentry));
        host_error = security_inode_setsecctx(dentry, label->data, label->len);
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
        return nfserrno(host_error);
 }
 #else
index 10b2252..21a1e2e 100644 (file)
@@ -1003,7 +1003,7 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        isize = i_size_read(inode);
 
@@ -1113,6 +1113,6 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        if (ret == 1)
                ret = 0;
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
index aba4381..e8fe248 100644 (file)
@@ -158,7 +158,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
 
        flags = nilfs_mask_flags(inode->i_mode, flags);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        oldflags = NILFS_I(inode)->i_flags;
 
@@ -186,7 +186,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
        nilfs_mark_inode_dirty(inode);
        ret = nilfs_transaction_commit(inode->i_sb);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        mnt_drop_write_file(filp);
        return ret;
 }
index 9e38daf..b2eff58 100644 (file)
@@ -1509,7 +1509,7 @@ static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
        err = filemap_write_and_wait_range(vi->i_mapping, start, end);
        if (err)
                return err;
-       mutex_lock(&vi->i_mutex);
+       inode_lock(vi);
 
        BUG_ON(!S_ISDIR(vi->i_mode));
        /* If the bitmap attribute inode is in memory sync it, too. */
@@ -1532,7 +1532,7 @@ static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
        else
                ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx.  Error "
                                "%u.", datasync ? "data" : "", vi->i_ino, -ret);
-       mutex_unlock(&vi->i_mutex);
+       inode_unlock(vi);
        return ret;
 }
 
index 9d383e5..bed4d42 100644 (file)
@@ -1944,14 +1944,14 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        ssize_t written = 0;
        ssize_t err;
 
-       mutex_lock(&vi->i_mutex);
+       inode_lock(vi);
        /* We can write back this queue in page reclaim. */
        current->backing_dev_info = inode_to_bdi(vi);
        err = ntfs_prepare_file_for_write(iocb, from);
        if (iov_iter_count(from) && !err)
                written = ntfs_perform_write(file, from, iocb->ki_pos);
        current->backing_dev_info = NULL;
-       mutex_unlock(&vi->i_mutex);
+       inode_unlock(vi);
        if (likely(written > 0)) {
                err = generic_write_sync(file, iocb->ki_pos, written);
                if (err < 0)
@@ -1996,7 +1996,7 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
        err = filemap_write_and_wait_range(vi->i_mapping, start, end);
        if (err)
                return err;
-       mutex_lock(&vi->i_mutex);
+       inode_lock(vi);
 
        BUG_ON(S_ISDIR(vi->i_mode));
        if (!datasync || !NInoNonResident(NTFS_I(vi)))
@@ -2015,7 +2015,7 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
        else
                ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx.  Error "
                                "%u.", datasync ? "data" : "", vi->i_ino, -ret);
-       mutex_unlock(&vi->i_mutex);
+       inode_unlock(vi);
        return ret;
 }
 
index d80e331..9793e68 100644 (file)
@@ -48,7 +48,7 @@ bool ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
                ntfs_error(vol->sb, "Quota inodes are not open.");
                return false;
        }
-       mutex_lock(&vol->quota_q_ino->i_mutex);
+       inode_lock(vol->quota_q_ino);
        ictx = ntfs_index_ctx_get(NTFS_I(vol->quota_q_ino));
        if (!ictx) {
                ntfs_error(vol->sb, "Failed to get index context.");
@@ -98,7 +98,7 @@ bool ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
        ntfs_index_entry_mark_dirty(ictx);
 set_done:
        ntfs_index_ctx_put(ictx);
-       mutex_unlock(&vol->quota_q_ino->i_mutex);
+       inode_unlock(vol->quota_q_ino);
        /*
         * We set the flag so we do not try to mark the quotas out of date
         * again on remount.
@@ -110,7 +110,7 @@ done:
 err_out:
        if (ictx)
                ntfs_index_ctx_put(ictx);
-       mutex_unlock(&vol->quota_q_ino->i_mutex);
+       inode_unlock(vol->quota_q_ino);
        return false;
 }
 
index 2f77f8d..1b38abd 100644 (file)
@@ -1284,10 +1284,10 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
         * Find the inode number for the hibernation file by looking up the
         * filename hiberfil.sys in the root directory.
         */
-       mutex_lock(&vol->root_ino->i_mutex);
+       inode_lock(vol->root_ino);
        mref = ntfs_lookup_inode_by_name(NTFS_I(vol->root_ino), hiberfil, 12,
                        &name);
-       mutex_unlock(&vol->root_ino->i_mutex);
+       inode_unlock(vol->root_ino);
        if (IS_ERR_MREF(mref)) {
                ret = MREF_ERR(mref);
                /* If the file does not exist, Windows is not hibernated. */
@@ -1377,10 +1377,10 @@ static bool load_and_init_quota(ntfs_volume *vol)
         * Find the inode number for the quota file by looking up the filename
         * $Quota in the extended system files directory $Extend.
         */
-       mutex_lock(&vol->extend_ino->i_mutex);
+       inode_lock(vol->extend_ino);
        mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), Quota, 6,
                        &name);
-       mutex_unlock(&vol->extend_ino->i_mutex);
+       inode_unlock(vol->extend_ino);
        if (IS_ERR_MREF(mref)) {
                /*
                 * If the file does not exist, quotas are disabled and have
@@ -1460,10 +1460,10 @@ static bool load_and_init_usnjrnl(ntfs_volume *vol)
         * Find the inode number for the transaction log file by looking up the
         * filename $UsnJrnl in the extended system files directory $Extend.
         */
-       mutex_lock(&vol->extend_ino->i_mutex);
+       inode_lock(vol->extend_ino);
        mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), UsnJrnl, 8,
                        &name);
-       mutex_unlock(&vol->extend_ino->i_mutex);
+       inode_unlock(vol->extend_ino);
        if (IS_ERR_MREF(mref)) {
                /*
                 * If the file does not exist, transaction logging is disabled,
index a3ded88..d002579 100644 (file)
@@ -5719,7 +5719,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
                goto bail;
        }
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
 
        if (ocfs2_truncate_log_needs_flush(osb)) {
                ret = __ocfs2_flush_truncate_log(osb);
@@ -5776,7 +5776,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
 out_commit:
        ocfs2_commit_trans(osb, handle);
 out:
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 bail:
        if (meta_ac)
                ocfs2_free_alloc_context(meta_ac);
@@ -5832,7 +5832,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
        struct ocfs2_dinode *di;
        struct ocfs2_truncate_log *tl;
 
-       BUG_ON(mutex_trylock(&tl_inode->i_mutex));
+       BUG_ON(inode_trylock(tl_inode));
 
        start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
 
@@ -5980,7 +5980,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
        struct ocfs2_dinode *di;
        struct ocfs2_truncate_log *tl;
 
-       BUG_ON(mutex_trylock(&tl_inode->i_mutex));
+       BUG_ON(inode_trylock(tl_inode));
 
        di = (struct ocfs2_dinode *) tl_bh->b_data;
 
@@ -6008,7 +6008,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
                goto out;
        }
 
-       mutex_lock(&data_alloc_inode->i_mutex);
+       inode_lock(data_alloc_inode);
 
        status = ocfs2_inode_lock(data_alloc_inode, &data_alloc_bh, 1);
        if (status < 0) {
@@ -6035,7 +6035,7 @@ out_unlock:
        ocfs2_inode_unlock(data_alloc_inode, 1);
 
 out_mutex:
-       mutex_unlock(&data_alloc_inode->i_mutex);
+       inode_unlock(data_alloc_inode);
        iput(data_alloc_inode);
 
 out:
@@ -6047,9 +6047,9 @@ int ocfs2_flush_truncate_log(struct ocfs2_super *osb)
        int status;
        struct inode *tl_inode = osb->osb_tl_inode;
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
        status = __ocfs2_flush_truncate_log(osb);
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        return status;
 }
@@ -6208,7 +6208,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
                (unsigned long long)le64_to_cpu(tl_copy->i_blkno),
                num_recs);
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
        for(i = 0; i < num_recs; i++) {
                if (ocfs2_truncate_log_needs_flush(osb)) {
                        status = __ocfs2_flush_truncate_log(osb);
@@ -6239,7 +6239,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
        }
 
 bail_up:
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        return status;
 }
@@ -6346,7 +6346,7 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
                goto out;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        ret = ocfs2_inode_lock(inode, &di_bh, 1);
        if (ret) {
@@ -6395,7 +6395,7 @@ out_unlock:
        ocfs2_inode_unlock(inode, 1);
        brelse(di_bh);
 out_mutex:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        iput(inode);
 out:
        while(head) {
@@ -6439,7 +6439,7 @@ static int ocfs2_free_cached_clusters(struct ocfs2_super *osb,
        handle_t *handle;
        int ret = 0;
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
 
        while (head) {
                if (ocfs2_truncate_log_needs_flush(osb)) {
@@ -6471,7 +6471,7 @@ static int ocfs2_free_cached_clusters(struct ocfs2_super *osb,
                }
        }
 
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        while (head) {
                /* Premature exit may have left some dangling items. */
@@ -7355,7 +7355,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
                goto out;
        }
 
-       mutex_lock(&main_bm_inode->i_mutex);
+       inode_lock(main_bm_inode);
 
        ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0);
        if (ret < 0) {
@@ -7422,7 +7422,7 @@ out_unlock:
        ocfs2_inode_unlock(main_bm_inode, 0);
        brelse(main_bm_bh);
 out_mutex:
-       mutex_unlock(&main_bm_inode->i_mutex);
+       inode_unlock(main_bm_inode);
        iput(main_bm_inode);
 out:
        return ret;
index 7f60472..794fd15 100644 (file)
@@ -2046,9 +2046,9 @@ static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
        int ret = 0;
        unsigned int truncated_clusters;
 
-       mutex_lock(&osb->osb_tl_inode->i_mutex);
+       inode_lock(osb->osb_tl_inode);
        truncated_clusters = osb->truncated_clusters;
-       mutex_unlock(&osb->osb_tl_inode->i_mutex);
+       inode_unlock(osb->osb_tl_inode);
 
        /*
         * Check whether we can succeed in allocating if we free
index a3cc6d2..a76b9ea 100644 (file)
@@ -1254,15 +1254,15 @@ static const struct file_operations o2hb_debug_fops = {
 
 void o2hb_exit(void)
 {
-       kfree(o2hb_db_livenodes);
-       kfree(o2hb_db_liveregions);
-       kfree(o2hb_db_quorumregions);
-       kfree(o2hb_db_failedregions);
        debugfs_remove(o2hb_debug_failedregions);
        debugfs_remove(o2hb_debug_quorumregions);
        debugfs_remove(o2hb_debug_liveregions);
        debugfs_remove(o2hb_debug_livenodes);
        debugfs_remove(o2hb_debug_dir);
+       kfree(o2hb_db_livenodes);
+       kfree(o2hb_db_liveregions);
+       kfree(o2hb_db_quorumregions);
+       kfree(o2hb_db_failedregions);
 }
 
 static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir,
@@ -1438,13 +1438,15 @@ static void o2hb_region_release(struct config_item *item)
 
        kfree(reg->hr_slots);
 
-       kfree(reg->hr_db_regnum);
-       kfree(reg->hr_db_livenodes);
        debugfs_remove(reg->hr_debug_livenodes);
        debugfs_remove(reg->hr_debug_regnum);
        debugfs_remove(reg->hr_debug_elapsed_time);
        debugfs_remove(reg->hr_debug_pinned);
        debugfs_remove(reg->hr_debug_dir);
+       kfree(reg->hr_db_livenodes);
+       kfree(reg->hr_db_regnum);
+       kfree(reg->hr_debug_elapsed_time);
+       kfree(reg->hr_debug_pinned);
 
        spin_lock(&o2hb_live_lock);
        list_del(&reg->hr_all_item);
index ffecf89..e1adf28 100644 (file)
@@ -4361,7 +4361,7 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
                mlog_errno(ret);
                goto out;
        }
-       mutex_lock(&dx_alloc_inode->i_mutex);
+       inode_lock(dx_alloc_inode);
 
        ret = ocfs2_inode_lock(dx_alloc_inode, &dx_alloc_bh, 1);
        if (ret) {
@@ -4410,7 +4410,7 @@ out_unlock:
        ocfs2_inode_unlock(dx_alloc_inode, 1);
 
 out_mutex:
-       mutex_unlock(&dx_alloc_inode->i_mutex);
+       inode_unlock(dx_alloc_inode);
        brelse(dx_alloc_bh);
 out:
        iput(dx_alloc_inode);
index c5bdf02..b94a425 100644 (file)
@@ -2367,6 +2367,8 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
                                                break;
                                        }
                                }
+                               dlm_lockres_clear_refmap_bit(dlm, res,
+                                               dead_node);
                                spin_unlock(&res->spinlock);
                                continue;
                        }
index f92612e..474e57f 100644 (file)
@@ -1390,6 +1390,7 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
        unsigned int gen;
        int noqueue_attempted = 0;
        int dlm_locked = 0;
+       int kick_dc = 0;
 
        if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) {
                mlog_errno(-EINVAL);
@@ -1524,7 +1525,12 @@ update_holders:
 unlock:
        lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
 
+       /* ocfs2_unblock_lock requeues on seeing OCFS2_LOCK_UPCONVERT_FINISHING */
+       kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED);
+
        spin_unlock_irqrestore(&lockres->l_lock, flags);
+       if (kick_dc)
+               ocfs2_wake_downconvert_thread(osb);
 out:
        /*
         * This is helping work around a lock inversion between the page lock
index d631279..7cb38fd 100644 (file)
@@ -1872,7 +1872,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
        if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
                return -EROFS;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * This prevents concurrent writes on other nodes
@@ -1991,7 +1991,7 @@ out_rw_unlock:
        ocfs2_rw_unlock(inode, 1);
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
@@ -2299,7 +2299,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
        appending = iocb->ki_flags & IOCB_APPEND ? 1 : 0;
        direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
 relock:
        /*
@@ -2435,7 +2435,7 @@ out:
                ocfs2_rw_unlock(inode, rw_level);
 
 out_mutex:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (written)
                ret = written;
@@ -2547,7 +2547,7 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
        struct inode *inode = file->f_mapping->host;
        int ret = 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        switch (whence) {
        case SEEK_SET:
@@ -2585,7 +2585,7 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
        offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (ret)
                return ret;
        return offset;
index 97a563b..3629444 100644 (file)
@@ -630,10 +630,10 @@ static int ocfs2_remove_inode(struct inode *inode,
                goto bail;
        }
 
-       mutex_lock(&inode_alloc_inode->i_mutex);
+       inode_lock(inode_alloc_inode);
        status = ocfs2_inode_lock(inode_alloc_inode, &inode_alloc_bh, 1);
        if (status < 0) {
-               mutex_unlock(&inode_alloc_inode->i_mutex);
+               inode_unlock(inode_alloc_inode);
 
                mlog_errno(status);
                goto bail;
@@ -680,7 +680,7 @@ bail_commit:
        ocfs2_commit_trans(osb, handle);
 bail_unlock:
        ocfs2_inode_unlock(inode_alloc_inode, 1);
-       mutex_unlock(&inode_alloc_inode->i_mutex);
+       inode_unlock(inode_alloc_inode);
        brelse(inode_alloc_bh);
 bail:
        iput(inode_alloc_inode);
@@ -751,10 +751,10 @@ static int ocfs2_wipe_inode(struct inode *inode,
                /* Lock the orphan dir. The lock will be held for the entire
                 * delete_inode operation. We do this now to avoid races with
                 * recovery completion on other nodes. */
-               mutex_lock(&orphan_dir_inode->i_mutex);
+               inode_lock(orphan_dir_inode);
                status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
                if (status < 0) {
-                       mutex_unlock(&orphan_dir_inode->i_mutex);
+                       inode_unlock(orphan_dir_inode);
 
                        mlog_errno(status);
                        goto bail;
@@ -803,7 +803,7 @@ bail_unlock_dir:
                return status;
 
        ocfs2_inode_unlock(orphan_dir_inode, 1);
-       mutex_unlock(&orphan_dir_inode->i_mutex);
+       inode_unlock(orphan_dir_inode);
        brelse(orphan_dir_bh);
 bail:
        iput(orphan_dir_inode);
index 16b0bb4..4506ec5 100644 (file)
@@ -86,7 +86,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
        unsigned oldflags;
        int status;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        status = ocfs2_inode_lock(inode, &bh, 1);
        if (status < 0) {
@@ -135,7 +135,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
 bail_unlock:
        ocfs2_inode_unlock(inode, 1);
 bail:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        brelse(bh);
 
@@ -287,7 +287,7 @@ static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
        struct ocfs2_dinode *dinode_alloc = NULL;
 
        if (inode_alloc)
-               mutex_lock(&inode_alloc->i_mutex);
+               inode_lock(inode_alloc);
 
        if (o2info_coherent(&fi->ifi_req)) {
                status = ocfs2_inode_lock(inode_alloc, &bh, 0);
@@ -317,7 +317,7 @@ bail:
                ocfs2_inode_unlock(inode_alloc, 0);
 
        if (inode_alloc)
-               mutex_unlock(&inode_alloc->i_mutex);
+               inode_unlock(inode_alloc);
 
        brelse(bh);
 
@@ -547,7 +547,7 @@ static int ocfs2_info_freefrag_scan_bitmap(struct ocfs2_super *osb,
        struct ocfs2_dinode *gb_dinode = NULL;
 
        if (gb_inode)
-               mutex_lock(&gb_inode->i_mutex);
+               inode_lock(gb_inode);
 
        if (o2info_coherent(&ffg->iff_req)) {
                status = ocfs2_inode_lock(gb_inode, &bh, 0);
@@ -604,7 +604,7 @@ bail:
                ocfs2_inode_unlock(gb_inode, 0);
 
        if (gb_inode)
-               mutex_unlock(&gb_inode->i_mutex);
+               inode_unlock(gb_inode);
 
        iput(gb_inode);
        brelse(bh);
index 3772a2d..61b833b 100644 (file)
@@ -2088,7 +2088,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
                return status;
        }
 
-       mutex_lock(&orphan_dir_inode->i_mutex);
+       inode_lock(orphan_dir_inode);
        status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
        if (status < 0) {
                mlog_errno(status);
@@ -2106,7 +2106,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
 out_cluster:
        ocfs2_inode_unlock(orphan_dir_inode, 0);
 out:
-       mutex_unlock(&orphan_dir_inode->i_mutex);
+       inode_unlock(orphan_dir_inode);
        iput(orphan_dir_inode);
        return status;
 }
@@ -2196,7 +2196,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
                oi->ip_next_orphan = NULL;
 
                if (oi->ip_flags & OCFS2_INODE_DIO_ORPHAN_ENTRY) {
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
                        ret = ocfs2_rw_lock(inode, 1);
                        if (ret < 0) {
                                mlog_errno(ret);
@@ -2235,7 +2235,7 @@ unlock_inode:
 unlock_rw:
                        ocfs2_rw_unlock(inode, 1);
 unlock_mutex:
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
 
                        /* clear dio flag in ocfs2_inode_info */
                        oi->ip_flags &= ~OCFS2_INODE_DIO_ORPHAN_ENTRY;
index e9c99e3..7d62c43 100644 (file)
@@ -414,7 +414,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
                goto out;
        }
 
-       mutex_lock(&main_bm_inode->i_mutex);
+       inode_lock(main_bm_inode);
 
        status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
        if (status < 0) {
@@ -468,7 +468,7 @@ out_unlock:
        ocfs2_inode_unlock(main_bm_inode, 1);
 
 out_mutex:
-       mutex_unlock(&main_bm_inode->i_mutex);
+       inode_unlock(main_bm_inode);
        iput(main_bm_inode);
 
 out:
@@ -506,7 +506,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
                goto bail;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        status = ocfs2_read_inode_block_full(inode, &alloc_bh,
                                             OCFS2_BH_IGNORE_CACHE);
@@ -539,7 +539,7 @@ bail:
        brelse(alloc_bh);
 
        if (inode) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                iput(inode);
        }
 
@@ -571,7 +571,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
                goto out;
        }
 
-       mutex_lock(&main_bm_inode->i_mutex);
+       inode_lock(main_bm_inode);
 
        status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
        if (status < 0) {
@@ -601,7 +601,7 @@ out_unlock:
        ocfs2_inode_unlock(main_bm_inode, 1);
 
 out_mutex:
-       mutex_unlock(&main_bm_inode->i_mutex);
+       inode_unlock(main_bm_inode);
 
        brelse(main_bm_bh);
 
@@ -643,7 +643,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
                goto bail;
        }
 
-       mutex_lock(&local_alloc_inode->i_mutex);
+       inode_lock(local_alloc_inode);
 
        /*
         * We must double check state and allocator bits because
@@ -709,7 +709,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
        status = 0;
 bail:
        if (status < 0 && local_alloc_inode) {
-               mutex_unlock(&local_alloc_inode->i_mutex);
+               inode_unlock(local_alloc_inode);
                iput(local_alloc_inode);
        }
 
index 124471d..e3d05d9 100644 (file)
@@ -276,7 +276,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
         *      context->data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
         */
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
 
        if (ocfs2_truncate_log_needs_flush(osb)) {
                ret = __ocfs2_flush_truncate_log(osb);
@@ -338,7 +338,7 @@ out_commit:
        ocfs2_commit_trans(osb, handle);
 
 out_unlock_mutex:
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        if (context->data_ac) {
                ocfs2_free_alloc_context(context->data_ac);
@@ -632,7 +632,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
                goto out;
        }
 
-       mutex_lock(&gb_inode->i_mutex);
+       inode_lock(gb_inode);
 
        ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1);
        if (ret) {
@@ -640,7 +640,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
                goto out_unlock_gb_mutex;
        }
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
 
        handle = ocfs2_start_trans(osb, credits);
        if (IS_ERR(handle)) {
@@ -708,11 +708,11 @@ out_commit:
        brelse(gd_bh);
 
 out_unlock_tl_inode:
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        ocfs2_inode_unlock(gb_inode, 1);
 out_unlock_gb_mutex:
-       mutex_unlock(&gb_inode->i_mutex);
+       inode_unlock(gb_inode);
        brelse(gb_bh);
        iput(gb_inode);
 
@@ -905,7 +905,7 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
        if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
                return -EROFS;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * This prevents concurrent writes from other nodes
@@ -969,7 +969,7 @@ out_inode_unlock:
 out_rw_unlock:
        ocfs2_rw_unlock(inode, 1);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return status;
 }
index ab42c38..6b3e871 100644 (file)
@@ -1045,7 +1045,7 @@ leave:
        if (orphan_dir) {
                /* This was locked for us in ocfs2_prepare_orphan_dir() */
                ocfs2_inode_unlock(orphan_dir, 1);
-               mutex_unlock(&orphan_dir->i_mutex);
+               inode_unlock(orphan_dir);
                iput(orphan_dir);
        }
 
@@ -1664,7 +1664,7 @@ bail:
        if (orphan_dir) {
                /* This was locked for us in ocfs2_prepare_orphan_dir() */
                ocfs2_inode_unlock(orphan_dir, 1);
-               mutex_unlock(&orphan_dir->i_mutex);
+               inode_unlock(orphan_dir);
                iput(orphan_dir);
        }
 
@@ -2121,11 +2121,11 @@ static int ocfs2_lookup_lock_orphan_dir(struct ocfs2_super *osb,
                return ret;
        }
 
-       mutex_lock(&orphan_dir_inode->i_mutex);
+       inode_lock(orphan_dir_inode);
 
        ret = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
        if (ret < 0) {
-               mutex_unlock(&orphan_dir_inode->i_mutex);
+               inode_unlock(orphan_dir_inode);
                iput(orphan_dir_inode);
 
                mlog_errno(ret);
@@ -2226,7 +2226,7 @@ out:
 
        if (ret) {
                ocfs2_inode_unlock(orphan_dir_inode, 1);
-               mutex_unlock(&orphan_dir_inode->i_mutex);
+               inode_unlock(orphan_dir_inode);
                iput(orphan_dir_inode);
        }
 
@@ -2495,7 +2495,7 @@ out:
                        ocfs2_free_alloc_context(inode_ac);
 
                /* Unroll orphan dir locking */
-               mutex_unlock(&orphan_dir->i_mutex);
+               inode_unlock(orphan_dir);
                ocfs2_inode_unlock(orphan_dir, 1);
                iput(orphan_dir);
        }
@@ -2602,7 +2602,7 @@ leave:
        if (orphan_dir) {
                /* This was locked for us in ocfs2_prepare_orphan_dir() */
                ocfs2_inode_unlock(orphan_dir, 1);
-               mutex_unlock(&orphan_dir->i_mutex);
+               inode_unlock(orphan_dir);
                iput(orphan_dir);
        }
 
@@ -2689,7 +2689,7 @@ int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
 
 bail_unlock_orphan:
        ocfs2_inode_unlock(orphan_dir_inode, 1);
-       mutex_unlock(&orphan_dir_inode->i_mutex);
+       inode_unlock(orphan_dir_inode);
        iput(orphan_dir_inode);
 
        ocfs2_free_dir_lookup_result(&orphan_insert);
@@ -2721,10 +2721,10 @@ int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb,
                goto bail;
        }
 
-       mutex_lock(&orphan_dir_inode->i_mutex);
+       inode_lock(orphan_dir_inode);
        status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
        if (status < 0) {
-               mutex_unlock(&orphan_dir_inode->i_mutex);
+               inode_unlock(orphan_dir_inode);
                iput(orphan_dir_inode);
                mlog_errno(status);
                goto bail;
@@ -2770,7 +2770,7 @@ bail_commit:
 
 bail_unlock_orphan:
        ocfs2_inode_unlock(orphan_dir_inode, 1);
-       mutex_unlock(&orphan_dir_inode->i_mutex);
+       inode_unlock(orphan_dir_inode);
        brelse(orphan_dir_bh);
        iput(orphan_dir_inode);
 
@@ -2834,12 +2834,12 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
                goto leave;
        }
 
-       mutex_lock(&orphan_dir_inode->i_mutex);
+       inode_lock(orphan_dir_inode);
 
        status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
        if (status < 0) {
                mlog_errno(status);
-               mutex_unlock(&orphan_dir_inode->i_mutex);
+               inode_unlock(orphan_dir_inode);
                iput(orphan_dir_inode);
                goto leave;
        }
@@ -2901,7 +2901,7 @@ out_commit:
        ocfs2_commit_trans(osb, handle);
 orphan_unlock:
        ocfs2_inode_unlock(orphan_dir_inode, 1);
-       mutex_unlock(&orphan_dir_inode->i_mutex);
+       inode_unlock(orphan_dir_inode);
        iput(orphan_dir_inode);
 leave:
 
index fde9ef1..9c9dd30 100644 (file)
@@ -308,7 +308,7 @@ int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
                WARN_ON(bh != oinfo->dqi_gqi_bh);
        spin_unlock(&dq_data_lock);
        if (ex) {
-               mutex_lock(&oinfo->dqi_gqinode->i_mutex);
+               inode_lock(oinfo->dqi_gqinode);
                down_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
        } else {
                down_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
@@ -320,7 +320,7 @@ void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
 {
        if (ex) {
                up_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
-               mutex_unlock(&oinfo->dqi_gqinode->i_mutex);
+               inode_unlock(oinfo->dqi_gqinode);
        } else {
                up_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
        }
index 2521198..3eff031 100644 (file)
@@ -807,7 +807,7 @@ int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
                        mlog_errno(ret);
                        goto out;
                }
-               mutex_lock(&alloc_inode->i_mutex);
+               inode_lock(alloc_inode);
 
                ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1);
                if (ret) {
@@ -867,7 +867,7 @@ out_unlock:
        }
 out_mutex:
        if (alloc_inode) {
-               mutex_unlock(&alloc_inode->i_mutex);
+               inode_unlock(alloc_inode);
                iput(alloc_inode);
        }
 out:
@@ -4197,7 +4197,7 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
                goto out;
        }
 
-       mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(new_inode, I_MUTEX_CHILD);
        ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1,
                                      OI_LS_REFLINK_TARGET);
        if (ret) {
@@ -4231,7 +4231,7 @@ inode_unlock:
        ocfs2_inode_unlock(new_inode, 1);
        brelse(new_bh);
 out_unlock:
-       mutex_unlock(&new_inode->i_mutex);
+       inode_unlock(new_inode);
 out:
        if (!ret) {
                ret = filemap_fdatawait(inode->i_mapping);
@@ -4402,11 +4402,11 @@ static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
                        return error;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        error = dquot_initialize(dir);
        if (!error)
                error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (!error)
                fsnotify_create(dir, new_dentry);
        return error;
index 79b8021..576b9a0 100644 (file)
@@ -301,7 +301,7 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
                goto out;
        }
 
-       mutex_lock(&main_bm_inode->i_mutex);
+       inode_lock(main_bm_inode);
 
        ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
        if (ret < 0) {
@@ -375,7 +375,7 @@ out_unlock:
        ocfs2_inode_unlock(main_bm_inode, 1);
 
 out_mutex:
-       mutex_unlock(&main_bm_inode->i_mutex);
+       inode_unlock(main_bm_inode);
        iput(main_bm_inode);
 
 out:
@@ -486,7 +486,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
                goto out;
        }
 
-       mutex_lock(&main_bm_inode->i_mutex);
+       inode_lock(main_bm_inode);
 
        ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
        if (ret < 0) {
@@ -590,7 +590,7 @@ out_unlock:
        ocfs2_inode_unlock(main_bm_inode, 1);
 
 out_mutex:
-       mutex_unlock(&main_bm_inode->i_mutex);
+       inode_unlock(main_bm_inode);
        iput(main_bm_inode);
 
 out:
index fc6d25f..2f19aee 100644 (file)
@@ -141,7 +141,7 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
                if (ac->ac_which != OCFS2_AC_USE_LOCAL)
                        ocfs2_inode_unlock(inode, 1);
 
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
                iput(inode);
                ac->ac_inode = NULL;
@@ -797,11 +797,11 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
                return -EINVAL;
        }
 
-       mutex_lock(&alloc_inode->i_mutex);
+       inode_lock(alloc_inode);
 
        status = ocfs2_inode_lock(alloc_inode, &bh, 1);
        if (status < 0) {
-               mutex_unlock(&alloc_inode->i_mutex);
+               inode_unlock(alloc_inode);
                iput(alloc_inode);
 
                mlog_errno(status);
@@ -2875,10 +2875,10 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
                goto bail;
        }
 
-       mutex_lock(&inode_alloc_inode->i_mutex);
+       inode_lock(inode_alloc_inode);
        status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0);
        if (status < 0) {
-               mutex_unlock(&inode_alloc_inode->i_mutex);
+               inode_unlock(inode_alloc_inode);
                iput(inode_alloc_inode);
                mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n",
                     (u32)suballoc_slot, status);
@@ -2891,7 +2891,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
                mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
 
        ocfs2_inode_unlock(inode_alloc_inode, 0);
-       mutex_unlock(&inode_alloc_inode->i_mutex);
+       inode_unlock(inode_alloc_inode);
 
        iput(inode_alloc_inode);
        brelse(alloc_bh);
index f0e241f..7d3d979 100644 (file)
@@ -2524,7 +2524,7 @@ static int ocfs2_xattr_free_block(struct inode *inode,
                mlog_errno(ret);
                goto out;
        }
-       mutex_lock(&xb_alloc_inode->i_mutex);
+       inode_lock(xb_alloc_inode);
 
        ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
        if (ret < 0) {
@@ -2549,7 +2549,7 @@ out_unlock:
        ocfs2_inode_unlock(xb_alloc_inode, 1);
        brelse(xb_alloc_bh);
 out_mutex:
-       mutex_unlock(&xb_alloc_inode->i_mutex);
+       inode_unlock(xb_alloc_inode);
        iput(xb_alloc_inode);
 out:
        brelse(blk_bh);
@@ -3619,17 +3619,17 @@ int ocfs2_xattr_set(struct inode *inode,
                }
        }
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
 
        if (ocfs2_truncate_log_needs_flush(osb)) {
                ret = __ocfs2_flush_truncate_log(osb);
                if (ret < 0) {
-                       mutex_unlock(&tl_inode->i_mutex);
+                       inode_unlock(tl_inode);
                        mlog_errno(ret);
                        goto cleanup;
                }
        }
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
                                        &xbs, &ctxt, ref_meta, &credits);
@@ -5460,7 +5460,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
                return ret;
        }
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
 
        if (ocfs2_truncate_log_needs_flush(osb)) {
                ret = __ocfs2_flush_truncate_log(osb);
@@ -5504,7 +5504,7 @@ out_commit:
 out:
        ocfs2_schedule_truncate_log_flush(osb, 1);
 
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        if (meta_ac)
                ocfs2_free_alloc_context(meta_ac);
index b25b154..55bdc75 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -58,10 +58,10 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
        if (ret)
                newattrs.ia_valid |= ret | ATTR_FORCE;
 
-       mutex_lock(&dentry->d_inode->i_mutex);
+       inode_lock(dentry->d_inode);
        /* Note any delegations or leases have already been broken: */
        ret = notify_change(dentry, &newattrs, NULL);
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       inode_unlock(dentry->d_inode);
        return ret;
 }
 
@@ -510,7 +510,7 @@ static int chmod_common(struct path *path, umode_t mode)
        if (error)
                return error;
 retry_deleg:
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        error = security_path_chmod(path, mode);
        if (error)
                goto out_unlock;
@@ -518,7 +518,7 @@ retry_deleg:
        newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
        error = notify_change(path->dentry, &newattrs, &delegated_inode);
 out_unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (delegated_inode) {
                error = break_deleg_wait(&delegated_inode);
                if (!error)
@@ -593,11 +593,11 @@ retry_deleg:
        if (!S_ISDIR(inode->i_mode))
                newattrs.ia_valid |=
                        ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        error = security_path_chown(path, uid, gid);
        if (!error)
                error = notify_change(path->dentry, &newattrs, &delegated_inode);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (delegated_inode) {
                error = break_deleg_wait(&delegated_inode);
                if (!error)
index eff6319..d894e7c 100644 (file)
@@ -248,9 +248,9 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
        if (err)
                goto out_cleanup;
 
-       mutex_lock(&newdentry->d_inode->i_mutex);
+       inode_lock(newdentry->d_inode);
        err = ovl_set_attr(newdentry, stat);
-       mutex_unlock(&newdentry->d_inode->i_mutex);
+       inode_unlock(newdentry->d_inode);
        if (err)
                goto out_cleanup;
 
index 692ceda..ed95272 100644 (file)
@@ -167,7 +167,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
        struct dentry *newdentry;
        int err;
 
-       mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(udir, I_MUTEX_PARENT);
        newdentry = lookup_one_len(dentry->d_name.name, upperdir,
                                   dentry->d_name.len);
        err = PTR_ERR(newdentry);
@@ -185,7 +185,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
 out_dput:
        dput(newdentry);
 out_unlock:
-       mutex_unlock(&udir->i_mutex);
+       inode_unlock(udir);
        return err;
 }
 
@@ -258,9 +258,9 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
        if (err)
                goto out_cleanup;
 
-       mutex_lock(&opaquedir->d_inode->i_mutex);
+       inode_lock(opaquedir->d_inode);
        err = ovl_set_attr(opaquedir, &stat);
-       mutex_unlock(&opaquedir->d_inode->i_mutex);
+       inode_unlock(opaquedir->d_inode);
        if (err)
                goto out_cleanup;
 
@@ -599,7 +599,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
        struct dentry *upper = ovl_dentry_upper(dentry);
        int err;
 
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
        err = -ESTALE;
        if (upper->d_parent == upperdir) {
                /* Don't let d_delete() think it can reset d_inode */
@@ -619,7 +619,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
         * now.
         */
        d_drop(dentry);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
 
        return err;
 }
index bf996e5..49e2045 100644 (file)
@@ -63,9 +63,9 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
        if (!err) {
                upperdentry = ovl_dentry_upper(dentry);
 
-               mutex_lock(&upperdentry->d_inode->i_mutex);
+               inode_lock(upperdentry->d_inode);
                err = notify_change(upperdentry, attr, NULL);
-               mutex_unlock(&upperdentry->d_inode->i_mutex);
+               inode_unlock(upperdentry->d_inode);
        }
        ovl_drop_write(dentry);
 out:
index adcb139..fdaf28f 100644 (file)
@@ -228,7 +228,7 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
                                dput(dentry);
                        }
                }
-               mutex_unlock(&dir->d_inode->i_mutex);
+               inode_unlock(dir->d_inode);
        }
        revert_creds(old_cred);
        put_cred(override_cred);
@@ -399,7 +399,7 @@ static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
        loff_t res;
        struct ovl_dir_file *od = file->private_data;
 
-       mutex_lock(&file_inode(file)->i_mutex);
+       inode_lock(file_inode(file));
        if (!file->f_pos)
                ovl_dir_reset(file);
 
@@ -429,7 +429,7 @@ static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
                res = offset;
        }
 out_unlock:
-       mutex_unlock(&file_inode(file)->i_mutex);
+       inode_unlock(file_inode(file));
 
        return res;
 }
@@ -454,10 +454,10 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
                        ovl_path_upper(dentry, &upperpath);
                        realfile = ovl_path_open(&upperpath, O_RDONLY);
                        smp_mb__before_spinlock();
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
                        if (!od->upperfile) {
                                if (IS_ERR(realfile)) {
-                                       mutex_unlock(&inode->i_mutex);
+                                       inode_unlock(inode);
                                        return PTR_ERR(realfile);
                                }
                                od->upperfile = realfile;
@@ -467,7 +467,7 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
                                        fput(realfile);
                                realfile = od->upperfile;
                        }
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                }
        }
 
@@ -479,9 +479,9 @@ static int ovl_dir_release(struct inode *inode, struct file *file)
        struct ovl_dir_file *od = file->private_data;
 
        if (od->cache) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                ovl_cache_put(od, file->f_path.dentry);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        fput(od->realfile);
        if (od->upperfile)
@@ -557,7 +557,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
 {
        struct ovl_cache_entry *p;
 
-       mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
        list_for_each_entry(p, list, l_node) {
                struct dentry *dentry;
 
@@ -575,5 +575,5 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
                        ovl_cleanup(upper->d_inode, dentry);
                dput(dentry);
        }
-       mutex_unlock(&upper->d_inode->i_mutex);
+       inode_unlock(upper->d_inode);
 }
index d250604..8d826bd 100644 (file)
@@ -229,7 +229,7 @@ void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
 {
        struct ovl_entry *oe = dentry->d_fsdata;
 
-       WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex));
+       WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode));
        WARN_ON(oe->__upperdentry);
        BUG_ON(!upperdentry->d_inode);
        /*
@@ -244,7 +244,7 @@ void ovl_dentry_version_inc(struct dentry *dentry)
 {
        struct ovl_entry *oe = dentry->d_fsdata;
 
-       WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+       WARN_ON(!inode_is_locked(dentry->d_inode));
        oe->version++;
 }
 
@@ -252,7 +252,7 @@ u64 ovl_dentry_version_get(struct dentry *dentry)
 {
        struct ovl_entry *oe = dentry->d_fsdata;
 
-       WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+       WARN_ON(!inode_is_locked(dentry->d_inode));
        return oe->version;
 }
 
@@ -375,9 +375,9 @@ static inline struct dentry *ovl_lookup_real(struct dentry *dir,
 {
        struct dentry *dentry;
 
-       mutex_lock(&dir->d_inode->i_mutex);
+       inode_lock(dir->d_inode);
        dentry = lookup_one_len(name->name, dir, name->len);
-       mutex_unlock(&dir->d_inode->i_mutex);
+       inode_unlock(dir->d_inode);
 
        if (IS_ERR(dentry)) {
                if (PTR_ERR(dentry) == -ENOENT)
@@ -744,7 +744,7 @@ static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
        if (err)
                return ERR_PTR(err);
 
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
 retry:
        work = lookup_one_len(OVL_WORKDIR_NAME, dentry,
                              strlen(OVL_WORKDIR_NAME));
@@ -770,7 +770,7 @@ retry:
                        goto out_dput;
        }
 out_unlock:
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        mnt_drop_write(mnt);
 
        return work;
index 42cf8dd..ab8dad3 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -38,6 +38,12 @@ unsigned int pipe_max_size = 1048576;
  */
 unsigned int pipe_min_size = PAGE_SIZE;
 
+/* Per-user limits on pages allocated to pipes. The hard limit defaults to 0
+ * (disabled); the soft limit defaults to PIPE_DEF_BUFFERS * INR_OPEN_CUR.
+ */
+unsigned long pipe_user_pages_hard;
+unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
+
 /*
  * We use a start+len construction, which provides full use of the 
  * allocated memory.
@@ -583,20 +589,49 @@ pipe_fasync(int fd, struct file *filp, int on)
        return retval;
 }
 
+static void account_pipe_buffers(struct pipe_inode_info *pipe,
+                                 unsigned long old, unsigned long new)
+{
+       atomic_long_add(new - old, &pipe->user->pipe_bufs);
+}
+
+static bool too_many_pipe_buffers_soft(struct user_struct *user)
+{
+       return pipe_user_pages_soft &&
+              atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_soft;
+}
+
+static bool too_many_pipe_buffers_hard(struct user_struct *user)
+{
+       return pipe_user_pages_hard &&
+              atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_hard;
+}
+
 struct pipe_inode_info *alloc_pipe_info(void)
 {
        struct pipe_inode_info *pipe;
 
        pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
        if (pipe) {
-               pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL);
+               unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
+               struct user_struct *user = get_current_user();
+
+               if (!too_many_pipe_buffers_hard(user)) {
+                       if (too_many_pipe_buffers_soft(user))
+                               pipe_bufs = 1;
+                       pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * pipe_bufs, GFP_KERNEL);
+               }
+
                if (pipe->bufs) {
                        init_waitqueue_head(&pipe->wait);
                        pipe->r_counter = pipe->w_counter = 1;
-                       pipe->buffers = PIPE_DEF_BUFFERS;
+                       pipe->buffers = pipe_bufs;
+                       pipe->user = user;
+                       account_pipe_buffers(pipe, 0, pipe_bufs);
                        mutex_init(&pipe->mutex);
                        return pipe;
                }
+               free_uid(user);
                kfree(pipe);
        }
 
@@ -607,6 +642,8 @@ void free_pipe_info(struct pipe_inode_info *pipe)
 {
        int i;
 
+       account_pipe_buffers(pipe, pipe->buffers, 0);
+       free_uid(pipe->user);
        for (i = 0; i < pipe->buffers; i++) {
                struct pipe_buffer *buf = pipe->bufs + i;
                if (buf->ops)
@@ -998,6 +1035,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
                        memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
        }
 
+       account_pipe_buffers(pipe, pipe->buffers, nr_pages);
        pipe->curbuf = 0;
        kfree(pipe->bufs);
        pipe->bufs = bufs;
@@ -1069,6 +1107,11 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
                if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
                        ret = -EPERM;
                        goto out;
+               } else if ((too_many_pipe_buffers_hard(pipe->user) ||
+                           too_many_pipe_buffers_soft(pipe->user)) &&
+                          !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
+                       ret = -EPERM;
+                       goto out;
                }
                ret = pipe_set_size(pipe, nr_pages);
                break;
index 92e6726..a939f5e 100644 (file)
@@ -552,9 +552,9 @@ static int open_kcore(struct inode *inode, struct file *filp)
        if (kcore_need_update)
                kcore_update_ram();
        if (i_size_read(inode) != proc_root_kcore->size) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                i_size_write(inode, proc_root_kcore->size);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        return 0;
 }
index 67e8db4..b6a8d35 100644 (file)
@@ -50,7 +50,7 @@ int proc_setup_self(struct super_block *s)
        struct pid_namespace *ns = s->s_fs_info;
        struct dentry *self;
        
-       mutex_lock(&root_inode->i_mutex);
+       inode_lock(root_inode);
        self = d_alloc_name(s->s_root, "self");
        if (self) {
                struct inode *inode = new_inode_pseudo(s);
@@ -69,7 +69,7 @@ int proc_setup_self(struct super_block *s)
        } else {
                self = ERR_PTR(-ENOMEM);
        }
-       mutex_unlock(&root_inode->i_mutex);
+       inode_unlock(root_inode);
        if (IS_ERR(self)) {
                pr_err("proc_fill_super: can't allocate /proc/self\n");
                return PTR_ERR(self);
index 71ffc91..fa95ab2 100644 (file)
@@ -259,23 +259,29 @@ static int do_maps_open(struct inode *inode, struct file *file,
                                sizeof(struct proc_maps_private));
 }
 
-static pid_t pid_of_stack(struct proc_maps_private *priv,
-                               struct vm_area_struct *vma, bool is_pid)
+/*
+ * Indicate if the VMA is a stack for the given task; for
+ * /proc/PID/maps that is the stack of the main task.
+ */
+static int is_stack(struct proc_maps_private *priv,
+                   struct vm_area_struct *vma, int is_pid)
 {
-       struct inode *inode = priv->inode;
-       struct task_struct *task;
-       pid_t ret = 0;
+       int stack = 0;
+
+       if (is_pid) {
+               stack = vma->vm_start <= vma->vm_mm->start_stack &&
+                       vma->vm_end >= vma->vm_mm->start_stack;
+       } else {
+               struct inode *inode = priv->inode;
+               struct task_struct *task;
 
-       rcu_read_lock();
-       task = pid_task(proc_pid(inode), PIDTYPE_PID);
-       if (task) {
-               task = task_of_stack(task, vma, is_pid);
+               rcu_read_lock();
+               task = pid_task(proc_pid(inode), PIDTYPE_PID);
                if (task)
-                       ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+                       stack = vma_is_stack_for_task(vma, task);
+               rcu_read_unlock();
        }
-       rcu_read_unlock();
-
-       return ret;
+       return stack;
 }
 
 static void
@@ -335,8 +341,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 
        name = arch_vma_name(vma);
        if (!name) {
-               pid_t tid;
-
                if (!mm) {
                        name = "[vdso]";
                        goto done;
@@ -348,21 +352,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
                        goto done;
                }
 
-               tid = pid_of_stack(priv, vma, is_pid);
-               if (tid != 0) {
-                       /*
-                        * Thread stack in /proc/PID/task/TID/maps or
-                        * the main process stack.
-                        */
-                       if (!is_pid || (vma->vm_start <= mm->start_stack &&
-                           vma->vm_end >= mm->start_stack)) {
-                               name = "[stack]";
-                       } else {
-                               /* Thread stack in /proc/PID/maps */
-                               seq_pad(m, ' ');
-                               seq_printf(m, "[stack:%d]", tid);
-                       }
-               }
+               if (is_stack(priv, vma, is_pid))
+                       name = "[stack]";
        }
 
 done:
@@ -602,7 +593,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
        pte_t *pte;
        spinlock_t *ptl;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                smaps_pmd_entry(pmd, addr, walk);
                spin_unlock(ptl);
                return 0;
@@ -913,7 +905,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
        spinlock_t *ptl;
        struct page *page;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
                        clear_soft_dirty_pmd(vma, addr, pmd);
                        goto out;
@@ -1187,7 +1180,8 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
        int err = 0;
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-       if (pmd_trans_huge_lock(pmdp, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmdp, vma);
+       if (ptl) {
                u64 flags = 0, frame = 0;
                pmd_t pmd = *pmdp;
 
@@ -1519,7 +1513,8 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
        pte_t *orig_pte;
        pte_t *pte;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                pte_t huge_pte = *(pte_t *)pmd;
                struct page *page;
 
@@ -1548,18 +1543,19 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
                unsigned long addr, unsigned long end, struct mm_walk *walk)
 {
+       pte_t huge_pte = huge_ptep_get(pte);
        struct numa_maps *md;
        struct page *page;
 
-       if (!pte_present(*pte))
+       if (!pte_present(huge_pte))
                return 0;
 
-       page = pte_page(*pte);
+       page = pte_page(huge_pte);
        if (!page)
                return 0;
 
        md = walk->private;
-       gather_stats(page, md, pte_dirty(*pte), 1);
+       gather_stats(page, md, pte_dirty(huge_pte), 1);
        return 0;
 }
 
@@ -1613,19 +1609,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
                seq_file_path(m, file, "\n\t= ");
        } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
                seq_puts(m, " heap");
-       } else {
-               pid_t tid = pid_of_stack(proc_priv, vma, is_pid);
-               if (tid != 0) {
-                       /*
-                        * Thread stack in /proc/PID/task/TID/maps or
-                        * the main process stack.
-                        */
-                       if (!is_pid || (vma->vm_start <= mm->start_stack &&
-                           vma->vm_end >= mm->start_stack))
-                               seq_puts(m, " stack");
-                       else
-                               seq_printf(m, " stack:%d", tid);
-               }
+       } else if (is_stack(proc_priv, vma, is_pid)) {
+               seq_puts(m, " stack");
        }
 
        if (is_vm_hugetlb_page(vma))
index e0d64c9..faacb0c 100644 (file)
@@ -123,23 +123,26 @@ unsigned long task_statm(struct mm_struct *mm,
        return size;
 }
 
-static pid_t pid_of_stack(struct proc_maps_private *priv,
-                               struct vm_area_struct *vma, bool is_pid)
+static int is_stack(struct proc_maps_private *priv,
+                   struct vm_area_struct *vma, int is_pid)
 {
-       struct inode *inode = priv->inode;
-       struct task_struct *task;
-       pid_t ret = 0;
-
-       rcu_read_lock();
-       task = pid_task(proc_pid(inode), PIDTYPE_PID);
-       if (task) {
-               task = task_of_stack(task, vma, is_pid);
+       struct mm_struct *mm = vma->vm_mm;
+       int stack = 0;
+
+       if (is_pid) {
+               stack = vma->vm_start <= mm->start_stack &&
+                       vma->vm_end >= mm->start_stack;
+       } else {
+               struct inode *inode = priv->inode;
+               struct task_struct *task;
+
+               rcu_read_lock();
+               task = pid_task(proc_pid(inode), PIDTYPE_PID);
                if (task)
-                       ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+                       stack = vma_is_stack_for_task(vma, task);
+               rcu_read_unlock();
        }
-       rcu_read_unlock();
-
-       return ret;
+       return stack;
 }
 
 /*
@@ -181,21 +184,9 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
        if (file) {
                seq_pad(m, ' ');
                seq_file_path(m, file, "");
-       } else if (mm) {
-               pid_t tid = pid_of_stack(priv, vma, is_pid);
-
-               if (tid != 0) {
-                       seq_pad(m, ' ');
-                       /*
-                        * Thread stack in /proc/PID/task/TID/maps or
-                        * the main process stack.
-                        */
-                       if (!is_pid || (vma->vm_start <= mm->start_stack &&
-                           vma->vm_end >= mm->start_stack))
-                               seq_printf(m, "[stack]");
-                       else
-                               seq_printf(m, "[stack:%d]", tid);
-               }
+       } else if (mm && is_stack(priv, vma, is_pid)) {
+               seq_pad(m, ' ');
+               seq_printf(m, "[stack]");
        }
 
        seq_putc(m, '\n');
index 9eacd59..e58a31e 100644 (file)
@@ -52,7 +52,7 @@ int proc_setup_thread_self(struct super_block *s)
        struct pid_namespace *ns = s->s_fs_info;
        struct dentry *thread_self;
 
-       mutex_lock(&root_inode->i_mutex);
+       inode_lock(root_inode);
        thread_self = d_alloc_name(s->s_root, "thread-self");
        if (thread_self) {
                struct inode *inode = new_inode_pseudo(s);
@@ -71,7 +71,7 @@ int proc_setup_thread_self(struct super_block *s)
        } else {
                thread_self = ERR_PTR(-ENOMEM);
        }
-       mutex_unlock(&root_inode->i_mutex);
+       inode_unlock(root_inode);
        if (IS_ERR(thread_self)) {
                pr_err("proc_fill_super: can't allocate /proc/thread_self\n");
                return PTR_ERR(thread_self);
index d8c439d..dc645b6 100644 (file)
@@ -377,7 +377,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
                break;
        }
 
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
 
        dentry = d_alloc_name(root, name);
        if (!dentry)
@@ -397,12 +397,12 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
        list_add(&private->list, &allpstore);
        spin_unlock_irqrestore(&allpstore_lock, flags);
 
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
 
        return 0;
 
 fail_lockedalloc:
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
        kfree(private);
 fail_alloc:
        iput(inode);
index fbd70af..3c3b81b 100644 (file)
@@ -682,9 +682,9 @@ int dquot_quota_sync(struct super_block *sb, int type)
                        continue;
                if (!sb_has_quota_active(sb, cnt))
                        continue;
-               mutex_lock(&dqopt->files[cnt]->i_mutex);
+               inode_lock(dqopt->files[cnt]);
                truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
-               mutex_unlock(&dqopt->files[cnt]->i_mutex);
+               inode_unlock(dqopt->files[cnt]);
        }
        mutex_unlock(&dqopt->dqonoff_mutex);
 
@@ -2162,12 +2162,12 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
                        /* If quota was reenabled in the meantime, we have
                         * nothing to do */
                        if (!sb_has_quota_loaded(sb, cnt)) {
-                               mutex_lock(&toputinode[cnt]->i_mutex);
+                               inode_lock(toputinode[cnt]);
                                toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
                                  S_NOATIME | S_NOQUOTA);
                                truncate_inode_pages(&toputinode[cnt]->i_data,
                                                     0);
-                               mutex_unlock(&toputinode[cnt]->i_mutex);
+                               inode_unlock(toputinode[cnt]);
                                mark_inode_dirty_sync(toputinode[cnt]);
                        }
                        mutex_unlock(&dqopt->dqonoff_mutex);
@@ -2258,11 +2258,11 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
                /* We don't want quota and atime on quota files (deadlocks
                 * possible) Also nobody should write to the file - we use
                 * special IO operations which ignore the immutable bit. */
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE |
                                             S_NOQUOTA);
                inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                /*
                 * When S_NOQUOTA is set, remove dquot references as no more
                 * references can be added
@@ -2305,12 +2305,12 @@ out_file_init:
        iput(inode);
 out_lock:
        if (oldflags != -1) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                /* Set the flags back (in the case of accidental quotaon()
                 * on a wrong file we don't want to mess up the flags) */
                inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE);
                inode->i_flags |= oldflags;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        mutex_unlock(&dqopt->dqonoff_mutex);
 out_fmt:
@@ -2430,9 +2430,9 @@ int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
        struct dentry *dentry;
        int error;
 
-       mutex_lock(&d_inode(sb->s_root)->i_mutex);
+       inode_lock(d_inode(sb->s_root));
        dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
-       mutex_unlock(&d_inode(sb->s_root)->i_mutex);
+       inode_unlock(d_inode(sb->s_root));
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
 
index 06b07d5..324ec27 100644 (file)
@@ -238,7 +238,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int whence)
        struct inode *inode = file_inode(file);
        loff_t retval;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        switch (whence) {
                case SEEK_END:
                        offset += i_size_read(inode);
@@ -283,7 +283,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int whence)
                retval = offset;
        }
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return retval;
 }
 EXPORT_SYMBOL(default_llseek);
@@ -1656,6 +1656,9 @@ next_file:
                mnt_drop_write_file(dst_file);
 next_loop:
                fdput(dst_fd);
+
+               if (fatal_signal_pending(current))
+                       goto out;
        }
 
 out:
index ced6791..e69ef3b 100644 (file)
@@ -44,7 +44,7 @@ int iterate_dir(struct file *file, struct dir_context *ctx)
                fsnotify_access(file);
                file_accessed(file);
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 out:
        return res;
 }
index 4a024e2..3abd400 100644 (file)
@@ -38,11 +38,11 @@ static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
        if (err)
                return err;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        reiserfs_write_lock(inode->i_sb);
        err = reiserfs_commit_for_inode(inode);
        reiserfs_write_unlock(inode->i_sb);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (err < 0)
                return err;
        return 0;
index 96a1bcf..9424a4b 100644 (file)
@@ -158,7 +158,7 @@ static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end,
        if (err)
                return err;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        BUG_ON(!S_ISREG(inode->i_mode));
        err = sync_mapping_buffers(inode->i_mapping);
        reiserfs_write_lock(inode->i_sb);
@@ -166,7 +166,7 @@ static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end,
        reiserfs_write_unlock(inode->i_sb);
        if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
                blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (barrier_done < 0)
                return barrier_done;
        return (err < 0) ? -EIO : 0;
index 6ec8a30..036a1fc 100644 (file)
@@ -224,7 +224,7 @@ out_unlock:
        page_cache_release(page);
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        reiserfs_write_unlock(inode->i_sb);
        return retval;
 }
index 05db747..c0306ec 100644 (file)
@@ -288,7 +288,7 @@ static int finish_unfinished(struct super_block *s)
                pathrelse(&path);
 
                inode = reiserfs_iget(s, &obj_key);
-               if (!inode) {
+               if (IS_ERR_OR_NULL(inode)) {
                        /*
                         * the unlink almost completed, it just did not
                         * manage to remove "save" link and release objectid
index e5ddb4e..57e0b23 100644 (file)
 #ifdef CONFIG_REISERFS_FS_XATTR
 static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
 {
-       BUG_ON(!mutex_is_locked(&dir->i_mutex));
+       BUG_ON(!inode_is_locked(dir));
        return dir->i_op->create(dir, dentry, mode, true);
 }
 #endif
 
 static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
-       BUG_ON(!mutex_is_locked(&dir->i_mutex));
+       BUG_ON(!inode_is_locked(dir));
        return dir->i_op->mkdir(dir, dentry, mode);
 }
 
@@ -85,11 +85,11 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
 {
        int error;
 
-       BUG_ON(!mutex_is_locked(&dir->i_mutex));
+       BUG_ON(!inode_is_locked(dir));
 
-       mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
        error = dir->i_op->unlink(dir, dentry);
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
 
        if (!error)
                d_delete(dentry);
@@ -100,13 +100,13 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
 {
        int error;
 
-       BUG_ON(!mutex_is_locked(&dir->i_mutex));
+       BUG_ON(!inode_is_locked(dir));
 
-       mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
        error = dir->i_op->rmdir(dir, dentry);
        if (!error)
                d_inode(dentry)->i_flags |= S_DEAD;
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
        if (!error)
                d_delete(dentry);
 
@@ -123,7 +123,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
        if (d_really_is_negative(privroot))
                return ERR_PTR(-ENODATA);
 
-       mutex_lock_nested(&d_inode(privroot)->i_mutex, I_MUTEX_XATTR);
+       inode_lock_nested(d_inode(privroot), I_MUTEX_XATTR);
 
        xaroot = dget(REISERFS_SB(sb)->xattr_root);
        if (!xaroot)
@@ -139,7 +139,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
                }
        }
 
-       mutex_unlock(&d_inode(privroot)->i_mutex);
+       inode_unlock(d_inode(privroot));
        return xaroot;
 }
 
@@ -156,7 +156,7 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
                 le32_to_cpu(INODE_PKEY(inode)->k_objectid),
                 inode->i_generation);
 
-       mutex_lock_nested(&d_inode(xaroot)->i_mutex, I_MUTEX_XATTR);
+       inode_lock_nested(d_inode(xaroot), I_MUTEX_XATTR);
 
        xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
        if (!IS_ERR(xadir) && d_really_is_negative(xadir)) {
@@ -170,7 +170,7 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
                }
        }
 
-       mutex_unlock(&d_inode(xaroot)->i_mutex);
+       inode_unlock(d_inode(xaroot));
        dput(xaroot);
        return xadir;
 }
@@ -195,7 +195,7 @@ fill_with_dentries(struct dir_context *ctx, const char *name, int namelen,
                container_of(ctx, struct reiserfs_dentry_buf, ctx);
        struct dentry *dentry;
 
-       WARN_ON_ONCE(!mutex_is_locked(&d_inode(dbuf->xadir)->i_mutex));
+       WARN_ON_ONCE(!inode_is_locked(d_inode(dbuf->xadir)));
 
        if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
                return -ENOSPC;
@@ -254,7 +254,7 @@ static int reiserfs_for_each_xattr(struct inode *inode,
                goto out_dir;
        }
 
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_XATTR);
+       inode_lock_nested(d_inode(dir), I_MUTEX_XATTR);
 
        buf.xadir = dir;
        while (1) {
@@ -276,7 +276,7 @@ static int reiserfs_for_each_xattr(struct inode *inode,
                        break;
                buf.count = 0;
        }
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 
        cleanup_dentry_buf(&buf);
 
@@ -298,13 +298,13 @@ static int reiserfs_for_each_xattr(struct inode *inode,
                if (!err) {
                        int jerror;
 
-                       mutex_lock_nested(&d_inode(dir->d_parent)->i_mutex,
+                       inode_lock_nested(d_inode(dir->d_parent),
                                          I_MUTEX_XATTR);
                        err = action(dir, data);
                        reiserfs_write_lock(inode->i_sb);
                        jerror = journal_end(&th);
                        reiserfs_write_unlock(inode->i_sb);
-                       mutex_unlock(&d_inode(dir->d_parent)->i_mutex);
+                       inode_unlock(d_inode(dir->d_parent));
                        err = jerror ?: err;
                }
        }
@@ -384,7 +384,7 @@ static struct dentry *xattr_lookup(struct inode *inode, const char *name,
        if (IS_ERR(xadir))
                return ERR_CAST(xadir);
 
-       mutex_lock_nested(&d_inode(xadir)->i_mutex, I_MUTEX_XATTR);
+       inode_lock_nested(d_inode(xadir), I_MUTEX_XATTR);
        xafile = lookup_one_len(name, xadir, strlen(name));
        if (IS_ERR(xafile)) {
                err = PTR_ERR(xafile);
@@ -404,7 +404,7 @@ static struct dentry *xattr_lookup(struct inode *inode, const char *name,
        if (err)
                dput(xafile);
 out:
-       mutex_unlock(&d_inode(xadir)->i_mutex);
+       inode_unlock(d_inode(xadir));
        dput(xadir);
        if (err)
                return ERR_PTR(err);
@@ -469,7 +469,7 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
        if (IS_ERR(xadir))
                return PTR_ERR(xadir);
 
-       mutex_lock_nested(&d_inode(xadir)->i_mutex, I_MUTEX_XATTR);
+       inode_lock_nested(d_inode(xadir), I_MUTEX_XATTR);
        dentry = lookup_one_len(name, xadir, strlen(name));
        if (IS_ERR(dentry)) {
                err = PTR_ERR(dentry);
@@ -483,7 +483,7 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
 
        dput(dentry);
 out_dput:
-       mutex_unlock(&d_inode(xadir)->i_mutex);
+       inode_unlock(d_inode(xadir));
        dput(xadir);
        return err;
 }
@@ -580,11 +580,11 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
                        .ia_valid = ATTR_SIZE | ATTR_CTIME,
                };
 
-               mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_XATTR);
+               inode_lock_nested(d_inode(dentry), I_MUTEX_XATTR);
                inode_dio_wait(d_inode(dentry));
 
                err = reiserfs_setattr(dentry, &newattrs);
-               mutex_unlock(&d_inode(dentry)->i_mutex);
+               inode_unlock(d_inode(dentry));
        } else
                update_ctime(inode);
 out_unlock:
@@ -888,9 +888,9 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
                goto out;
        }
 
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_XATTR);
+       inode_lock_nested(d_inode(dir), I_MUTEX_XATTR);
        err = reiserfs_readdir_inode(d_inode(dir), &buf.ctx);
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 
        if (!err)
                err = buf.pos;
@@ -905,7 +905,7 @@ static int create_privroot(struct dentry *dentry)
        int err;
        struct inode *inode = d_inode(dentry->d_parent);
 
-       WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+       WARN_ON_ONCE(!inode_is_locked(inode));
 
        err = xattr_mkdir(inode, dentry, 0700);
        if (err || d_really_is_negative(dentry)) {
@@ -995,7 +995,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
        int err = 0;
 
        /* If we don't have the privroot located yet - go find it */
-       mutex_lock(&d_inode(s->s_root)->i_mutex);
+       inode_lock(d_inode(s->s_root));
        dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
                                strlen(PRIVROOT_NAME));
        if (!IS_ERR(dentry)) {
@@ -1005,7 +1005,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
                        d_inode(dentry)->i_flags |= S_PRIVATE;
        } else
                err = PTR_ERR(dentry);
-       mutex_unlock(&d_inode(s->s_root)->i_mutex);
+       inode_unlock(d_inode(s->s_root));
 
        return err;
 }
@@ -1025,14 +1025,14 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
                goto error;
 
        if (d_really_is_negative(privroot) && !(mount_flags & MS_RDONLY)) {
-               mutex_lock(&d_inode(s->s_root)->i_mutex);
+               inode_lock(d_inode(s->s_root));
                err = create_privroot(REISERFS_SB(s)->priv_root);
-               mutex_unlock(&d_inode(s->s_root)->i_mutex);
+               inode_unlock(d_inode(s->s_root));
        }
 
        if (d_really_is_positive(privroot)) {
                s->s_xattr = reiserfs_xattr_handlers;
-               mutex_lock(&d_inode(privroot)->i_mutex);
+               inode_lock(d_inode(privroot));
                if (!REISERFS_SB(s)->xattr_root) {
                        struct dentry *dentry;
 
@@ -1043,7 +1043,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
                        else
                                err = PTR_ERR(dentry);
                }
-               mutex_unlock(&d_inode(privroot)->i_mutex);
+               inode_unlock(d_inode(privroot));
        }
 
 error:
index b94fa6c..053818d 100644 (file)
@@ -153,7 +153,7 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
        if (isalarm(ctx))
                remaining = alarm_expires_remaining(&ctx->t.alarm);
        else
-               remaining = hrtimer_expires_remaining(&ctx->t.tmr);
+               remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr);
 
        return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
 }
index c66f242..4a0e48f 100644 (file)
@@ -84,9 +84,9 @@ static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umo
         * the files within the tracefs system. It is up to the individual
         * mkdir routine to handle races.
         */
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        ret = tracefs_ops.mkdir(name);
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        kfree(name);
 
@@ -109,13 +109,13 @@ static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
         * This time we need to unlock not only the parent (inode) but
         * also the directory that is being deleted.
         */
-       mutex_unlock(&inode->i_mutex);
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       inode_unlock(inode);
+       inode_unlock(dentry->d_inode);
 
        ret = tracefs_ops.rmdir(name);
 
-       mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
-       mutex_lock(&dentry->d_inode->i_mutex);
+       inode_lock_nested(inode, I_MUTEX_PARENT);
+       inode_lock(dentry->d_inode);
 
        kfree(name);
 
@@ -334,7 +334,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
        if (!parent)
                parent = tracefs_mount->mnt_root;
 
-       mutex_lock(&parent->d_inode->i_mutex);
+       inode_lock(parent->d_inode);
        dentry = lookup_one_len(name, parent, strlen(name));
        if (!IS_ERR(dentry) && dentry->d_inode) {
                dput(dentry);
@@ -342,7 +342,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
        }
 
        if (IS_ERR(dentry)) {
-               mutex_unlock(&parent->d_inode->i_mutex);
+               inode_unlock(parent->d_inode);
                simple_release_fs(&tracefs_mount, &tracefs_mount_count);
        }
 
@@ -351,7 +351,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
 
 static struct dentry *failed_creating(struct dentry *dentry)
 {
-       mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+       inode_unlock(dentry->d_parent->d_inode);
        dput(dentry);
        simple_release_fs(&tracefs_mount, &tracefs_mount_count);
        return NULL;
@@ -359,7 +359,7 @@ static struct dentry *failed_creating(struct dentry *dentry)
 
 static struct dentry *end_creating(struct dentry *dentry)
 {
-       mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+       inode_unlock(dentry->d_parent->d_inode);
        return dentry;
 }
 
@@ -544,9 +544,9 @@ void tracefs_remove(struct dentry *dentry)
        if (!parent || !parent->d_inode)
                return;
 
-       mutex_lock(&parent->d_inode->i_mutex);
+       inode_lock(parent->d_inode);
        ret = __tracefs_remove(dentry, parent);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       inode_unlock(parent->d_inode);
        if (!ret)
                simple_release_fs(&tracefs_mount, &tracefs_mount_count);
 }
@@ -572,7 +572,7 @@ void tracefs_remove_recursive(struct dentry *dentry)
 
        parent = dentry;
  down:
-       mutex_lock(&parent->d_inode->i_mutex);
+       inode_lock(parent->d_inode);
  loop:
        /*
         * The parent->d_subdirs is protected by the d_lock. Outside that
@@ -587,7 +587,7 @@ void tracefs_remove_recursive(struct dentry *dentry)
                /* perhaps simple_empty(child) makes more sense */
                if (!list_empty(&child->d_subdirs)) {
                        spin_unlock(&parent->d_lock);
-                       mutex_unlock(&parent->d_inode->i_mutex);
+                       inode_unlock(parent->d_inode);
                        parent = child;
                        goto down;
                }
@@ -608,10 +608,10 @@ void tracefs_remove_recursive(struct dentry *dentry)
        }
        spin_unlock(&parent->d_lock);
 
-       mutex_unlock(&parent->d_inode->i_mutex);
+       inode_unlock(parent->d_inode);
        child = parent;
        parent = parent->d_parent;
-       mutex_lock(&parent->d_inode->i_mutex);
+       inode_lock(parent->d_inode);
 
        if (child != dentry)
                /* go up */
@@ -619,7 +619,7 @@ void tracefs_remove_recursive(struct dentry *dentry)
 
        if (!__tracefs_remove(child, parent))
                simple_release_fs(&tracefs_mount, &tracefs_mount_count);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       inode_unlock(parent->d_inode);
 }
 
 /**
index e49bd28..795992a 100644 (file)
@@ -515,8 +515,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
        dbg_gen("dent '%pd' to ino %lu (nlink %d) in dir ino %lu",
                dentry, inode->i_ino,
                inode->i_nlink, dir->i_ino);
-       ubifs_assert(mutex_is_locked(&dir->i_mutex));
-       ubifs_assert(mutex_is_locked(&inode->i_mutex));
+       ubifs_assert(inode_is_locked(dir));
+       ubifs_assert(inode_is_locked(inode));
 
        err = dbg_check_synced_i_size(c, inode);
        if (err)
@@ -572,8 +572,8 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
        dbg_gen("dent '%pd' from ino %lu (nlink %d) in dir ino %lu",
                dentry, inode->i_ino,
                inode->i_nlink, dir->i_ino);
-       ubifs_assert(mutex_is_locked(&dir->i_mutex));
-       ubifs_assert(mutex_is_locked(&inode->i_mutex));
+       ubifs_assert(inode_is_locked(dir));
+       ubifs_assert(inode_is_locked(inode));
        err = dbg_check_synced_i_size(c, inode);
        if (err)
                return err;
@@ -661,8 +661,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
 
        dbg_gen("directory '%pd', ino %lu in dir ino %lu", dentry,
                inode->i_ino, dir->i_ino);
-       ubifs_assert(mutex_is_locked(&dir->i_mutex));
-       ubifs_assert(mutex_is_locked(&inode->i_mutex));
+       ubifs_assert(inode_is_locked(dir));
+       ubifs_assert(inode_is_locked(inode));
        err = check_dir_empty(c, d_inode(dentry));
        if (err)
                return err;
@@ -996,10 +996,10 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
        dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu",
                old_dentry, old_inode->i_ino, old_dir->i_ino,
                new_dentry, new_dir->i_ino);
-       ubifs_assert(mutex_is_locked(&old_dir->i_mutex));
-       ubifs_assert(mutex_is_locked(&new_dir->i_mutex));
+       ubifs_assert(inode_is_locked(old_dir));
+       ubifs_assert(inode_is_locked(new_dir));
        if (unlink)
-               ubifs_assert(mutex_is_locked(&new_inode->i_mutex));
+               ubifs_assert(inode_is_locked(new_inode));
 
 
        if (unlink && is_dir) {
index eff6280..065c88f 100644 (file)
@@ -1317,7 +1317,7 @@ int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        err = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (err)
                return err;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* Synchronize the inode unless this is a 'datasync()' call. */
        if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) {
@@ -1332,7 +1332,7 @@ int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
         */
        err = ubifs_sync_wbufs_by_inode(c, inode);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
index e53292d..c7f4d43 100644 (file)
@@ -313,7 +313,7 @@ static int setxattr(struct inode *host, const char *name, const void *value,
        union ubifs_key key;
        int err, type;
 
-       ubifs_assert(mutex_is_locked(&host->i_mutex));
+       ubifs_assert(inode_is_locked(host));
 
        if (size > UBIFS_MAX_INO_DATA)
                return -ERANGE;
@@ -550,7 +550,7 @@ int ubifs_removexattr(struct dentry *dentry, const char *name)
 
        dbg_gen("xattr '%s', ino %lu ('%pd')", name,
                host->i_ino, dentry);
-       ubifs_assert(mutex_is_locked(&host->i_mutex));
+       ubifs_assert(inode_is_locked(host));
 
        err = check_namespace(&nm);
        if (err < 0)
index bddf3d0..1af9896 100644 (file)
@@ -122,7 +122,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        struct udf_inode_info *iinfo = UDF_I(inode);
        int err;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        retval = generic_write_checks(iocb, from);
        if (retval <= 0)
@@ -136,7 +136,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                                (udf_file_entry_alloc_offset(inode) + end)) {
                        err = udf_expand_file_adinicb(inode);
                        if (err) {
-                               mutex_unlock(&inode->i_mutex);
+                               inode_unlock(inode);
                                udf_debug("udf_expand_adinicb: err=%d\n", err);
                                return err;
                        }
@@ -149,7 +149,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
        retval = __generic_file_write_iter(iocb, from);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (retval > 0) {
                mark_inode_dirty(inode);
@@ -223,12 +223,12 @@ static int udf_release_file(struct inode *inode, struct file *filp)
                 * Grab i_mutex to avoid races with writes changing i_size
                 * while we are running.
                 */
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                down_write(&UDF_I(inode)->i_data_sem);
                udf_discard_prealloc(inode);
                udf_truncate_tail_extent(inode);
                up_write(&UDF_I(inode)->i_data_sem);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        return 0;
 }
index 87dc16d..166d3ed 100644 (file)
@@ -262,7 +262,7 @@ int udf_expand_file_adinicb(struct inode *inode)
                .nr_to_write = 1,
        };
 
-       WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+       WARN_ON_ONCE(!inode_is_locked(inode));
        if (!iinfo->i_lenAlloc) {
                if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
                        iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT;
index 0fbb4c7..a522c15 100644 (file)
@@ -279,17 +279,12 @@ static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
 {
        int i;
        int nr_groups = bitmap->s_nr_groups;
-       int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) *
-                                               nr_groups);
 
        for (i = 0; i < nr_groups; i++)
                if (bitmap->s_block_bitmap[i])
                        brelse(bitmap->s_block_bitmap[i]);
 
-       if (size <= PAGE_SIZE)
-               kfree(bitmap);
-       else
-               vfree(bitmap);
+       kvfree(bitmap);
 }
 
 static void udf_free_partition(struct udf_part_map *map)
index aa138d6..85c40f4 100644 (file)
@@ -103,9 +103,9 @@ static int utimes_common(struct path *path, struct timespec *times)
                }
        }
 retry_deleg:
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        error = notify_change(path->dentry, &newattrs, &delegated_inode);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (delegated_inode) {
                error = break_deleg_wait(&delegated_inode);
                if (!error)
index d5dd6c8..07d0e47 100644 (file)
@@ -129,7 +129,7 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
        if (error)
                return error;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        error = security_inode_setxattr(dentry, name, value, size, flags);
        if (error)
                goto out;
@@ -137,7 +137,7 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
        error = __vfs_setxattr_noperm(dentry, name, value, size, flags);
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return error;
 }
 EXPORT_SYMBOL_GPL(vfs_setxattr);
@@ -277,7 +277,7 @@ vfs_removexattr(struct dentry *dentry, const char *name)
        if (error)
                return error;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        error = security_inode_removexattr(dentry, name);
        if (error)
                goto out;
@@ -290,7 +290,7 @@ vfs_removexattr(struct dentry *dentry, const char *name)
        }
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return error;
 }
 EXPORT_SYMBOL_GPL(vfs_removexattr);
index e2536bb..dc97eb2 100644 (file)
@@ -984,8 +984,6 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
 
 /*
  * Values for di_flags
- * There should be a one-to-one correspondence between these flags and the
- * XFS_XFLAG_s.
  */
 #define XFS_DIFLAG_REALTIME_BIT  0     /* file's blocks come from rt area */
 #define XFS_DIFLAG_PREALLOC_BIT  1     /* file space has been preallocated */
@@ -1025,6 +1023,15 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
         XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
         XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM)
 
+/*
+ * Values for di_flags2. These start by being exposed to userspace in the upper
+ * 16 bits of the XFS_XFLAG_s range.
+ */
+#define XFS_DIFLAG2_DAX_BIT    0       /* use DAX for this inode */
+#define XFS_DIFLAG2_DAX                (1 << XFS_DIFLAG2_DAX_BIT)
+
+#define XFS_DIFLAG2_ANY                (XFS_DIFLAG2_DAX)
+
 /*
  * Inode number format:
  * low inopblog bits - offset in block
index b2b73a9..fffe3d0 100644 (file)
@@ -35,40 +35,6 @@ struct dioattr {
 };
 #endif
 
-/*
- * Structure for XFS_IOC_FSGETXATTR[A] and XFS_IOC_FSSETXATTR.
- */
-#ifndef HAVE_FSXATTR
-struct fsxattr {
-       __u32           fsx_xflags;     /* xflags field value (get/set) */
-       __u32           fsx_extsize;    /* extsize field value (get/set)*/
-       __u32           fsx_nextents;   /* nextents field value (get)   */
-       __u32           fsx_projid;     /* project identifier (get/set) */
-       unsigned char   fsx_pad[12];
-};
-#endif
-
-/*
- * Flags for the bs_xflags/fsx_xflags field
- * There should be a one-to-one correspondence between these flags and the
- * XFS_DIFLAG_s.
- */
-#define XFS_XFLAG_REALTIME     0x00000001      /* data in realtime volume */
-#define XFS_XFLAG_PREALLOC     0x00000002      /* preallocated file extents */
-#define XFS_XFLAG_IMMUTABLE    0x00000008      /* file cannot be modified */
-#define XFS_XFLAG_APPEND       0x00000010      /* all writes append */
-#define XFS_XFLAG_SYNC         0x00000020      /* all writes synchronous */
-#define XFS_XFLAG_NOATIME      0x00000040      /* do not update access time */
-#define XFS_XFLAG_NODUMP       0x00000080      /* do not include in backups */
-#define XFS_XFLAG_RTINHERIT    0x00000100      /* create with rt bit set */
-#define XFS_XFLAG_PROJINHERIT  0x00000200      /* create with parents projid */
-#define XFS_XFLAG_NOSYMLINKS   0x00000400      /* disallow symlink creation */
-#define XFS_XFLAG_EXTSIZE      0x00000800      /* extent size allocator hint */
-#define XFS_XFLAG_EXTSZINHERIT 0x00001000      /* inherit inode extent size */
-#define XFS_XFLAG_NODEFRAG     0x00002000      /* do not defragment */
-#define XFS_XFLAG_FILESTREAM   0x00004000      /* use filestream allocator */
-#define XFS_XFLAG_HASATTR      0x80000000      /* no DIFLAG for this   */
-
 /*
  * Structure for XFS_IOC_GETBMAP.
  * On input, fill in bmv_offset and bmv_length of the first structure
@@ -514,8 +480,8 @@ typedef struct xfs_swapext
 #define XFS_IOC_ALLOCSP                _IOW ('X', 10, struct xfs_flock64)
 #define XFS_IOC_FREESP         _IOW ('X', 11, struct xfs_flock64)
 #define XFS_IOC_DIOINFO                _IOR ('X', 30, struct dioattr)
-#define XFS_IOC_FSGETXATTR     _IOR ('X', 31, struct fsxattr)
-#define XFS_IOC_FSSETXATTR     _IOW ('X', 32, struct fsxattr)
+#define XFS_IOC_FSGETXATTR     FS_IOC_FSGETXATTR
+#define XFS_IOC_FSSETXATTR     FS_IOC_FSSETXATTR
 #define XFS_IOC_ALLOCSP64      _IOW ('X', 36, struct xfs_flock64)
 #define XFS_IOC_FREESP64       _IOW ('X', 37, struct xfs_flock64)
 #define XFS_IOC_GETBMAP                _IOWR('X', 38, struct getbmap)
index daed4bf..435c7de 100644 (file)
@@ -1527,6 +1527,16 @@ xfs_wait_buftarg(
        LIST_HEAD(dispose);
        int loop = 0;
 
+       /*
+        * We need to flush the buffer workqueue to ensure that all IO
+        * completion processing is 100% done. Just waiting on buffer locks is
+        * not sufficient for async IO as the reference count held over IO is
+        * not released until after the buffer lock is dropped. Hence we need to
+        * ensure here that all reference counts have been dropped before we
+        * start walking the LRU list.
+        */
+       drain_workqueue(btp->bt_mount->m_buf_workqueue);
+
        /* loop until there is nothing left on the lru list. */
        while (list_lru_count(&btp->bt_lru)) {
                list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele,
index ebe9b82..52883ac 100644 (file)
@@ -55,7 +55,7 @@ xfs_rw_ilock(
        int                     type)
 {
        if (type & XFS_IOLOCK_EXCL)
-               mutex_lock(&VFS_I(ip)->i_mutex);
+               inode_lock(VFS_I(ip));
        xfs_ilock(ip, type);
 }
 
@@ -66,7 +66,7 @@ xfs_rw_iunlock(
 {
        xfs_iunlock(ip, type);
        if (type & XFS_IOLOCK_EXCL)
-               mutex_unlock(&VFS_I(ip)->i_mutex);
+               inode_unlock(VFS_I(ip));
 }
 
 static inline void
@@ -76,7 +76,7 @@ xfs_rw_ilock_demote(
 {
        xfs_ilock_demote(ip, type);
        if (type & XFS_IOLOCK_EXCL)
-               mutex_unlock(&VFS_I(ip)->i_mutex);
+               inode_unlock(VFS_I(ip));
 }
 
 /*
@@ -1610,9 +1610,8 @@ xfs_filemap_pmd_fault(
 /*
  * pfn_mkwrite was originally inteneded to ensure we capture time stamp
  * updates on write faults. In reality, it's need to serialise against
- * truncate similar to page_mkwrite. Hence we open-code dax_pfn_mkwrite()
- * here and cycle the XFS_MMAPLOCK_SHARED to ensure we serialise the fault
- * barrier in place.
+ * truncate similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED
+ * to ensure we serialise the fault barrier in place.
  */
 static int
 xfs_filemap_pfn_mkwrite(
@@ -1635,6 +1634,8 @@ xfs_filemap_pfn_mkwrite(
        size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
        if (vmf->pgoff >= size)
                ret = VM_FAULT_SIGBUS;
+       else if (IS_DAX(inode))
+               ret = dax_pfn_mkwrite(vma, vmf);
        xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
        sb_end_pagefault(inode->i_sb);
        return ret;
index ae3758a..ceba1a8 100644 (file)
@@ -610,60 +610,69 @@ __xfs_iflock(
 
 STATIC uint
 _xfs_dic2xflags(
-       __uint16_t              di_flags)
+       __uint16_t              di_flags,
+       uint64_t                di_flags2,
+       bool                    has_attr)
 {
        uint                    flags = 0;
 
        if (di_flags & XFS_DIFLAG_ANY) {
                if (di_flags & XFS_DIFLAG_REALTIME)
-                       flags |= XFS_XFLAG_REALTIME;
+                       flags |= FS_XFLAG_REALTIME;
                if (di_flags & XFS_DIFLAG_PREALLOC)
-                       flags |= XFS_XFLAG_PREALLOC;
+                       flags |= FS_XFLAG_PREALLOC;
                if (di_flags & XFS_DIFLAG_IMMUTABLE)
-                       flags |= XFS_XFLAG_IMMUTABLE;
+                       flags |= FS_XFLAG_IMMUTABLE;
                if (di_flags & XFS_DIFLAG_APPEND)
-                       flags |= XFS_XFLAG_APPEND;
+                       flags |= FS_XFLAG_APPEND;
                if (di_flags & XFS_DIFLAG_SYNC)
-                       flags |= XFS_XFLAG_SYNC;
+                       flags |= FS_XFLAG_SYNC;
                if (di_flags & XFS_DIFLAG_NOATIME)
-                       flags |= XFS_XFLAG_NOATIME;
+                       flags |= FS_XFLAG_NOATIME;
                if (di_flags & XFS_DIFLAG_NODUMP)
-                       flags |= XFS_XFLAG_NODUMP;
+                       flags |= FS_XFLAG_NODUMP;
                if (di_flags & XFS_DIFLAG_RTINHERIT)
-                       flags |= XFS_XFLAG_RTINHERIT;
+                       flags |= FS_XFLAG_RTINHERIT;
                if (di_flags & XFS_DIFLAG_PROJINHERIT)
-                       flags |= XFS_XFLAG_PROJINHERIT;
+                       flags |= FS_XFLAG_PROJINHERIT;
                if (di_flags & XFS_DIFLAG_NOSYMLINKS)
-                       flags |= XFS_XFLAG_NOSYMLINKS;
+                       flags |= FS_XFLAG_NOSYMLINKS;
                if (di_flags & XFS_DIFLAG_EXTSIZE)
-                       flags |= XFS_XFLAG_EXTSIZE;
+                       flags |= FS_XFLAG_EXTSIZE;
                if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
-                       flags |= XFS_XFLAG_EXTSZINHERIT;
+                       flags |= FS_XFLAG_EXTSZINHERIT;
                if (di_flags & XFS_DIFLAG_NODEFRAG)
-                       flags |= XFS_XFLAG_NODEFRAG;
+                       flags |= FS_XFLAG_NODEFRAG;
                if (di_flags & XFS_DIFLAG_FILESTREAM)
-                       flags |= XFS_XFLAG_FILESTREAM;
+                       flags |= FS_XFLAG_FILESTREAM;
        }
 
+       if (di_flags2 & XFS_DIFLAG2_ANY) {
+               if (di_flags2 & XFS_DIFLAG2_DAX)
+                       flags |= FS_XFLAG_DAX;
+       }
+
+       if (has_attr)
+               flags |= FS_XFLAG_HASATTR;
+
        return flags;
 }
 
 uint
 xfs_ip2xflags(
-       xfs_inode_t             *ip)
+       struct xfs_inode        *ip)
 {
-       xfs_icdinode_t          *dic = &ip->i_d;
+       struct xfs_icdinode     *dic = &ip->i_d;
 
-       return _xfs_dic2xflags(dic->di_flags) |
-                               (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
+       return _xfs_dic2xflags(dic->di_flags, dic->di_flags2, XFS_IFORK_Q(ip));
 }
 
 uint
 xfs_dic2xflags(
-       xfs_dinode_t            *dip)
+       struct xfs_dinode       *dip)
 {
-       return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
-                               (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
+       return _xfs_dic2xflags(be16_to_cpu(dip->di_flags),
+                               be64_to_cpu(dip->di_flags2), XFS_DFORK_Q(dip));
 }
 
 /*
@@ -862,7 +871,8 @@ xfs_ialloc(
        case S_IFREG:
        case S_IFDIR:
                if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
-                       uint    di_flags = 0;
+                       uint64_t        di_flags2 = 0;
+                       uint            di_flags = 0;
 
                        if (S_ISDIR(mode)) {
                                if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
@@ -898,7 +908,11 @@ xfs_ialloc(
                                di_flags |= XFS_DIFLAG_NODEFRAG;
                        if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
                                di_flags |= XFS_DIFLAG_FILESTREAM;
+                       if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
+                               di_flags2 |= XFS_DIFLAG2_DAX;
+
                        ip->i_d.di_flags |= di_flags;
+                       ip->i_d.di_flags2 |= di_flags2;
                }
                /* FALLTHROUGH */
        case S_IFLNK:
index d42738d..478d04e 100644 (file)
@@ -859,25 +859,25 @@ xfs_merge_ioc_xflags(
        unsigned int    xflags = start;
 
        if (flags & FS_IMMUTABLE_FL)
-               xflags |= XFS_XFLAG_IMMUTABLE;
+               xflags |= FS_XFLAG_IMMUTABLE;
        else
-               xflags &= ~XFS_XFLAG_IMMUTABLE;
+               xflags &= ~FS_XFLAG_IMMUTABLE;
        if (flags & FS_APPEND_FL)
-               xflags |= XFS_XFLAG_APPEND;
+               xflags |= FS_XFLAG_APPEND;
        else
-               xflags &= ~XFS_XFLAG_APPEND;
+               xflags &= ~FS_XFLAG_APPEND;
        if (flags & FS_SYNC_FL)
-               xflags |= XFS_XFLAG_SYNC;
+               xflags |= FS_XFLAG_SYNC;
        else
-               xflags &= ~XFS_XFLAG_SYNC;
+               xflags &= ~FS_XFLAG_SYNC;
        if (flags & FS_NOATIME_FL)
-               xflags |= XFS_XFLAG_NOATIME;
+               xflags |= FS_XFLAG_NOATIME;
        else
-               xflags &= ~XFS_XFLAG_NOATIME;
+               xflags &= ~FS_XFLAG_NOATIME;
        if (flags & FS_NODUMP_FL)
-               xflags |= XFS_XFLAG_NODUMP;
+               xflags |= FS_XFLAG_NODUMP;
        else
-               xflags &= ~XFS_XFLAG_NODUMP;
+               xflags &= ~FS_XFLAG_NODUMP;
 
        return xflags;
 }
@@ -945,40 +945,51 @@ xfs_set_diflags(
        unsigned int            xflags)
 {
        unsigned int            di_flags;
+       uint64_t                di_flags2;
 
        /* can't set PREALLOC this way, just preserve it */
        di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
-       if (xflags & XFS_XFLAG_IMMUTABLE)
+       if (xflags & FS_XFLAG_IMMUTABLE)
                di_flags |= XFS_DIFLAG_IMMUTABLE;
-       if (xflags & XFS_XFLAG_APPEND)
+       if (xflags & FS_XFLAG_APPEND)
                di_flags |= XFS_DIFLAG_APPEND;
-       if (xflags & XFS_XFLAG_SYNC)
+       if (xflags & FS_XFLAG_SYNC)
                di_flags |= XFS_DIFLAG_SYNC;
-       if (xflags & XFS_XFLAG_NOATIME)
+       if (xflags & FS_XFLAG_NOATIME)
                di_flags |= XFS_DIFLAG_NOATIME;
-       if (xflags & XFS_XFLAG_NODUMP)
+       if (xflags & FS_XFLAG_NODUMP)
                di_flags |= XFS_DIFLAG_NODUMP;
-       if (xflags & XFS_XFLAG_NODEFRAG)
+       if (xflags & FS_XFLAG_NODEFRAG)
                di_flags |= XFS_DIFLAG_NODEFRAG;
-       if (xflags & XFS_XFLAG_FILESTREAM)
+       if (xflags & FS_XFLAG_FILESTREAM)
                di_flags |= XFS_DIFLAG_FILESTREAM;
        if (S_ISDIR(ip->i_d.di_mode)) {
-               if (xflags & XFS_XFLAG_RTINHERIT)
+               if (xflags & FS_XFLAG_RTINHERIT)
                        di_flags |= XFS_DIFLAG_RTINHERIT;
-               if (xflags & XFS_XFLAG_NOSYMLINKS)
+               if (xflags & FS_XFLAG_NOSYMLINKS)
                        di_flags |= XFS_DIFLAG_NOSYMLINKS;
-               if (xflags & XFS_XFLAG_EXTSZINHERIT)
+               if (xflags & FS_XFLAG_EXTSZINHERIT)
                        di_flags |= XFS_DIFLAG_EXTSZINHERIT;
-               if (xflags & XFS_XFLAG_PROJINHERIT)
+               if (xflags & FS_XFLAG_PROJINHERIT)
                        di_flags |= XFS_DIFLAG_PROJINHERIT;
        } else if (S_ISREG(ip->i_d.di_mode)) {
-               if (xflags & XFS_XFLAG_REALTIME)
+               if (xflags & FS_XFLAG_REALTIME)
                        di_flags |= XFS_DIFLAG_REALTIME;
-               if (xflags & XFS_XFLAG_EXTSIZE)
+               if (xflags & FS_XFLAG_EXTSIZE)
                        di_flags |= XFS_DIFLAG_EXTSIZE;
        }
-
        ip->i_d.di_flags = di_flags;
+
+       /* diflags2 only valid for v3 inodes. */
+       if (ip->i_d.di_version < 3)
+               return;
+
+       di_flags2 = 0;
+       if (xflags & FS_XFLAG_DAX)
+               di_flags2 |= XFS_DIFLAG2_DAX;
+
+       ip->i_d.di_flags2 = di_flags2;
+
 }
 
 STATIC void
@@ -988,22 +999,27 @@ xfs_diflags_to_linux(
        struct inode            *inode = VFS_I(ip);
        unsigned int            xflags = xfs_ip2xflags(ip);
 
-       if (xflags & XFS_XFLAG_IMMUTABLE)
+       if (xflags & FS_XFLAG_IMMUTABLE)
                inode->i_flags |= S_IMMUTABLE;
        else
                inode->i_flags &= ~S_IMMUTABLE;
-       if (xflags & XFS_XFLAG_APPEND)
+       if (xflags & FS_XFLAG_APPEND)
                inode->i_flags |= S_APPEND;
        else
                inode->i_flags &= ~S_APPEND;
-       if (xflags & XFS_XFLAG_SYNC)
+       if (xflags & FS_XFLAG_SYNC)
                inode->i_flags |= S_SYNC;
        else
                inode->i_flags &= ~S_SYNC;
-       if (xflags & XFS_XFLAG_NOATIME)
+       if (xflags & FS_XFLAG_NOATIME)
                inode->i_flags |= S_NOATIME;
        else
                inode->i_flags &= ~S_NOATIME;
+       if (xflags & FS_XFLAG_DAX)
+               inode->i_flags |= S_DAX;
+       else
+               inode->i_flags &= ~S_DAX;
+
 }
 
 static int
@@ -1016,11 +1032,11 @@ xfs_ioctl_setattr_xflags(
 
        /* Can't change realtime flag if any extents are allocated. */
        if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
-           XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & XFS_XFLAG_REALTIME))
+           XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME))
                return -EINVAL;
 
        /* If realtime flag is set then must have realtime device */
-       if (fa->fsx_xflags & XFS_XFLAG_REALTIME) {
+       if (fa->fsx_xflags & FS_XFLAG_REALTIME) {
                if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 ||
                    (ip->i_d.di_extsize % mp->m_sb.sb_rextsize))
                        return -EINVAL;
@@ -1031,7 +1047,7 @@ xfs_ioctl_setattr_xflags(
         * we have appropriate permission.
         */
        if (((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND)) ||
-            (fa->fsx_xflags & (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
+            (fa->fsx_xflags & (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND))) &&
            !capable(CAP_LINUX_IMMUTABLE))
                return -EPERM;
 
@@ -1095,8 +1111,8 @@ out_cancel:
  * extent size hint validation is somewhat cumbersome. Rules are:
  *
  * 1. extent size hint is only valid for directories and regular files
- * 2. XFS_XFLAG_EXTSIZE is only valid for regular files
- * 3. XFS_XFLAG_EXTSZINHERIT is only valid for directories.
+ * 2. FS_XFLAG_EXTSIZE is only valid for regular files
+ * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
  * 4. can only be changed on regular files if no extents are allocated
  * 5. can be changed on directories at any time
  * 6. extsize hint of 0 turns off hints, clears inode flags.
@@ -1112,10 +1128,10 @@ xfs_ioctl_setattr_check_extsize(
 {
        struct xfs_mount        *mp = ip->i_mount;
 
-       if ((fa->fsx_xflags & XFS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode))
+       if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode))
                return -EINVAL;
 
-       if ((fa->fsx_xflags & XFS_XFLAG_EXTSZINHERIT) &&
+       if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
            !S_ISDIR(ip->i_d.di_mode))
                return -EINVAL;
 
@@ -1132,7 +1148,7 @@ xfs_ioctl_setattr_check_extsize(
                        return -EINVAL;
 
                if (XFS_IS_REALTIME_INODE(ip) ||
-                   (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+                   (fa->fsx_xflags & FS_XFLAG_REALTIME)) {
                        size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
                } else {
                        size = mp->m_sb.sb_blocksize;
@@ -1143,7 +1159,7 @@ xfs_ioctl_setattr_check_extsize(
                if (fa->fsx_extsize % size)
                        return -EINVAL;
        } else
-               fa->fsx_xflags &= ~(XFS_XFLAG_EXTSIZE | XFS_XFLAG_EXTSZINHERIT);
+               fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
 
        return 0;
 }
@@ -1168,7 +1184,7 @@ xfs_ioctl_setattr_check_projid(
 
        if (xfs_get_projid(ip) != fa->fsx_projid)
                return -EINVAL;
-       if ((fa->fsx_xflags & XFS_XFLAG_PROJINHERIT) !=
+       if ((fa->fsx_xflags & FS_XFLAG_PROJINHERIT) !=
            (ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT))
                return -EINVAL;
 
index 06eafaf..76b71a1 100644 (file)
@@ -1205,8 +1205,8 @@ xfs_diflags_to_iflags(
                inode->i_flags |= S_SYNC;
        if (flags & XFS_DIFLAG_NOATIME)
                inode->i_flags |= S_NOATIME;
-       /* XXX: Also needs an on-disk per inode flag! */
-       if (ip->i_mount->m_flags & XFS_MOUNT_DAX)
+       if (ip->i_mount->m_flags & XFS_MOUNT_DAX ||
+           ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
                inode->i_flags |= S_DAX;
 }
 
index dc62219..ade236e 100644 (file)
@@ -42,11 +42,11 @@ xfs_break_layouts(
        while ((error = break_layout(inode, false) == -EWOULDBLOCK)) {
                xfs_iunlock(ip, *iolock);
                if (with_imutex && (*iolock & XFS_IOLOCK_EXCL))
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                error = break_layout(inode, true);
                *iolock = XFS_IOLOCK_EXCL;
                if (with_imutex)
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
                xfs_ilock(ip, *iolock);
        }
 
index aa67339..4f18fd9 100644 (file)
@@ -497,7 +497,6 @@ xfsaild(
        long            tout = 0;       /* milliseconds */
 
        current->flags |= PF_MEMALLOC;
-       set_freezable();
 
        while (!kthread_should_stop()) {
                if (tout && tout <= 20)
index 717a298..dad8af3 100644 (file)
@@ -133,6 +133,5 @@ extern int acpi_get_psd_map(struct cpudata **);
 /* Methods to interact with the PCC mailbox controller. */
 extern struct mbox_chan *
        pcc_mbox_request_channel(struct mbox_client *, unsigned int);
-extern int mbox_send_message(struct mbox_chan *chan, void *mssg);
 
 #endif /* _CPPC_ACPI_H*/
index 3d69c93..6361892 100644 (file)
@@ -204,6 +204,7 @@ struct crypto_ahash {
                      unsigned int keylen);
 
        unsigned int reqsize;
+       bool has_setkey;
        struct crypto_tfm base;
 };
 
@@ -375,6 +376,11 @@ static inline void *ahash_request_ctx(struct ahash_request *req)
 int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
                        unsigned int keylen);
 
+static inline bool crypto_ahash_has_setkey(struct crypto_ahash *tfm)
+{
+       return tfm->has_setkey;
+}
+
 /**
  * crypto_ahash_finup() - update and finalize message digest
  * @req: reference to the ahash_request handle that holds all information
index 018afb2..a2bfd78 100644 (file)
@@ -30,6 +30,9 @@ struct alg_sock {
 
        struct sock *parent;
 
+       unsigned int refcnt;
+       unsigned int nokey_refcnt;
+
        const struct af_alg_type *type;
        void *private;
 };
@@ -50,9 +53,11 @@ struct af_alg_type {
        void (*release)(void *private);
        int (*setkey)(void *private, const u8 *key, unsigned int keylen);
        int (*accept)(void *private, struct sock *sk);
+       int (*accept_nokey)(void *private, struct sock *sk);
        int (*setauthsize)(void *private, unsigned int authsize);
 
        struct proto_ops *ops;
+       struct proto_ops *ops_nokey;
        struct module *owner;
        char name[14];
 };
@@ -67,6 +72,7 @@ int af_alg_register_type(const struct af_alg_type *type);
 int af_alg_unregister_type(const struct af_alg_type *type);
 
 int af_alg_release(struct socket *sock);
+void af_alg_release_parent(struct sock *sk);
 int af_alg_accept(struct sock *sk, struct socket *newsock);
 
 int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len);
@@ -83,11 +89,6 @@ static inline struct alg_sock *alg_sk(struct sock *sk)
        return (struct alg_sock *)sk;
 }
 
-static inline void af_alg_release_parent(struct sock *sk)
-{
-       sock_put(alg_sk(sk)->parent);
-}
-
 static inline void af_alg_init_completion(struct af_alg_completion *completion)
 {
        init_completion(&completion->completion);
index d8dd41f..fd8742a 100644 (file)
@@ -61,6 +61,8 @@ struct crypto_skcipher {
        unsigned int ivsize;
        unsigned int reqsize;
 
+       bool has_setkey;
+
        struct crypto_tfm base;
 };
 
@@ -305,6 +307,11 @@ static inline int crypto_skcipher_setkey(struct crypto_skcipher *tfm,
        return tfm->setkey(tfm, key, keylen);
 }
 
+static inline bool crypto_skcipher_has_setkey(struct crypto_skcipher *tfm)
+{
+       return tfm->has_setkey;
+}
+
 /**
  * crypto_skcipher_reqtfm() - obtain cipher handle from request
  * @req: skcipher_request out of which the cipher handle is to be obtained
index 89d008d..fe5efad 100644 (file)
@@ -42,6 +42,10 @@ int drm_atomic_helper_commit(struct drm_device *dev,
                             struct drm_atomic_state *state,
                             bool async);
 
+bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev,
+                                          struct drm_atomic_state *old_state,
+                                          struct drm_crtc *crtc);
+
 void drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
                                        struct drm_atomic_state *old_state);
 
index 7bfb063..461a055 100644 (file)
 
 void drm_clflush_pages(struct page *pages[], unsigned long num_pages);
 
+static inline bool drm_arch_can_wc_memory(void)
+{
+#if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE)
+       return false;
+#else
+       return true;
+#endif
+}
+
 #endif
index 24ab178..fdb4705 100644 (file)
@@ -44,8 +44,6 @@ struct drm_dp_vcpi {
 /**
  * struct drm_dp_mst_port - MST port
  * @kref: reference count for this port.
- * @guid_valid: for DP 1.2 devices if we have validated the GUID.
- * @guid: guid for DP 1.2 device on this port.
  * @port_num: port number
  * @input: if this port is an input port.
  * @mcs: message capability status - DP 1.2 spec.
@@ -70,10 +68,6 @@ struct drm_dp_vcpi {
 struct drm_dp_mst_port {
        struct kref kref;
 
-       /* if dpcd 1.2 device is on this port - its GUID info */
-       bool guid_valid;
-       u8 guid[16];
-
        u8 port_num;
        bool input;
        bool mcs;
@@ -110,10 +104,12 @@ struct drm_dp_mst_port {
  * @tx_slots: transmission slots for this device.
  * @last_seqno: last sequence number used to talk to this.
  * @link_address_sent: if a link address message has been sent to this device yet.
+ * @guid: GUID for the DP 1.2 branch device. Ports under this branch can be
+ * identified by port number.
  *
  * This structure represents an MST branch device, there is one
- * primary branch device at the root, along with any others connected
+ * to downstream ports of parent branches.
+ * primary branch device at the root, along with any other branches connected
+ * to downstream port of parent branches.
  */
 struct drm_dp_mst_branch {
        struct kref kref;
@@ -132,6 +128,9 @@ struct drm_dp_mst_branch {
        struct drm_dp_sideband_msg_tx *tx_slots[2];
        int last_seqno;
        bool link_address_sent;
+
+       /* global unique identifier to identify branch devices */
+       u8 guid[16];
 };
 
 
@@ -406,11 +405,9 @@ struct drm_dp_payload {
  * @conn_base_id: DRM connector ID this mgr is connected to.
  * @down_rep_recv: msg receiver state for down replies.
  * @up_req_recv: msg receiver state for up requests.
- * @lock: protects mst state, primary, guid, dpcd.
+ * @lock: protects mst state, primary, dpcd.
  * @mst_state: if this manager is enabled for an MST capable port.
  * @mst_primary: pointer to the primary branch device.
- * @guid_valid: GUID valid for the primary branch device.
- * @guid: GUID for primary port.
  * @dpcd: cache of DPCD for primary port.
  * @pbn_div: PBN to slots divisor.
  *
@@ -432,13 +429,11 @@ struct drm_dp_mst_topology_mgr {
        struct drm_dp_sideband_msg_rx up_req_recv;
 
        /* pointer to info about the initial MST device */
-       struct mutex lock; /* protects mst_state + primary + guid + dpcd */
+       struct mutex lock; /* protects mst_state + primary + dpcd */
 
        bool mst_state;
        struct drm_dp_mst_branch *mst_primary;
-       /* primary MST device GUID */
-       bool guid_valid;
-       u8 guid[16];
+
        u8 dpcd[DP_RECEIVER_CAP_SIZE];
        u8 sink_count;
        int pbn_div;
index d639049..553210c 100644 (file)
@@ -73,18 +73,28 @@ static inline u32 dfixed_div(fixed20_12 A, fixed20_12 B)
 #define DRM_FIXED_ONE          (1ULL << DRM_FIXED_POINT)
 #define DRM_FIXED_DECIMAL_MASK (DRM_FIXED_ONE - 1)
 #define DRM_FIXED_DIGITS_MASK  (~DRM_FIXED_DECIMAL_MASK)
+#define DRM_FIXED_EPSILON      1LL
+#define DRM_FIXED_ALMOST_ONE   (DRM_FIXED_ONE - DRM_FIXED_EPSILON)
 
 static inline s64 drm_int2fixp(int a)
 {
        return ((s64)a) << DRM_FIXED_POINT;
 }
 
-static inline int drm_fixp2int(int64_t a)
+static inline int drm_fixp2int(s64 a)
 {
        return ((s64)a) >> DRM_FIXED_POINT;
 }
 
-static inline unsigned drm_fixp_msbset(int64_t a)
+static inline int drm_fixp2int_ceil(s64 a)
+{
+       if (a > 0)
+               return drm_fixp2int(a + DRM_FIXED_ALMOST_ONE);
+       else
+               return drm_fixp2int(a - DRM_FIXED_ALMOST_ONE);
+}
+
+static inline unsigned drm_fixp_msbset(s64 a)
 {
        unsigned shift, sign = (a >> 63) & 1;
 
@@ -136,6 +146,45 @@ static inline s64 drm_fixp_div(s64 a, s64 b)
        return result;
 }
 
+static inline s64 drm_fixp_from_fraction(s64 a, s64 b)
+{
+       s64 res;
+       bool a_neg = a < 0;
+       bool b_neg = b < 0;
+       u64 a_abs = a_neg ? -a : a;
+       u64 b_abs = b_neg ? -b : b;
+       u64 rem;
+
+       /* determine integer part */
+       u64 res_abs  = div64_u64_rem(a_abs, b_abs, &rem);
+
+       /* determine fractional part */
+       {
+               u32 i = DRM_FIXED_POINT;
+
+               do {
+                       rem <<= 1;
+                       res_abs <<= 1;
+                       if (rem >= b_abs) {
+                               res_abs |= 1;
+                               rem -= b_abs;
+                       }
+               } while (--i != 0);
+       }
+
+       /* round up LSB */
+       {
+               u64 summand = (rem << 1) >= b_abs;
+
+               res_abs += summand;
+       }
+
+       res = (s64) res_abs;
+       if (a_neg ^ b_neg)
+               res = -res;
+       return res;
+}
+
 static inline s64 drm_fixp_exp(s64 x)
 {
        s64 tolerance = div64_s64(DRM_FIXED_ONE, 1000000);
diff --git a/include/linux/bcm963xx_nvram.h b/include/linux/bcm963xx_nvram.h
new file mode 100644 (file)
index 0000000..290c231
--- /dev/null
@@ -0,0 +1,112 @@
+#ifndef __LINUX_BCM963XX_NVRAM_H__
+#define __LINUX_BCM963XX_NVRAM_H__
+
+#include <linux/crc32.h>
+#include <linux/if_ether.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+/*
+ * Broadcom BCM963xx SoC board nvram data structure.
+ *
+ * The nvram structure varies in size depending on the SoC board version. Use
+ * the appropriate minimum BCM963XX_NVRAM_*_SIZE define for the information
+ * you need instead of sizeof(struct bcm963xx_nvram) as this may change.
+ */
+
+#define BCM963XX_NVRAM_V4_SIZE         300
+#define BCM963XX_NVRAM_V5_SIZE         (1 * SZ_1K)
+
+#define BCM963XX_DEFAULT_PSI_SIZE      64
+
+/*
+ * NAND partition identifiers. The values index the nand_part_offset[] and
+ * nand_part_size[] arrays of struct bcm963xx_nvram;
+ * __BCM963XX_NVRAM_NAND_NR_PARTS is not a partition but the number of
+ * entries, used to size those arrays.
+ */
+enum bcm963xx_nvram_nand_part {
+       BCM963XX_NVRAM_NAND_PART_BOOT = 0,
+       BCM963XX_NVRAM_NAND_PART_ROOTFS_1,
+       BCM963XX_NVRAM_NAND_PART_ROOTFS_2,
+       BCM963XX_NVRAM_NAND_PART_DATA,
+       BCM963XX_NVRAM_NAND_PART_BBT,
+
+       __BCM963XX_NVRAM_NAND_NR_PARTS
+};
+
+/*
+ * nvram data layout.
+ *
+ * Assuming ETH_ALEN is 6 and the compiler inserts no padding, the fields up
+ * to and including checksum_v4 add up to 300 bytes (BCM963XX_NVRAM_V4_SIZE)
+ * and the whole structure adds up to 1024 bytes, with checksum_v5 as its
+ * last word. bcm963xx_nvram_checksum() relies on each checksum being the
+ * final u32 of its respective size.
+ */
+struct bcm963xx_nvram {
+       u32     version;
+       char    bootline[256];
+       char    name[16];
+       u32     main_tp_number;
+       u32     psi_size;
+       u32     mac_addr_count;
+       u8      mac_addr_base[ETH_ALEN];
+       u8      __reserved1[2];
+       u32     checksum_v4;
+
+       u8      __reserved2[292];
+       u32     nand_part_offset[__BCM963XX_NVRAM_NAND_NR_PARTS];
+       u32     nand_part_size[__BCM963XX_NVRAM_NAND_NR_PARTS];
+       u8      __reserved3[388];
+       u32     checksum_v5;
+};
+
+#define BCM963XX_NVRAM_NAND_PART_OFFSET(nvram, part) \
+       bcm963xx_nvram_nand_part_offset(nvram, BCM963XX_NVRAM_NAND_PART_ ##part)
+
+/*
+ * Return the stored offset of NAND partition @part scaled by SZ_1K.
+ *
+ * The stored value is a u32; it is widened to u64 before the multiply so
+ * the product is computed in 64 bits instead of wrapping at 32 bits before
+ * being returned as u64. @part is used as an array index as-is.
+ */
+static inline u64 __pure bcm963xx_nvram_nand_part_offset(
+       const struct bcm963xx_nvram *nvram,
+       enum bcm963xx_nvram_nand_part part)
+{
+       return (u64)nvram->nand_part_offset[part] * SZ_1K;
+}
+
+#define BCM963XX_NVRAM_NAND_PART_SIZE(nvram, part) \
+       bcm963xx_nvram_nand_part_size(nvram, BCM963XX_NVRAM_NAND_PART_ ##part)
+
+/*
+ * Return the stored size of NAND partition @part scaled by SZ_1K.
+ *
+ * The stored value is a u32; it is widened to u64 before the multiply so
+ * the product is computed in 64 bits instead of wrapping at 32 bits before
+ * being returned as u64. @part is used as an array index as-is.
+ */
+static inline u64 __pure bcm963xx_nvram_nand_part_size(
+       const struct bcm963xx_nvram *nvram,
+       enum bcm963xx_nvram_nand_part part)
+{
+       return (u64)nvram->nand_part_size[part] * SZ_1K;
+}
+
+/*
+ * bcm963xx_nvram_checksum - Verify nvram checksum
+ *
+ * @nvram: pointer to full size nvram data structure
+ * @expected_out: optional pointer to store expected checksum value
+ * @actual_out: optional pointer to store actual checksum value
+ *
+ * Return: 0 if the checksum is valid, otherwise -EINVAL
+ */
+static int __maybe_unused bcm963xx_nvram_checksum(
+       const struct bcm963xx_nvram *nvram,
+       u32 *expected_out, u32 *actual_out)
+{
+       u32 expected, actual;
+       size_t len;
+
+       /*
+        * Pick the stored checksum and the number of bytes preceding it:
+        * the checksum is the last u32 of the version-dependent size.
+        */
+       if (nvram->version <= 4) {
+               expected = nvram->checksum_v4;
+               len = BCM963XX_NVRAM_V4_SIZE - sizeof(u32);
+       } else {
+               expected = nvram->checksum_v5;
+               len = BCM963XX_NVRAM_V5_SIZE - sizeof(u32);
+       }
+
+       /*
+        * Calculate the CRC32 value for the nvram with a checksum value
+        * of 0 without modifying or copying the nvram by combining:
+        * - The CRC32 of the nvram without the checksum value
+        * - The CRC32 of a zero checksum value (which is also 0)
+        *
+        * The data is only read, so the cast keeps the const qualifier.
+        */
+       actual = crc32_le_combine(
+               crc32_le(~0, (const u8 *)nvram, len), 0, sizeof(u32));
+
+       /* Both output pointers are optional. */
+       if (expected_out)
+               *expected_out = expected;
+
+       if (actual_out)
+               *actual_out = actual;
+
+       return expected == actual ? 0 : -EINVAL;
+}
+
+#endif /* __LINUX_BCM963XX_NVRAM_H__ */
diff --git a/include/linux/bcm963xx_tag.h b/include/linux/bcm963xx_tag.h
new file mode 100644 (file)
index 0000000..161c7b3
--- /dev/null
@@ -0,0 +1,102 @@
+#ifndef __LINUX_BCM963XX_TAG_H__
+#define __LINUX_BCM963XX_TAG_H__
+
+#include <linux/types.h>
+
+#define TAGVER_LEN             4       /* Length of Tag Version */
+#define TAGLAYOUT_LEN          4       /* Length of FlashLayoutVer */
+#define SIG1_LEN               20      /* Company Signature 1 Length */
+#define SIG2_LEN               14      /* Company Signature 2 Length */
+#define BOARDID_LEN            16      /* Length of BoardId */
+#define ENDIANFLAG_LEN         2       /* Endian Flag Length */
+#define CHIPID_LEN             6       /* Chip Id Length */
+#define IMAGE_LEN              10      /* Length of Length Field */
+#define ADDRESS_LEN            12      /* Length of Address field */
+#define IMAGE_SEQUENCE_LEN     4       /* Image sequence Length */
+#define RSASIG_LEN             20      /* Length of RSA Signature in tag */
+#define TAGINFO1_LEN           30      /* Length of vendor information field1 in tag */
+#define FLASHLAYOUTVER_LEN     4       /* Length of Flash Layout Version String tag */
+#define TAGINFO2_LEN           16      /* Length of vendor information field2 in tag */
+#define ALTTAGINFO_LEN         54      /* Alternate length for vendor information; Pirelli */
+
+#define NUM_PIRELLI            2
+#define IMAGETAG_CRC_START     0xFFFFFFFF
+
+#define PIRELLI_BOARDS { \
+       "AGPF-S0", \
+       "DWV-S0", \
+}
+
+/* Extended flash address, needs to be subtracted
+ * from bcm_tag flash image offsets.
+ */
+#define BCM963XX_EXTENDED_SIZE 0xBFC00000
+
+/*
+ * The broadcom firmware assumes the rootfs starts the image,
+ * therefore uses the rootfs start (flash_image_address)
+ * to determine where to flash the image.  Since we have the kernel first
+ * we have to give it the kernel address, but the crc uses the length
+ * associated with this address (root_length), which is added to the kernel
+ * length (kernel_length) to determine the length of image to flash and thus
+ * needs to be rootfs + deadcode (jffs2 EOF marker)
+*/
+
+struct bcm_tag {
+       /* 0-3: Version of the image tag */
+       char tag_version[TAGVER_LEN];
+       /* 4-23: Company Line 1 */
+       char sig_1[SIG1_LEN];
+       /*  24-37: Company Line 2 */
+       char sig_2[SIG2_LEN];
+       /* 38-43: Chip this image is for */
+       char chip_id[CHIPID_LEN];
+       /* 44-59: Board name */
+       char board_id[BOARDID_LEN];
+       /* 60-61: Map endianness -- 1 BE 0 LE */
+       char big_endian[ENDIANFLAG_LEN];
+       /* 62-71: Total length of image */
+       char total_length[IMAGE_LEN];
+       /* 72-83: Address in memory of CFE */
+       char cfe__address[ADDRESS_LEN];
+       /* 84-93: Size of CFE */
+       char cfe_length[IMAGE_LEN];
+       /* 94-105: Address in memory of image start
+        * (kernel for OpenWRT, rootfs for stock firmware)
+        */
+       char flash_image_start[ADDRESS_LEN];
+       /* 106-115: Size of rootfs */
+       char root_length[IMAGE_LEN];
+       /* 116-127: Address in memory of kernel */
+       char kernel_address[ADDRESS_LEN];
+       /* 128-137: Size of kernel */
+       char kernel_length[IMAGE_LEN];
+       /* 138-141: Image sequence number
+        * (to be incremented when flashed with a new image)
+        */
+       char image_sequence[IMAGE_SEQUENCE_LEN];
+       /* 142-161: RSA Signature (not used; some vendors may use this) */
+       char rsa_signature[RSASIG_LEN];
+       /* 162-191: Compilation and related information (not used in OpenWrt) */
+       char information1[TAGINFO1_LEN];
+       /* 192-195: Version flash layout */
+       char flash_layout_ver[FLASHLAYOUTVER_LEN];
+       /* 196-199: kernel+rootfs CRC32 */
+       __u32 fskernel_crc;
+       /* 200-215: Unused except on Alice Gate where it is information */
+       char information2[TAGINFO2_LEN];
+       /* 216-219: CRC32 of image less imagetag (kernel for Alice Gate) */
+       __u32 image_crc;
+       /* 220-223: CRC32 of rootfs partition */
+       __u32 rootfs_crc;
+       /* 224-227: CRC32 of kernel partition */
+       __u32 kernel_crc;
+       /* 228-235: Unused at present */
+       char reserved1[8];
+       /* 236-239: CRC32 of header excluding last 20 bytes */
+       __u32 header_crc;
+       /* 240-255: Unused at present */
+       char reserved2[16];
+};
+
+#endif /* __LINUX_BCM963XX_TAG_H__ */
diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h
deleted file mode 100644 (file)
index 77ae77c..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-#ifndef BLK_IOPOLL_H
-#define BLK_IOPOLL_H
-
-struct blk_iopoll;
-typedef int (blk_iopoll_fn)(struct blk_iopoll *, int);
-
-struct blk_iopoll {
-       struct list_head list;
-       unsigned long state;
-       unsigned long data;
-       int weight;
-       int max;
-       blk_iopoll_fn *poll;
-};
-
-enum {
-       IOPOLL_F_SCHED          = 0,
-       IOPOLL_F_DISABLE        = 1,
-};
-
-/*
- * Returns 0 if we successfully set the IOPOLL_F_SCHED bit, indicating
- * that we were the first to acquire this iop for scheduling. If this iop
- * is currently disabled, return "failure".
- */
-static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop)
-{
-       if (!test_bit(IOPOLL_F_DISABLE, &iop->state))
-               return test_and_set_bit(IOPOLL_F_SCHED, &iop->state);
-
-       return 1;
-}
-
-static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop)
-{
-       return test_bit(IOPOLL_F_DISABLE, &iop->state);
-}
-
-extern void blk_iopoll_sched(struct blk_iopoll *);
-extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *);
-extern void blk_iopoll_complete(struct blk_iopoll *);
-extern void __blk_iopoll_complete(struct blk_iopoll *);
-extern void blk_iopoll_enable(struct blk_iopoll *);
-extern void blk_iopoll_disable(struct blk_iopoll *);
-
-#endif
index f89b31d..c1ef6f1 100644 (file)
 #define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49)
 // duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY
 #define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49)  /* overlap w/ above */
+#define CEPH_FEATURE_MON_METADATA (1ULL<<50)
+#define CEPH_FEATURE_OSD_BITWISE_HOBJ_SORT (1ULL<<51) /* can sort objs bitwise */
+#define CEPH_FEATURE_OSD_PROXY_WRITE_FEATURES (1ULL<<52)
+#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V3 (1ULL<<53)
+#define CEPH_FEATURE_OSD_HITSET_GMT (1ULL<<54)
+#define CEPH_FEATURE_HAMMER_0_94_4 (1ULL<<55)
+#define CEPH_FEATURE_NEW_OSDOP_ENCODING   (1ULL<<56) /* New, v7 encoding */
+#define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */
+#define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */
+#define CEPH_FEATURE_CRUSH_TUNABLES5   (1ULL<<58) /* chooseleaf stable mode */
+// duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5
+#define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING   (1ULL<<58) /* New, v7 encoding */
 
 /*
  * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
@@ -108,7 +120,9 @@ static inline u64 ceph_sanitize_features(u64 features)
         CEPH_FEATURE_CRUSH_TUNABLES3 |         \
         CEPH_FEATURE_OSD_PRIMARY_AFFINITY |    \
         CEPH_FEATURE_MSGR_KEEPALIVE2 |         \
-        CEPH_FEATURE_CRUSH_V4)
+        CEPH_FEATURE_CRUSH_V4 |                \
+        CEPH_FEATURE_CRUSH_TUNABLES5 |         \
+        CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
        (CEPH_FEATURE_NOSRCADDR |        \
index 5babb8e..b827e06 100644 (file)
@@ -40,46 +40,11 @@ static inline __u32 ceph_frag_mask_shift(__u32 f)
        return 24 - ceph_frag_bits(f);
 }
 
-static inline int ceph_frag_contains_value(__u32 f, __u32 v)
+static inline bool ceph_frag_contains_value(__u32 f, __u32 v)
 {
        return (v & ceph_frag_mask(f)) == ceph_frag_value(f);
 }
-static inline int ceph_frag_contains_frag(__u32 f, __u32 sub)
-{
-       /* is sub as specific as us, and contained by us? */
-       return ceph_frag_bits(sub) >= ceph_frag_bits(f) &&
-              (ceph_frag_value(sub) & ceph_frag_mask(f)) == ceph_frag_value(f);
-}
 
-static inline __u32 ceph_frag_parent(__u32 f)
-{
-       return ceph_frag_make(ceph_frag_bits(f) - 1,
-                        ceph_frag_value(f) & (ceph_frag_mask(f) << 1));
-}
-static inline int ceph_frag_is_left_child(__u32 f)
-{
-       return ceph_frag_bits(f) > 0 &&
-               (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 0;
-}
-static inline int ceph_frag_is_right_child(__u32 f)
-{
-       return ceph_frag_bits(f) > 0 &&
-               (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 1;
-}
-static inline __u32 ceph_frag_sibling(__u32 f)
-{
-       return ceph_frag_make(ceph_frag_bits(f),
-                     ceph_frag_value(f) ^ (0x1000000 >> ceph_frag_bits(f)));
-}
-static inline __u32 ceph_frag_left_child(__u32 f)
-{
-       return ceph_frag_make(ceph_frag_bits(f)+1, ceph_frag_value(f));
-}
-static inline __u32 ceph_frag_right_child(__u32 f)
-{
-       return ceph_frag_make(ceph_frag_bits(f)+1,
-             ceph_frag_value(f) | (0x1000000 >> (1+ceph_frag_bits(f))));
-}
 static inline __u32 ceph_frag_make_child(__u32 f, int by, int i)
 {
        int newbits = ceph_frag_bits(f) + by;
index 71b1d6c..8dbd787 100644 (file)
@@ -220,6 +220,7 @@ struct ceph_connection {
        struct ceph_entity_addr actual_peer_addr;
 
        /* message out temps */
+       struct ceph_msg_header out_hdr;
        struct ceph_msg *out_msg;        /* sending message (== tail of
                                            out_sent) */
        bool out_msg_done;
@@ -229,7 +230,6 @@ struct ceph_connection {
        int out_kvec_left;   /* kvec's left in out_kvec */
        int out_skip;        /* skip this many bytes */
        int out_kvec_bytes;  /* total bytes left */
-       bool out_kvec_is_msg; /* kvec refers to out_msg */
        int out_more;        /* there is more data after the kvecs */
        __le64 out_temp_ack; /* for writing an ack */
        struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2
index bda5ec0..fccf7f4 100644 (file)
@@ -37,7 +37,7 @@ struct cleancache_ops {
        void (*invalidate_fs)(int);
 };
 
-extern int cleancache_register_ops(struct cleancache_ops *ops);
+extern int cleancache_register_ops(const struct cleancache_ops *ops);
 extern void __cleancache_init_fs(struct super_block *);
 extern void __cleancache_init_shared_fs(struct super_block *);
 extern int  __cleancache_get_page(struct page *);
@@ -48,14 +48,14 @@ extern void __cleancache_invalidate_fs(struct super_block *);
 
 #ifdef CONFIG_CLEANCACHE
 #define cleancache_enabled (1)
-static inline bool cleancache_fs_enabled(struct page *page)
-{
-       return page->mapping->host->i_sb->cleancache_poolid >= 0;
-}
 static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping)
 {
        return mapping->host->i_sb->cleancache_poolid >= 0;
 }
+static inline bool cleancache_fs_enabled(struct page *page)
+{
+       return cleancache_fs_enabled_mapping(page->mapping);
+}
 #else
 #define cleancache_enabled (0)
 #define cleancache_fs_enabled(_page) (0)
@@ -89,11 +89,9 @@ static inline void cleancache_init_shared_fs(struct super_block *sb)
 
 static inline int cleancache_get_page(struct page *page)
 {
-       int ret = -1;
-
        if (cleancache_enabled && cleancache_fs_enabled(page))
-               ret = __cleancache_get_page(page);
-       return ret;
+               return __cleancache_get_page(page);
+       return -1;
 }
 
 static inline void cleancache_put_page(struct page *page)
index 48b4930..be8f12b 100644 (file)
@@ -59,7 +59,8 @@ enum {
        CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */
        CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
        CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
-       CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12
+       CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12,
+       CRUSH_RULE_SET_CHOOSELEAF_STABLE = 13
 };
 
 /*
@@ -205,6 +206,11 @@ struct crush_map {
         * mappings line up a bit better with previous mappings. */
        __u8 chooseleaf_vary_r;
 
+       /* if true, it makes chooseleaf firstn to return stable results (if
+        * no local retry) so that data migrations would be optimal when some
+        * device fails. */
+       __u8 chooseleaf_stable;
+
 #ifndef __KERNEL__
        /*
         * version 0 (original) of straw_calc has various flaws.  version 1
index b415e52..818e450 100644 (file)
@@ -14,6 +14,17 @@ int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
                dax_iodone_t);
 int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
                dax_iodone_t);
+
+#ifdef CONFIG_FS_DAX
+struct page *read_dax_sector(struct block_device *bdev, sector_t n);
+#else
+static inline struct page *read_dax_sector(struct block_device *bdev,
+               sector_t n)
+{
+       return ERR_PTR(-ENXIO);
+}
+#endif
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
                                unsigned int flags, get_block_t, dax_iodone_t);
@@ -36,4 +47,11 @@ static inline bool vma_is_dax(struct vm_area_struct *vma)
 {
        return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
 }
+
+/* True when @mapping has a host inode and IS_DAX() holds for that inode. */
+static inline bool dax_mapping(struct address_space *mapping)
+{
+       struct inode *host = mapping->host;
+
+       if (!host)
+               return false;
+       return IS_DAX(host);
+}
+int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
+               loff_t end);
 #endif
index eb73d74..ae68100 100644 (file)
@@ -433,7 +433,8 @@ struct address_space {
        struct rw_semaphore     i_mmap_rwsem;   /* protect tree, count, list */
        /* Protected by tree_lock together with the radix tree */
        unsigned long           nrpages;        /* number of total pages */
-       unsigned long           nrshadows;      /* number of shadow entries */
+       /* number of shadow or DAX exceptional entries */
+       unsigned long           nrexceptional;
        pgoff_t                 writeback_index;/* writeback starts here */
        const struct address_space_operations *a_ops;   /* methods */
        unsigned long           flags;          /* error bits/gfp mask */
@@ -483,9 +484,6 @@ struct block_device {
        int                     bd_fsfreeze_count;
        /* Mutex for freeze */
        struct mutex            bd_fsfreeze_mutex;
-#ifdef CONFIG_FS_DAX
-       int                     bd_map_count;
-#endif
 };
 
 /*
@@ -714,6 +712,31 @@ enum inode_i_mutex_lock_class
        I_MUTEX_PARENT2,
 };
 
+/*
+ * Thin wrappers around the mutex API applied to inode->i_mutex, so callers
+ * do not have to name the lock field themselves.
+ */
+
+/* Acquire inode->i_mutex via mutex_lock(). */
+static inline void inode_lock(struct inode *inode)
+{
+       mutex_lock(&inode->i_mutex);
+}
+
+/* Release inode->i_mutex via mutex_unlock(). */
+static inline void inode_unlock(struct inode *inode)
+{
+       mutex_unlock(&inode->i_mutex);
+}
+
+/* Try to acquire inode->i_mutex; returns the result of mutex_trylock(). */
+static inline int inode_trylock(struct inode *inode)
+{
+       return mutex_trylock(&inode->i_mutex);
+}
+
+/* Returns the result of mutex_is_locked() on inode->i_mutex. */
+static inline int inode_is_locked(struct inode *inode)
+{
+       return mutex_is_locked(&inode->i_mutex);
+}
+
+/* Acquire inode->i_mutex via mutex_lock_nested() with @subclass. */
+static inline void inode_lock_nested(struct inode *inode, unsigned subclass)
+{
+       mutex_lock_nested(&inode->i_mutex, subclass);
+}
+
 void lock_two_nondirectories(struct inode *, struct inode*);
 void unlock_two_nondirectories(struct inode *, struct inode*);
 
@@ -2881,7 +2904,7 @@ extern void replace_mount_options(struct super_block *sb, char *options);
 
 static inline bool io_is_direct(struct file *filp)
 {
-       return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp));
+       return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
 }
 
 static inline int iocb_flags(struct file *file)
@@ -3047,8 +3070,8 @@ static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx)
 }
 static inline bool dir_relax(struct inode *inode)
 {
-       mutex_unlock(&inode->i_mutex);
-       mutex_lock(&inode->i_mutex);
+       inode_unlock(inode);
+       inode_lock(inode);
        return !IS_DEADDIR(inode);
 }
 
index 0639dcc..81de712 100644 (file)
@@ -165,7 +165,6 @@ struct ftrace_ops {
        ftrace_func_t                   saved_func;
        int __percpu                    *disabled;
 #ifdef CONFIG_DYNAMIC_FTRACE
-       int                             nr_trampolines;
        struct ftrace_ops_hash          local_hash;
        struct ftrace_ops_hash          *func_hash;
        struct ftrace_ops_hash          old_hash;
index 28ad5f6..af1f2b2 100644 (file)
@@ -547,16 +547,16 @@ static inline bool pm_suspended_storage(void)
 }
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_CMA
-
+#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
 /* The below functions must be run on a range from a single zone. */
 extern int alloc_contig_range(unsigned long start, unsigned long end,
                              unsigned migratetype);
 extern void free_contig_range(unsigned long pfn, unsigned nr_pages);
+#endif
 
+#ifdef CONFIG_CMA
 /* CMA stuff */
 extern void init_cma_reserved_pageblock(struct page *page);
-
 #endif
 
 #endif /* __LINUX_GFP_H */
index 76dd4f0..2ead22d 100644 (file)
@@ -87,7 +87,8 @@ enum hrtimer_restart {
  * @function:  timer expiry callback function
  * @base:      pointer to the timer base (per cpu and per clock)
  * @state:     state information (See bit values above)
- * @start_pid: timer statistics field to store the pid of the task which
+ * @is_rel:    Set if the timer was armed relative
+ * @start_pid:  timer statistics field to store the pid of the task which
  *             started the timer
  * @start_site:        timer statistics field to store the site where the timer
  *             was started
@@ -101,7 +102,8 @@ struct hrtimer {
        ktime_t                         _softexpires;
        enum hrtimer_restart            (*function)(struct hrtimer *);
        struct hrtimer_clock_base       *base;
-       unsigned long                   state;
+       u8                              state;
+       u8                              is_rel;
 #ifdef CONFIG_TIMER_STATS
        int                             start_pid;
        void                            *start_site;
@@ -321,6 +323,27 @@ static inline void clock_was_set_delayed(void) { }
 
 #endif
 
+/*
+ * Time left until @timer expires as seen from @now, with the low-resolution
+ * compensation taken back out for relative timers.
+ */
+static inline ktime_t
+__hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now)
+{
+       ktime_t remaining = ktime_sub(timer->node.expires, now);
+
+       /* Nothing to undo unless low-res mode is built in and the timer is relative. */
+       if (!IS_ENABLED(CONFIG_TIME_LOW_RES) || !timer->is_rel)
+               return remaining;
+
+       /*
+        * Take off the extra that hrtimer_start_range_ns() added to
+        * prevent short timeouts.
+        */
+       remaining.tv64 -= hrtimer_resolution;
+       return remaining;
+}
+
+/* Same as the __ variant, using the current time of the timer's own base. */
+static inline ktime_t
+hrtimer_expires_remaining_adjusted(const struct hrtimer *timer)
+{
+       ktime_t now = timer->base->get_time();
+
+       return __hrtimer_expires_remaining_adjusted(timer, now);
+}
+
 extern void clock_was_set(void);
 #ifdef CONFIG_TIMERFD
 extern void timerfd_clock_was_set(void);
@@ -390,7 +413,12 @@ static inline void hrtimer_restart(struct hrtimer *timer)
 }
 
 /* Query timers: */
-extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
+extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
+
+/* Calls __hrtimer_get_remaining() for @timer with adjust set to false. */
+static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
+{
+       return __hrtimer_get_remaining(timer, false);
+}
 
 extern u64 hrtimer_get_next_event(void);
 
index cfe81e1..459fd25 100644 (file)
@@ -120,15 +120,15 @@ extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
                                    unsigned long start,
                                    unsigned long end,
                                    long adjust_next);
-extern bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
-               spinlock_t **ptl);
+extern spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd,
+               struct vm_area_struct *vma);
 /* mmap_sem must be held on entry */
-static inline bool pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
-               spinlock_t **ptl)
+/*
+ * Returns what __pmd_trans_huge_lock() returns when *pmd is a trans-huge or
+ * devmap entry, and NULL otherwise. The return type is a pointer, so the
+ * fallthrough uses NULL rather than the leftover boolean "false".
+ */
+static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
+               struct vm_area_struct *vma)
 {
        VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
        if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
-               return __pmd_trans_huge_lock(pmd, vma, ptl);
+               return __pmd_trans_huge_lock(pmd, vma);
        else
-               return false;
+               return NULL;
 }
@@ -190,10 +190,10 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
                                         long adjust_next)
 {
 }
-static inline bool pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
-               spinlock_t **ptl)
+static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
+               struct vm_area_struct *vma)
 {
-       return false;
+       return NULL;
 }
 
 static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
index cb30edb..0e95fcc 100644 (file)
@@ -413,7 +413,7 @@ enum
        NET_TX_SOFTIRQ,
        NET_RX_SOFTIRQ,
        BLOCK_SOFTIRQ,
-       BLOCK_IOPOLL_SOFTIRQ,
+       IRQ_POLL_SOFTIRQ,
        TASKLET_SOFTIRQ,
        SCHED_SOFTIRQ,
        HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the
index f28dff3..a5c539f 100644 (file)
@@ -133,8 +133,9 @@ struct iommu_dm_region {
 
 /**
  * struct iommu_ops - iommu ops and capabilities
- * @domain_init: init iommu domain
- * @domain_destroy: destroy iommu domain
+ * @capable: check capability
+ * @domain_alloc: allocate iommu domain
+ * @domain_free: free iommu domain
  * @attach_dev: attach device to an iommu domain
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
@@ -144,8 +145,15 @@ struct iommu_dm_region {
  * @iova_to_phys: translate iova to physical address
  * @add_device: add device to iommu grouping
  * @remove_device: remove device from iommu grouping
+ * @device_group: find iommu group for a particular device
  * @domain_get_attr: Query domain attributes
  * @domain_set_attr: Change domain attributes
+ * @get_dm_regions: Request list of direct mapping requirements for a device
+ * @put_dm_regions: Free list of direct mapping requirements for a device
+ * @domain_window_enable: Configure and enable a particular window for a domain
+ * @domain_window_disable: Disable a particular window for a domain
+ * @domain_set_windows: Set the number of windows for a domain
+ * @domain_get_windows: Return the number of windows for a domain
  * @of_xlate: add OF master IDs to iommu grouping
  * @pgsize_bitmap: bitmap of supported page sizes
  * @priv: per-instance data private to the iommu driver
@@ -182,9 +190,9 @@ struct iommu_ops {
        int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
                                    phys_addr_t paddr, u64 size, int prot);
        void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr);
-       /* Set the numer of window per domain */
+       /* Set the number of windows per domain */
        int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
-       /* Get the numer of window per domain */
+       /* Get the number of windows per domain */
        u32 (*domain_get_windows)(struct iommu_domain *domain);
 
 #ifdef CONFIG_OF_IOMMU
diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h
new file mode 100644 (file)
index 0000000..3e8c1b8
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef IRQ_POLL_H
+#define IRQ_POLL_H
+
+struct irq_poll;
+typedef int (irq_poll_fn)(struct irq_poll *, int);
+
+struct irq_poll {
+       struct list_head list;
+       unsigned long state;
+       int weight;
+       irq_poll_fn *poll;
+};
+
+enum {
+       IRQ_POLL_F_SCHED        = 0,
+       IRQ_POLL_F_DISABLE      = 1,
+};
+
+extern void irq_poll_sched(struct irq_poll *);
+extern void irq_poll_init(struct irq_poll *, int, irq_poll_fn *);
+extern void irq_poll_complete(struct irq_poll *);
+extern void irq_poll_enable(struct irq_poll *);
+extern void irq_poll_disable(struct irq_poll *);
+
+#endif
index f64622a..04579d9 100644 (file)
@@ -70,6 +70,7 @@ struct irq_fwspec {
  */
 enum irq_domain_bus_token {
        DOMAIN_BUS_ANY          = 0,
+       DOMAIN_BUS_WIRED,
        DOMAIN_BUS_PCI_MSI,
        DOMAIN_BUS_PLATFORM_MSI,
        DOMAIN_BUS_NEXUS,
index 9ae48d4..792c898 100644 (file)
@@ -51,7 +51,7 @@ enum mem_cgroup_stat_index {
        MEM_CGROUP_STAT_SWAP,           /* # of pages, swapped out */
        MEM_CGROUP_STAT_NSTATS,
        /* default hierarchy stats */
-       MEMCG_SOCK,
+       MEMCG_SOCK = MEM_CGROUP_STAT_NSTATS,
        MEMCG_NR_STAT,
 };
 
index 58391f2..116b284 100644 (file)
@@ -206,7 +206,8 @@ enum {
        MLX4_SET_PORT_GID_TABLE = 0x5,
        MLX4_SET_PORT_PRIO2TC   = 0x8,
        MLX4_SET_PORT_SCHEDULER = 0x9,
-       MLX4_SET_PORT_VXLAN     = 0xB
+       MLX4_SET_PORT_VXLAN     = 0xB,
+       MLX4_SET_PORT_ROCE_ADDR = 0xD
 };
 
 enum {
index d3133be..430a929 100644 (file)
@@ -216,6 +216,7 @@ enum {
        MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN      = 1LL <<  30,
        MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31,
        MLX4_DEV_CAP_FLAG2_LB_SRC_CHK           = 1ULL << 32,
+       MLX4_DEV_CAP_FLAG2_ROCE_V1_V2           = 1ULL <<  33,
 };
 
 enum {
@@ -267,12 +268,14 @@ enum {
        MLX4_BMME_FLAG_TYPE_2_WIN       = 1 <<  9,
        MLX4_BMME_FLAG_RESERVED_LKEY    = 1 << 10,
        MLX4_BMME_FLAG_FAST_REG_WR      = 1 << 11,
+       MLX4_BMME_FLAG_ROCE_V1_V2       = 1 << 19,
        MLX4_BMME_FLAG_PORT_REMAP       = 1 << 24,
        MLX4_BMME_FLAG_VSD_INIT2RTR     = 1 << 28,
 };
 
 enum {
-       MLX4_FLAG_PORT_REMAP            = MLX4_BMME_FLAG_PORT_REMAP
+       MLX4_FLAG_PORT_REMAP            = MLX4_BMME_FLAG_PORT_REMAP,
+       MLX4_FLAG_ROCE_V1_V2            = MLX4_BMME_FLAG_ROCE_V1_V2
 };
 
 enum mlx4_event {
@@ -979,14 +982,11 @@ struct mlx4_mad_ifc {
        for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)     \
                if ((type) == (dev)->caps.port_mask[(port)])
 
-#define mlx4_foreach_non_ib_transport_port(port, dev)                     \
-       for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)       \
-               if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB))
-
 #define mlx4_foreach_ib_transport_port(port, dev)                         \
-       for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)       \
+       for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)       \
                if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \
-                       ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
+                       ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) || \
+                       ((dev)->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2))
 
 #define MLX4_INVALID_SLAVE_ID  0xFF
 #define MLX4_SINK_COUNTER_INDEX(dev)   (dev->caps.max_counters - 1)
@@ -1457,6 +1457,7 @@ int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port);
 
 int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port);
 int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis);
+int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port);
 int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2);
 int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port);
 int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port);
index fe052e2..587cdf9 100644 (file)
@@ -194,7 +194,7 @@ struct mlx4_qp_context {
        u8                      mtu_msgmax;
        u8                      rq_size_stride;
        u8                      sq_size_stride;
-       u8                      rlkey;
+       u8                      rlkey_roce_mode;
        __be32                  usr_page;
        __be32                  local_qpn;
        __be32                  remote_qpn;
@@ -204,7 +204,8 @@ struct mlx4_qp_context {
        u32                     reserved1;
        __be32                  next_send_psn;
        __be32                  cqn_send;
-       u32                     reserved2[2];
+       __be16                  roce_entropy;
+       __be16                  reserved2[3];
        __be32                  last_acked_psn;
        __be32                  ssn;
        __be32                  params2;
@@ -487,4 +488,14 @@ static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
 
 void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp);
 
+static inline u16 folded_qp(u32 q)
+{
+       u16 res;
+
+       res = ((q & 0xff) ^ ((q & 0xff0000) >> 16)) | (q & 0xff00);
+       return res;
+}
+
+u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn);
+
 #endif /* MLX4_QP_H */
index 7be845e..987764a 100644 (file)
@@ -223,6 +223,14 @@ enum {
 #define MLX5_UMR_MTT_MASK      (MLX5_UMR_MTT_ALIGNMENT - 1)
 #define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT
 
+#define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8)
+
+enum {
+       MLX5_EVENT_QUEUE_TYPE_QP = 0,
+       MLX5_EVENT_QUEUE_TYPE_RQ = 1,
+       MLX5_EVENT_QUEUE_TYPE_SQ = 2,
+};
+
 enum mlx5_event {
        MLX5_EVENT_TYPE_COMP               = 0x0,
 
@@ -279,6 +287,26 @@ enum {
        MLX5_DEV_CAP_FLAG_CMDIF_CSUM    = 3LL << 46,
 };
 
+enum {
+       MLX5_ROCE_VERSION_1             = 0,
+       MLX5_ROCE_VERSION_2             = 2,
+};
+
+enum {
+       MLX5_ROCE_VERSION_1_CAP         = 1 << MLX5_ROCE_VERSION_1,
+       MLX5_ROCE_VERSION_2_CAP         = 1 << MLX5_ROCE_VERSION_2,
+};
+
+enum {
+       MLX5_ROCE_L3_TYPE_IPV4          = 0,
+       MLX5_ROCE_L3_TYPE_IPV6          = 1,
+};
+
+enum {
+       MLX5_ROCE_L3_TYPE_IPV4_CAP      = 1 << 1,
+       MLX5_ROCE_L3_TYPE_IPV6_CAP      = 1 << 2,
+};
+
 enum {
        MLX5_OPCODE_NOP                 = 0x00,
        MLX5_OPCODE_SEND_INVAL          = 0x01,
@@ -446,7 +474,7 @@ struct mlx5_init_seg {
        __be32                  rsvd2[880];
        __be32                  internal_timer_h;
        __be32                  internal_timer_l;
-       __be32                  rsrv3[2];
+       __be32                  rsvd3[2];
        __be32                  health_counter;
        __be32                  rsvd4[1019];
        __be64                  ieee1588_clk;
@@ -460,7 +488,9 @@ struct mlx5_eqe_comp {
 };
 
 struct mlx5_eqe_qp_srq {
-       __be32  reserved[6];
+       __be32  reserved1[5];
+       u8      type;
+       u8      reserved2[3];
        __be32  qp_srq_n;
 };
 
@@ -650,6 +680,12 @@ enum {
        CQE_RSS_HTYPE_L4        = 0x3 << 2,
 };
 
+enum {
+       MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH        = 0x0,
+       MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6       = 0x1,
+       MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4       = 0x2,
+};
+
 enum {
        CQE_L2_OK       = 1 << 0,
        CQE_L3_OK       = 1 << 1,
index 5162f35..1e3006d 100644 (file)
@@ -115,6 +115,11 @@ enum {
        MLX5_REG_HOST_ENDIANNESS = 0x7004,
 };
 
+enum {
+       MLX5_ATOMIC_OPS_CMP_SWAP        = 1 << 0,
+       MLX5_ATOMIC_OPS_FETCH_ADD       = 1 << 1,
+};
+
 enum mlx5_page_fault_resume_flags {
        MLX5_PAGE_FAULT_RESUME_REQUESTOR = 1 << 0,
        MLX5_PAGE_FAULT_RESUME_WRITE     = 1 << 1,
@@ -341,9 +346,11 @@ struct mlx5_core_mr {
 };
 
 enum mlx5_res_type {
-       MLX5_RES_QP,
-       MLX5_RES_SRQ,
-       MLX5_RES_XSRQ,
+       MLX5_RES_QP     = MLX5_EVENT_QUEUE_TYPE_QP,
+       MLX5_RES_RQ     = MLX5_EVENT_QUEUE_TYPE_RQ,
+       MLX5_RES_SQ     = MLX5_EVENT_QUEUE_TYPE_SQ,
+       MLX5_RES_SRQ    = 3,
+       MLX5_RES_XSRQ   = 4,
 };
 
 struct mlx5_core_rsc_common {
@@ -651,13 +658,6 @@ extern struct workqueue_struct *mlx5_core_wq;
        .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field),      \
        .struct_size_bytes   = sizeof((struct ib_unpacked_ ## header *)0)->field
 
-struct ib_field {
-       size_t struct_offset_bytes;
-       size_t struct_size_bytes;
-       int    offset_bits;
-       int    size_bits;
-};
-
 static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev)
 {
        return pci_get_drvdata(pdev);
index 68d73f8..231ab6b 100644 (file)
@@ -66,6 +66,11 @@ enum {
        MLX5_MODIFY_TIR_BITMASK_TUNNELED_OFFLOAD_EN   = 0x3
 };
 
+enum {
+       MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE        = 0x0,
+       MLX5_SET_HCA_CAP_OP_MOD_ATOMIC                = 0x3,
+};
+
 enum {
        MLX5_CMD_OP_QUERY_HCA_CAP                 = 0x100,
        MLX5_CMD_OP_QUERY_ADAPTER                 = 0x101,
@@ -573,21 +578,24 @@ enum {
 struct mlx5_ifc_atomic_caps_bits {
        u8         reserved_0[0x40];
 
-       u8         atomic_req_endianness[0x1];
-       u8         reserved_1[0x1f];
+       u8         atomic_req_8B_endianess_mode[0x2];
+       u8         reserved_1[0x4];
+       u8         supported_atomic_req_8B_endianess_mode_1[0x1];
 
-       u8         reserved_2[0x20];
+       u8         reserved_2[0x19];
 
-       u8         reserved_3[0x10];
-       u8         atomic_operations[0x10];
+       u8         reserved_3[0x20];
 
        u8         reserved_4[0x10];
-       u8         atomic_size_qp[0x10];
+       u8         atomic_operations[0x10];
 
        u8         reserved_5[0x10];
+       u8         atomic_size_qp[0x10];
+
+       u8         reserved_6[0x10];
        u8         atomic_size_dc[0x10];
 
-       u8         reserved_6[0x720];
+       u8         reserved_7[0x720];
 };
 
 struct mlx5_ifc_odp_cap_bits {
@@ -850,7 +858,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         reserved_66[0x8];
        u8         log_uar_page_sz[0x10];
 
-       u8         reserved_67[0x40];
+       u8         reserved_67[0x20];
+       u8         device_frequency_mhz[0x20];
        u8         device_frequency_khz[0x20];
        u8         reserved_68[0x5f];
        u8         cqe_zip[0x1];
@@ -2215,19 +2224,25 @@ struct mlx5_ifc_nic_vport_context_bits {
 
        u8         mtu[0x10];
 
-       u8         reserved_3[0x640];
+       u8         system_image_guid[0x40];
+       u8         port_guid[0x40];
+       u8         node_guid[0x40];
+
+       u8         reserved_3[0x140];
+       u8         qkey_violation_counter[0x10];
+       u8         reserved_4[0x430];
 
        u8         promisc_uc[0x1];
        u8         promisc_mc[0x1];
        u8         promisc_all[0x1];
-       u8         reserved_4[0x2];
+       u8         reserved_5[0x2];
        u8         allowed_list_type[0x3];
-       u8         reserved_5[0xc];
+       u8         reserved_6[0xc];
        u8         allowed_list_size[0xc];
 
        struct mlx5_ifc_mac_address_layout_bits permanent_address;
 
-       u8         reserved_6[0x20];
+       u8         reserved_7[0x20];
 
        u8         current_uc_mac_address[0][0x40];
 };
@@ -4199,6 +4214,13 @@ struct mlx5_ifc_modify_tis_out_bits {
        u8         reserved_1[0x40];
 };
 
+struct mlx5_ifc_modify_tis_bitmask_bits {
+       u8         reserved_0[0x20];
+
+       u8         reserved_1[0x1f];
+       u8         prio[0x1];
+};
+
 struct mlx5_ifc_modify_tis_in_bits {
        u8         opcode[0x10];
        u8         reserved_0[0x10];
@@ -4211,7 +4233,7 @@ struct mlx5_ifc_modify_tis_in_bits {
 
        u8         reserved_3[0x20];
 
-       u8         modify_bitmask[0x40];
+       struct mlx5_ifc_modify_tis_bitmask_bits bitmask;
 
        u8         reserved_4[0x40];
 
index f079fb1..5b8c89f 100644 (file)
@@ -85,7 +85,16 @@ enum mlx5_qp_state {
        MLX5_QP_STATE_ERR                       = 6,
        MLX5_QP_STATE_SQ_DRAINING               = 7,
        MLX5_QP_STATE_SUSPENDED                 = 9,
-       MLX5_QP_NUM_STATE
+       MLX5_QP_NUM_STATE,
+       MLX5_QP_STATE,
+       MLX5_QP_STATE_BAD,
+};
+
+enum {
+       MLX5_SQ_STATE_NA        = MLX5_SQC_STATE_ERR + 1,
+       MLX5_SQ_NUM_STATE       = MLX5_SQ_STATE_NA + 1,
+       MLX5_RQ_STATE_NA        = MLX5_RQC_STATE_ERR + 1,
+       MLX5_RQ_NUM_STATE       = MLX5_RQ_STATE_NA + 1,
 };
 
 enum {
@@ -130,6 +139,9 @@ enum {
        MLX5_QP_BIT_RWE                         = 1 << 14,
        MLX5_QP_BIT_RAE                         = 1 << 13,
        MLX5_QP_BIT_RIC                         = 1 <<  4,
+       MLX5_QP_BIT_CC_SLAVE_RECV               = 1 <<  2,
+       MLX5_QP_BIT_CC_SLAVE_SEND               = 1 <<  1,
+       MLX5_QP_BIT_CC_MASTER                   = 1 <<  0
 };
 
 enum {
@@ -248,8 +260,12 @@ struct mlx5_av {
        __be32  dqp_dct;
        u8      stat_rate_sl;
        u8      fl_mlid;
-       __be16  rlid;
-       u8      reserved0[10];
+       union {
+               __be16  rlid;
+               __be16  udp_sport;
+       };
+       u8      reserved0[4];
+       u8      rmac[6];
        u8      tclass;
        u8      hop_limit;
        __be32  grh_gid_fl;
@@ -456,11 +472,16 @@ struct mlx5_qp_path {
        u8                      static_rate;
        u8                      hop_limit;
        __be32                  tclass_flowlabel;
-       u8                      rgid[16];
-       u8                      rsvd1[4];
-       u8                      sl;
+       union {
+               u8              rgid[16];
+               u8              rip[16];
+       };
+       u8                      f_dscp_ecn_prio;
+       u8                      ecn_dscp;
+       __be16                  udp_sport;
+       u8                      dci_cfi_prio_sl;
        u8                      port;
-       u8                      rsvd2[6];
+       u8                      rmac[6];
 };
 
 struct mlx5_qp_context {
@@ -620,8 +641,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
                        struct mlx5_core_qp *qp,
                        struct mlx5_create_qp_mbox_in *in,
                        int inlen);
-int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state,
-                       enum mlx5_qp_state new_state,
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation,
                        struct mlx5_modify_qp_mbox_in *in, int sqd_event,
                        struct mlx5_core_qp *qp);
 int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
@@ -639,6 +659,14 @@ void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
 int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
                                u8 context, int error);
 #endif
+int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                               struct mlx5_core_qp *rq);
+void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
+                                 struct mlx5_core_qp *rq);
+int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                               struct mlx5_core_qp *sq);
+void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
+                                 struct mlx5_core_qp *sq);
 
 static inline const char *mlx5_qp_type_str(int type)
 {
diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h
new file mode 100644 (file)
index 0000000..88441f5
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __TRANSOBJ_H__
+#define __TRANSOBJ_H__
+
+#include <linux/mlx5/driver.h>
+
+int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn);
+void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn);
+int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                       u32 *rqn);
+int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen);
+void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn);
+int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out);
+int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                       u32 *sqn);
+int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen);
+void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn);
+int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out);
+int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                        u32 *tirn);
+int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
+                        int inlen);
+void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn);
+int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                        u32 *tisn);
+int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
+                        int inlen);
+void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn);
+int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                        u32 *rmpn);
+int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen);
+int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn);
+int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
+int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
+int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                         u32 *rmpn);
+int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn);
+int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
+int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
+
+int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                        u32 *rqtn);
+int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
+                        int inlen);
+void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn);
+
+#endif /* __TRANSOBJ_H__ */
index 638f2ca..1237710 100644 (file)
@@ -45,6 +45,11 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
                                     u16 vport, u8 *addr);
 int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev,
                                      u16 vport, u8 *addr);
+int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
+                                          u64 *system_image_guid);
+int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid);
+int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
+                                       u16 *qkey_viol_cntr);
 int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
                             u8 port_num, u16  vf_num, u16 gid_index,
                             union ib_gid *gid);
@@ -85,4 +90,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
                                u16 vlans[],
                                int list_size);
 
+int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev);
+int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev);
+
 #endif /* __MLX5_VPORT_H__ */
index f1cd22f..516e149 100644 (file)
@@ -201,11 +201,13 @@ extern unsigned int kobjsize(const void *objp);
 #endif
 
 #ifdef CONFIG_STACK_GROWSUP
-#define VM_STACK_FLAGS (VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#define VM_STACK       VM_GROWSUP
 #else
-#define VM_STACK_FLAGS (VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#define VM_STACK       VM_GROWSDOWN
 #endif
 
+#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+
 /*
  * Special vmas that are non-mergable, non-mlock()able.
  * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
@@ -1341,8 +1343,7 @@ static inline int stack_guard_page_end(struct vm_area_struct *vma,
                !vma_growsup(vma->vm_next, addr);
 }
 
-extern struct task_struct *task_of_stack(struct task_struct *task,
-                               struct vm_area_struct *vma, bool in_group);
+int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t);
 
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
                unsigned long old_addr, struct vm_area_struct *new_vma,
index d3ebb9d..624b78b 100644 (file)
@@ -424,9 +424,9 @@ struct mm_struct {
        unsigned long total_vm;         /* Total pages mapped */
        unsigned long locked_vm;        /* Pages that have PG_mlocked set */
        unsigned long pinned_vm;        /* Refcount permanently increased */
-       unsigned long data_vm;          /* VM_WRITE & ~VM_SHARED/GROWSDOWN */
-       unsigned long exec_vm;          /* VM_EXEC & ~VM_WRITE */
-       unsigned long stack_vm;         /* VM_GROWSUP/DOWN */
+       unsigned long data_vm;          /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
+       unsigned long exec_vm;          /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
+       unsigned long stack_vm;         /* VM_STACK */
        unsigned long def_flags;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long start_brk, brk, start_stack;
index 33bb1b1..7b6c2cf 100644 (file)
@@ -682,6 +682,12 @@ typedef struct pglist_data {
         */
        unsigned long first_deferred_pfn;
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       spinlock_t split_queue_lock;
+       struct list_head split_queue;
+       unsigned long split_queue_len;
+#endif
 } pg_data_t;
 
 #define node_present_pages(nid)        (NODE_DATA(nid)->node_present_pages)
index 4560d8f..2bb0c30 100644 (file)
@@ -324,6 +324,12 @@ struct module_layout {
 #define __module_layout_align
 #endif
 
+struct mod_kallsyms {
+       Elf_Sym *symtab;
+       unsigned int num_symtab;
+       char *strtab;
+};
+
 struct module {
        enum module_state state;
 
@@ -405,15 +411,10 @@ struct module {
 #endif
 
 #ifdef CONFIG_KALLSYMS
-       /*
-        * We keep the symbol and string tables for kallsyms.
-        * The core_* fields below are temporary, loader-only (they
-        * could really be discarded after module init).
-        */
-       Elf_Sym *symtab, *core_symtab;
-       unsigned int num_symtab, core_num_syms;
-       char *strtab, *core_strtab;
-
+       /* Protected by RCU and/or module_mutex: use rcu_dereference() */
+       struct mod_kallsyms *kallsyms;
+       struct mod_kallsyms core_kallsyms;
+       
        /* Section attributes */
        struct module_sect_attrs *sect_attrs;
 
index 5ac140d..289c231 100644 (file)
@@ -512,7 +512,6 @@ static inline void napi_enable(struct napi_struct *n)
        clear_bit(NAPI_STATE_NPSVC, &n->state);
 }
 
-#ifdef CONFIG_SMP
 /**
  *     napi_synchronize - wait until NAPI is not running
  *     @n: napi context
@@ -523,12 +522,12 @@ static inline void napi_enable(struct napi_struct *n)
  */
 static inline void napi_synchronize(const struct napi_struct *n)
 {
-       while (test_bit(NAPI_STATE_SCHED, &n->state))
-               msleep(1);
+       if (IS_ENABLED(CONFIG_SMP))
+               while (test_bit(NAPI_STATE_SCHED, &n->state))
+                       msleep(1);
+       else
+               barrier();
 }
-#else
-# define napi_synchronize(n)   barrier()
-#endif
 
 enum netdev_queue_state_t {
        __QUEUE_STATE_DRV_XOFF,
index dd10626..dc6e396 100644 (file)
@@ -929,7 +929,7 @@ static inline int of_get_available_child_count(const struct device_node *np)
        return num;
 }
 
-#ifdef CONFIG_OF
+#if defined(CONFIG_OF) && !defined(MODULE)
 #define _OF_DECLARE(table, name, compat, fn, fn_type)                  \
        static const struct of_device_id __of_table_##name              \
                __used __section(__##table##_of_table)                  \
index 4d08b6c..92395a0 100644 (file)
@@ -361,6 +361,9 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
                               unsigned int nr_pages, struct page **pages);
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
                        int tag, unsigned int nr_pages, struct page **pages);
+unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
+                       int tag, unsigned int nr_entries,
+                       struct page **entries, pgoff_t *indices);
 
 struct page *grab_cache_page_write_begin(struct address_space *mapping,
                        pgoff_t index, unsigned flags);
index f9828a4..b35a61a 100644 (file)
@@ -634,9 +634,6 @@ struct perf_event_context {
        int                             nr_cgroups;      /* cgroup evts */
        void                            *task_ctx_data; /* pmu specific data */
        struct rcu_head                 rcu_head;
-
-       struct delayed_work             orphans_remove;
-       bool                            orphans_remove_sched;
 };
 
 /*
@@ -729,7 +726,7 @@ extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
 extern void perf_event_delayed_put(struct task_struct *task);
-extern struct perf_event *perf_event_get(unsigned int fd);
+extern struct file *perf_event_get(unsigned int fd);
 extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
 extern void perf_event_print_debug(void);
 extern void perf_pmu_disable(struct pmu *pmu);
@@ -1044,7 +1041,7 @@ extern void perf_swevent_put_recursion_context(int rctx);
 extern u64 perf_swevent_set_period(struct perf_event *event);
 extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
-extern int __perf_event_disable(void *info);
+extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_task_tick(void);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
@@ -1070,7 +1067,7 @@ static inline int perf_event_init_task(struct task_struct *child) { return 0; }
 static inline void perf_event_exit_task(struct task_struct *child)     { }
 static inline void perf_event_free_task(struct task_struct *task)      { }
 static inline void perf_event_delayed_put(struct task_struct *task)    { }
-static inline struct perf_event *perf_event_get(unsigned int fd)       { return ERR_PTR(-EINVAL); }
+static inline struct file *perf_event_get(unsigned int fd)     { return ERR_PTR(-EINVAL); }
 static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
 {
        return ERR_PTR(-EINVAL);
index 0703b53..37448ab 100644 (file)
@@ -29,7 +29,7 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
        return __pfn_to_pfn_t(pfn, 0);
 }
 
-extern pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags);
+extern pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags);
 
 static inline bool pfn_t_has_page(pfn_t pfn)
 {
@@ -48,7 +48,7 @@ static inline struct page *pfn_t_to_page(pfn_t pfn)
        return NULL;
 }
 
-static inline dma_addr_t pfn_t_to_phys(pfn_t pfn)
+static inline phys_addr_t pfn_t_to_phys(pfn_t pfn)
 {
        return PFN_PHYS(pfn_t_to_pfn(pfn));
 }
index eb8b8ac..24f5470 100644 (file)
@@ -42,6 +42,7 @@ struct pipe_buffer {
  *     @fasync_readers: reader side fasync
  *     @fasync_writers: writer side fasync
  *     @bufs: the circular array of pipe buffers
+ *     @user: the user who created this pipe
  **/
 struct pipe_inode_info {
        struct mutex mutex;
@@ -57,6 +58,7 @@ struct pipe_inode_info {
        struct fasync_struct *fasync_readers;
        struct fasync_struct *fasync_writers;
        struct pipe_buffer *bufs;
+       struct user_struct *user;
 };
 
 /*
@@ -123,6 +125,8 @@ void pipe_unlock(struct pipe_inode_info *);
 void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
 
 extern unsigned int pipe_max_size, pipe_min_size;
+extern unsigned long pipe_user_pages_hard;
+extern unsigned long pipe_user_pages_soft;
 int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *);
 
 
diff --git a/include/linux/platform_data/sdhci-pic32.h b/include/linux/platform_data/sdhci-pic32.h
new file mode 100644 (file)
index 0000000..7e0efe6
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * Purna Chandra Mandal, purna.mandal@microchip.com
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#ifndef __PIC32_SDHCI_PDATA_H__
+#define __PIC32_SDHCI_PDATA_H__
+
+struct pic32_sdhci_platform_data {
+       /* read & write fifo threshold */
+       int (*setup_dma)(u32 rfifo, u32 wfifo);
+};
+
+#endif
index acfea8c..7c3d11a 100644 (file)
@@ -53,12 +53,18 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size)
 {
        BUG();
 }
+
+static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
+{
+       BUG();
+}
 #endif
 
 /*
  * Architectures that define ARCH_HAS_PMEM_API must provide
  * implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(),
- * arch_copy_from_iter_pmem(), arch_clear_pmem() and arch_has_wmb_pmem().
+ * arch_copy_from_iter_pmem(), arch_clear_pmem(), arch_wb_cache_pmem()
+ * and arch_has_wmb_pmem().
  */
 static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size)
 {
@@ -178,4 +184,18 @@ static inline void clear_pmem(void __pmem *addr, size_t size)
        else
                default_clear_pmem(addr, size);
 }
+
+/**
+ * wb_cache_pmem - write back processor cache for PMEM memory range
+ * @addr:      virtual start address
+ * @size:      number of bytes to write back
+ *
+ * Write back the processor cache range starting at 'addr' for 'size' bytes.
+ * This function requires explicit ordering with a wmb_pmem() call.
+ */
+static inline void wb_cache_pmem(void __pmem *addr, size_t size)
+{
+       if (arch_has_pmem_api())
+               arch_wb_cache_pmem(addr, size);
+}
 #endif /* __PMEM_H__ */
index 57e7d87..f54be70 100644 (file)
 #define RADIX_TREE_EXCEPTIONAL_ENTRY   2
 #define RADIX_TREE_EXCEPTIONAL_SHIFT   2
 
+#define RADIX_DAX_MASK 0xf
+#define RADIX_DAX_SHIFT        4
+#define RADIX_DAX_PTE  (0x4 | RADIX_TREE_EXCEPTIONAL_ENTRY)
+#define RADIX_DAX_PMD  (0x8 | RADIX_TREE_EXCEPTIONAL_ENTRY)
+#define RADIX_DAX_TYPE(entry) ((unsigned long)entry & RADIX_DAX_MASK)
+#define RADIX_DAX_SECTOR(entry) (((unsigned long)entry >> RADIX_DAX_SHIFT))
+#define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \
+               RADIX_DAX_SHIFT | (pmd ? RADIX_DAX_PMD : RADIX_DAX_PTE)))
+
 static inline int radix_tree_is_indirect_ptr(void *ptr)
 {
        return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR);
@@ -369,13 +378,29 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start)
 void **radix_tree_next_chunk(struct radix_tree_root *root,
                             struct radix_tree_iter *iter, unsigned flags);
 
+/**
+ * radix_tree_iter_retry - retry this chunk of the iteration
+ * @iter:      iterator state
+ *
+ * If we iterate over a tree protected only by the RCU lock, a race
+ * against deletion or creation may result in seeing a slot for which
+ * radix_tree_deref_retry() returns true.  If so, call this function
+ * and continue the iteration.
+ */
+static inline __must_check
+void **radix_tree_iter_retry(struct radix_tree_iter *iter)
+{
+       iter->next_index = iter->index;
+       return NULL;
+}
+
 /**
  * radix_tree_chunk_size - get current chunk size
  *
  * @iter:      pointer to radix tree iterator
  * Returns:    current chunk size
  */
-static __always_inline unsigned
+static __always_inline long
 radix_tree_chunk_size(struct radix_tree_iter *iter)
 {
        return iter->next_index - iter->index;
@@ -409,9 +434,9 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
                        return slot + offset + 1;
                }
        } else {
-               unsigned size = radix_tree_chunk_size(iter) - 1;
+               long size = radix_tree_chunk_size(iter);
 
-               while (size--) {
+               while (--size > 0) {
                        slot++;
                        iter->index++;
                        if (likely(*slot))
index a7a06d1..a0118d5 100644 (file)
@@ -152,6 +152,8 @@ void raid6_dual_recov(int disks, size_t bytes, int faila, int failb,
 
 # define jiffies       raid6_jiffies()
 # define printk        printf
+# define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
+# define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__)
 # define GFP_KERNEL    0
 # define __get_free_pages(x, y)        ((unsigned long)mmap(NULL, PAGE_SIZE << (y), \
                                                     PROT_READ|PROT_WRITE,   \
index bdf597c..a07f42b 100644 (file)
@@ -109,20 +109,6 @@ static inline void put_anon_vma(struct anon_vma *anon_vma)
                __put_anon_vma(anon_vma);
 }
 
-static inline void vma_lock_anon_vma(struct vm_area_struct *vma)
-{
-       struct anon_vma *anon_vma = vma->anon_vma;
-       if (anon_vma)
-               down_write(&anon_vma->root->rwsem);
-}
-
-static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
-{
-       struct anon_vma *anon_vma = vma->anon_vma;
-       if (anon_vma)
-               up_write(&anon_vma->root->rwsem);
-}
-
 static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
 {
        down_write(&anon_vma->root->rwsem);
index f1e81e1..a10494a 100644 (file)
@@ -835,6 +835,7 @@ struct user_struct {
 #endif
        unsigned long locked_shm; /* How many pages of mlocked shm ? */
        unsigned long unix_inflight;    /* How many files in flight in unix sockets */
+       atomic_long_t pipe_bufs;  /* how many pages are allocated in pipe buffers */
 
 #ifdef CONFIG_KEYS
        struct key *uid_keyring;        /* UID specific keyring */
index a43f41c..4d4780c 100644 (file)
@@ -15,10 +15,7 @@ struct shmem_inode_info {
        unsigned int            seals;          /* shmem seals */
        unsigned long           flags;
        unsigned long           alloced;        /* data pages alloced to file */
-       union {
-               unsigned long   swapped;        /* subtotal assigned to swap */
-               char            *symlink;       /* unswappable short symlink */
-       };
+       unsigned long           swapped;        /* subtotal assigned to swap */
        struct shared_policy    policy;         /* NUMA memory alloc policy */
        struct list_head        swaplist;       /* chain of maybes on swap */
        struct simple_xattrs    xattrs;         /* list of xattrs */
index f869807..5322fea 100644 (file)
@@ -51,6 +51,7 @@
 /* RPC/RDMA parameters and stats */
 extern unsigned int svcrdma_ord;
 extern unsigned int svcrdma_max_requests;
+extern unsigned int svcrdma_max_bc_requests;
 extern unsigned int svcrdma_max_req_size;
 
 extern atomic_t rdma_stat_recv;
@@ -69,6 +70,7 @@ extern atomic_t rdma_stat_sq_prod;
  * completes.
  */
 struct svc_rdma_op_ctxt {
+       struct list_head free;
        struct svc_rdma_op_ctxt *read_hdr;
        struct svc_rdma_fastreg_mr *frmr;
        int hdr_count;
@@ -112,6 +114,7 @@ struct svc_rdma_fastreg_mr {
        struct list_head frmr_list;
 };
 struct svc_rdma_req_map {
+       struct list_head free;
        unsigned long count;
        union {
                struct kvec sge[RPCSVC_MAXPAGES];
@@ -132,28 +135,32 @@ struct svcxprt_rdma {
        int                  sc_max_sge;
        int                  sc_max_sge_rd;     /* max sge for read target */
 
-       int                  sc_sq_depth;       /* Depth of SQ */
        atomic_t             sc_sq_count;       /* Number of SQ WR on queue */
-
-       int                  sc_max_requests;   /* Depth of RQ */
+       unsigned int         sc_sq_depth;       /* Depth of SQ */
+       unsigned int         sc_rq_depth;       /* Depth of RQ */
+       u32                  sc_max_requests;   /* Forward credits */
+       u32                  sc_max_bc_requests;/* Backward credits */
        int                  sc_max_req_size;   /* Size of each RQ WR buf */
 
        struct ib_pd         *sc_pd;
 
        atomic_t             sc_dma_used;
-       atomic_t             sc_ctxt_used;
+       spinlock_t           sc_ctxt_lock;
+       struct list_head     sc_ctxts;
+       int                  sc_ctxt_used;
+       spinlock_t           sc_map_lock;
+       struct list_head     sc_maps;
+
        struct list_head     sc_rq_dto_q;
        spinlock_t           sc_rq_dto_lock;
        struct ib_qp         *sc_qp;
        struct ib_cq         *sc_rq_cq;
        struct ib_cq         *sc_sq_cq;
-       struct ib_mr         *sc_phys_mr;       /* MR for server memory */
        int                  (*sc_reader)(struct svcxprt_rdma *,
                                          struct svc_rqst *,
                                          struct svc_rdma_op_ctxt *,
                                          int *, u32 *, u32, u32, u64, bool);
        u32                  sc_dev_caps;       /* distilled device caps */
-       u32                  sc_dma_lkey;       /* local dma key */
        unsigned int         sc_frmr_pg_list_len;
        struct list_head     sc_frmr_q;
        spinlock_t           sc_frmr_q_lock;
@@ -179,8 +186,18 @@ struct svcxprt_rdma {
 #define RPCRDMA_MAX_REQUESTS    32
 #define RPCRDMA_MAX_REQ_SIZE    4096
 
+/* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our
+ * current NFSv4.1 implementation supports one backchannel slot.
+ */
+#define RPCRDMA_MAX_BC_REQUESTS        2
+
 #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
 
+/* svc_rdma_backchannel.c */
+extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
+                                   struct rpcrdma_msg *rmsgp,
+                                   struct xdr_buf *rcvbuf);
+
 /* svc_rdma_marshal.c */
 extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
 extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
@@ -206,6 +223,8 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
                                u32, u32, u64, bool);
 
 /* svc_rdma_sendto.c */
+extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *,
+                           struct svc_rdma_req_map *);
 extern int svc_rdma_sendto(struct svc_rqst *);
 extern struct rpcrdma_read_chunk *
        svc_rdma_get_read_chunk(struct rpcrdma_msg *);
@@ -214,13 +233,14 @@ extern struct rpcrdma_read_chunk *
 extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
 extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
                                enum rpcrdma_errcode);
-extern int svc_rdma_post_recv(struct svcxprt_rdma *);
+extern int svc_rdma_post_recv(struct svcxprt_rdma *, gfp_t);
 extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
 extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
 extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
 extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
-extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
-extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
+extern struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *);
+extern void svc_rdma_put_req_map(struct svcxprt_rdma *,
+                                struct svc_rdma_req_map *);
 extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
 extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
                              struct svc_rdma_fastreg_mr *);
@@ -234,6 +254,7 @@ extern struct svc_xprt_class svc_rdma_bc_class;
 #endif
 
 /* svc_rdma.c */
+extern struct workqueue_struct *svc_rdma_wq;
 extern int svc_rdma_init(void);
 extern void svc_rdma_cleanup(void);
 
index e7a018e..017fced 100644 (file)
@@ -1,10 +1,13 @@
 #ifndef __LINUX_SWIOTLB_H
 #define __LINUX_SWIOTLB_H
 
+#include <linux/dma-direction.h>
+#include <linux/init.h>
 #include <linux/types.h>
 
 struct device;
 struct dma_attrs;
+struct page;
 struct scatterlist;
 
 extern int swiotlb_force;
index 613c29b..e13a1ac 100644 (file)
@@ -43,6 +43,9 @@
 /* Default weight of a bound cooling device */
 #define THERMAL_WEIGHT_DEFAULT 0
 
+/* Use a value below 0 K to indicate an invalid/uninitialized temperature */
+#define THERMAL_TEMP_INVALID   -274000
+
 /* Unit conversion macros */
 #define DECI_KELVIN_TO_CELSIUS(t)      ({                      \
        long _t = (t);                                          \
@@ -167,6 +170,7 @@ struct thermal_attr {
  * @forced_passive:    If > 0, temperature at which to switch on all ACPI
  *                     processor cooling devices.  Currently only used by the
  *                     step-wise governor.
+ * @need_update:       if equal to 1, thermal_zone_device_update needs to be invoked.
  * @ops:       operations this &thermal_zone_device supports
  * @tzp:       thermal zone parameters
  * @governor:  pointer to the governor for this thermal zone
@@ -194,6 +198,7 @@ struct thermal_zone_device {
        int emul_temperature;
        int passive;
        unsigned int forced_passive;
+       atomic_t need_update;
        struct thermal_zone_device_ops *ops;
        struct thermal_zone_params *tzp;
        struct thermal_governor *governor;
index 2fd8708..d9fb4b0 100644 (file)
@@ -649,6 +649,7 @@ extern long vt_compat_ioctl(struct tty_struct *tty,
 /* tty_mutex.c */
 /* functions for preparation of BKL removal */
 extern void __lockfunc tty_lock(struct tty_struct *tty);
+extern int  tty_lock_interruptible(struct tty_struct *tty);
 extern void __lockfunc tty_unlock(struct tty_struct *tty);
 extern void __lockfunc tty_lock_slave(struct tty_struct *tty);
 extern void __lockfunc tty_unlock_slave(struct tty_struct *tty);
index 0e32bc7..ca73c50 100644 (file)
@@ -311,6 +311,7 @@ enum {
 
        __WQ_DRAINING           = 1 << 16, /* internal: workqueue is draining */
        __WQ_ORDERED            = 1 << 17, /* internal: workqueue is ordered */
+       __WQ_LEGACY             = 1 << 18, /* internal: create*_workqueue() */
 
        WQ_MAX_ACTIVE           = 512,    /* I like 512, better ideas? */
        WQ_MAX_UNBOUND_PER_CPU  = 4,      /* 4 * #cpus for unbound wq */
@@ -411,12 +412,12 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
        alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
 
 #define create_workqueue(name)                                         \
-       alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, (name))
+       alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
 #define create_freezable_workqueue(name)                               \
-       alloc_workqueue("%s", WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, \
-                       1, (name))
+       alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \
+                       WQ_MEM_RECLAIM, 1, (name))
 #define create_singlethread_workqueue(name)                            \
-       alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, name)
+       alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
index ef03ae5..8a0f55b 100644 (file)
@@ -533,7 +533,8 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
                const unsigned int requested_sizes[]);
 int vb2_core_prepare_buf(struct vb2_queue *q, unsigned int index, void *pb);
 int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb);
-int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking);
+int vb2_core_dqbuf(struct vb2_queue *q, unsigned int *pindex, void *pb,
+                  bool nonblocking);
 
 int vb2_core_streamon(struct vb2_queue *q, unsigned int type);
 int vb2_core_streamoff(struct vb2_queue *q, unsigned int type);
index 5289929..5ee3c68 100644 (file)
@@ -252,6 +252,12 @@ struct l2cap_conn_rsp {
 #define L2CAP_PSM_3DSP         0x0021
 #define L2CAP_PSM_IPSP         0x0023 /* 6LoWPAN */
 
+#define L2CAP_PSM_DYN_START    0x1001
+#define L2CAP_PSM_DYN_END      0xffff
+#define L2CAP_PSM_AUTO_END     0x10ff
+#define L2CAP_PSM_LE_DYN_START  0x0080
+#define L2CAP_PSM_LE_DYN_END   0x00ff
+
 /* channel identifier */
 #define L2CAP_CID_SIGNALING    0x0001
 #define L2CAP_CID_CONN_LESS    0x0002
index 6816f0f..30a56ab 100644 (file)
@@ -44,6 +44,24 @@ static inline bool skb_valid_dst(const struct sk_buff *skb)
        return dst && !(dst->flags & DST_METADATA);
 }
 
+static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a,
+                                      const struct sk_buff *skb_b)
+{
+       const struct metadata_dst *a, *b;
+
+       if (!(skb_a->_skb_refdst | skb_b->_skb_refdst))
+               return 0;
+
+       a = (const struct metadata_dst *) skb_dst(skb_a);
+       b = (const struct metadata_dst *) skb_dst(skb_b);
+
+       if (!a != !b || a->u.tun_info.options_len != b->u.tun_info.options_len)
+               return 1;
+
+       return memcmp(&a->u.tun_info, &b->u.tun_info,
+                     sizeof(a->u.tun_info) + a->u.tun_info.options_len);
+}
+
 struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags);
 struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags);
 
index 877f682..295d291 100644 (file)
@@ -64,8 +64,16 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
 
 void ip6_route_input(struct sk_buff *skb);
 
-struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
-                                  struct flowi6 *fl6);
+struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
+                                        struct flowi6 *fl6, int flags);
+
+static inline struct dst_entry *ip6_route_output(struct net *net,
+                                                const struct sock *sk,
+                                                struct flowi6 *fl6)
+{
+       return ip6_route_output_flags(net, sk, fl6, 0);
+}
+
 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
                                   int flags);
 
index 788ef58..62e17d1 100644 (file)
@@ -79,12 +79,10 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
             const struct nf_conntrack_l3proto *l3proto,
             const struct nf_conntrack_l4proto *proto);
 
-#ifdef CONFIG_LOCKDEP
-# define CONNTRACK_LOCKS 8
-#else
-# define CONNTRACK_LOCKS 1024
-#endif
+#define CONNTRACK_LOCKS 1024
+
 extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
+void nf_conntrack_lock(spinlock_t *lock);
 
 extern spinlock_t nf_conntrack_expect_lock;
 
index 20e7212..205630b 100644 (file)
@@ -756,7 +756,6 @@ struct sctp_transport {
 
        /* Reference counting. */
        atomic_t refcnt;
-       __u32    dead:1,
                /* RTO-Pending : A flag used to track if one of the DATA
                 *              chunks sent to this address is currently being
                 *              used to compute a RTT. If this flag is 0,
@@ -766,7 +765,7 @@ struct sctp_transport {
                 *              calculation completes (i.e. the DATA chunk
                 *              is SACK'd) clear this flag.
                 */
-                rto_pending:1,
+       __u32   rto_pending:1,
 
                /*
                 * hb_sent : a flag that signals that we have a pending
@@ -955,7 +954,7 @@ void sctp_transport_route(struct sctp_transport *, union sctp_addr *,
 void sctp_transport_pmtu(struct sctp_transport *, struct sock *sk);
 void sctp_transport_free(struct sctp_transport *);
 void sctp_transport_reset_timers(struct sctp_transport *);
-void sctp_transport_hold(struct sctp_transport *);
+int sctp_transport_hold(struct sctp_transport *);
 void sctp_transport_put(struct sctp_transport *);
 void sctp_transport_update_rto(struct sctp_transport *, __u32);
 void sctp_transport_raise_cwnd(struct sctp_transport *, __u32, __u32);
index b9e7b3d..f5ea148 100644 (file)
@@ -1035,18 +1035,6 @@ struct proto {
        struct list_head        node;
 #ifdef SOCK_REFCNT_DEBUG
        atomic_t                socks;
-#endif
-#ifdef CONFIG_MEMCG_KMEM
-       /*
-        * cgroup specific init/deinit functions. Called once for all
-        * protocols that implement it, from cgroups populate function.
-        * This function has to setup any files the protocol want to
-        * appear in the kmem cgroup filesystem.
-        */
-       int                     (*init_cgroup)(struct mem_cgroup *memcg,
-                                              struct cgroup_subsys *ss);
-       void                    (*destroy_cgroup)(struct mem_cgroup *memcg);
-       struct cg_proto         *(*proto_cgroup)(struct mem_cgroup *memcg);
 #endif
        int                     (*diag_destroy)(struct sock *sk, int err);
 };
index 7dda3d7..aecd303 100644 (file)
@@ -16,7 +16,7 @@ struct sock_reuseport {
 };
 
 extern int reuseport_alloc(struct sock *sk);
-extern int reuseport_add_sock(struct sock *sk, const struct sock *sk2);
+extern int reuseport_add_sock(struct sock *sk, struct sock *sk2);
 extern void reuseport_detach_sock(struct sock *sk);
 extern struct sock *reuseport_select_sock(struct sock *sk,
                                          u32 hash,
index 8ea1997..f6f8f03 100644 (file)
@@ -216,7 +216,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 /* TCP thin-stream limits */
 #define TCP_THIN_LINEAR_RETRIES 6       /* After 6 linear retries, do exp. backoff */
 
-/* TCP initial congestion window as per draft-hkchu-tcpm-initcwnd-01 */
+/* TCP initial congestion window as per rfc6928 */
 #define TCP_INIT_CWND          10
 
 /* Bit Flags for sysctl_tcp_fastopen */
index 1152859..c34c900 100644 (file)
@@ -83,6 +83,8 @@ struct rdma_dev_addr {
        int bound_dev_if;
        enum rdma_transport_type transport;
        struct net *net;
+       enum rdma_network_type network;
+       int hoplimit;
 };
 
 /**
@@ -91,8 +93,8 @@ struct rdma_dev_addr {
  *
  * The dev_addr->net field must be initialized.
  */
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
-                     u16 *vlan_id);
+int rdma_translate_ip(const struct sockaddr *addr,
+                     struct rdma_dev_addr *dev_addr, u16 *vlan_id);
 
 /**
  * rdma_resolve_ip - Resolve source and destination IP addresses to
@@ -117,6 +119,10 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
                                     struct rdma_dev_addr *addr, void *context),
                    void *context);
 
+int rdma_resolve_ip_route(struct sockaddr *src_addr,
+                         const struct sockaddr *dst_addr,
+                         struct rdma_dev_addr *addr);
+
 void rdma_addr_cancel(struct rdma_dev_addr *addr);
 
 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
@@ -125,8 +131,10 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 int rdma_addr_size(struct sockaddr *addr);
 
 int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
-int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
-                              u8 *smac, u16 *vlan_id, int if_index);
+int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
+                                const union ib_gid *dgid,
+                                u8 *smac, u16 *vlan_id, int *if_index,
+                                int *hoplimit);
 
 static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
 {
index 269a27c..e30f19b 100644 (file)
@@ -60,6 +60,7 @@ int ib_get_cached_gid(struct ib_device    *device,
  *   a specified GID value occurs.
  * @device: The device to query.
  * @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
  * @ndev: In RoCE, the net device of the device. NULL means ignore.
  * @port_num: The port number of the device where the GID value was found.
  * @index: The index into the cached GID table where the GID was found.  This
@@ -70,6 +71,7 @@ int ib_get_cached_gid(struct ib_device    *device,
  */
 int ib_find_cached_gid(struct ib_device *device,
                       const union ib_gid *gid,
+                      enum ib_gid_type gid_type,
                       struct net_device *ndev,
                       u8               *port_num,
                       u16              *index);
@@ -79,6 +81,7 @@ int ib_find_cached_gid(struct ib_device *device,
  * GID value occurs
  * @device: The device to query.
  * @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
  * @port_num: The port number of the device where the GID value should be
  *   searched.
  * @ndev: In RoCE, the net device of the device. Null means ignore.
@@ -90,6 +93,7 @@ int ib_find_cached_gid(struct ib_device *device,
  */
 int ib_find_cached_gid_by_port(struct ib_device *device,
                               const union ib_gid *gid,
+                              enum ib_gid_type gid_type,
                               u8               port_num,
                               struct net_device *ndev,
                               u16              *index);
index ec9b44d..0ff049b 100644 (file)
@@ -438,6 +438,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
 /**
  * ib_mad_recv_handler - callback handler for a received MAD.
  * @mad_agent: MAD agent requesting the received MAD.
+ * @send_buf: Send buffer if found, else NULL
  * @mad_recv_wc: Received work completion information on the received MAD.
  *
  * MADs received in response to a send request operation will be handed to
@@ -447,6 +448,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
  * modify the data referenced by @mad_recv_wc.
  */
 typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
+                                   struct ib_mad_send_buf *send_buf,
                                    struct ib_mad_recv_wc *mad_recv_wc);
 
 /**
index e99d8f9..0f3daae 100644 (file)
@@ -41,6 +41,8 @@ enum {
        IB_ETH_BYTES  = 14,
        IB_VLAN_BYTES = 4,
        IB_GRH_BYTES  = 40,
+       IB_IP4_BYTES  = 20,
+       IB_UDP_BYTES  = 8,
        IB_BTH_BYTES  = 12,
        IB_DETH_BYTES = 8
 };
@@ -223,6 +225,27 @@ struct ib_unpacked_eth {
        __be16  type;
 };
 
+struct ib_unpacked_ip4 {
+       u8      ver;
+       u8      hdr_len;
+       u8      tos;
+       __be16  tot_len;
+       __be16  id;
+       __be16  frag_off;
+       u8      ttl;
+       u8      protocol;
+       __sum16 check;
+       __be32  saddr;
+       __be32  daddr;
+};
+
+struct ib_unpacked_udp {
+       __be16  sport;
+       __be16  dport;
+       __be16  length;
+       __be16  csum;
+};
+
 struct ib_unpacked_vlan {
        __be16  tag;
        __be16  type;
@@ -237,6 +260,10 @@ struct ib_ud_header {
        struct ib_unpacked_vlan vlan;
        int                     grh_present;
        struct ib_unpacked_grh  grh;
+       int                     ipv4_present;
+       struct ib_unpacked_ip4  ip4;
+       int                     udp_present;
+       struct ib_unpacked_udp  udp;
        struct ib_unpacked_bth  bth;
        struct ib_unpacked_deth deth;
        int                     immediate_present;
@@ -253,13 +280,17 @@ void ib_unpack(const struct ib_field        *desc,
               void                         *buf,
               void                         *structure);
 
-void ib_ud_header_init(int                 payload_bytes,
-                      int                  lrh_present,
-                      int                  eth_present,
-                      int                  vlan_present,
-                      int                  grh_present,
-                      int                  immediate_present,
-                      struct ib_ud_header *header);
+__sum16 ib_ud_ip4_csum(struct ib_ud_header *header);
+
+int ib_ud_header_init(int                  payload_bytes,
+                     int                   lrh_present,
+                     int                   eth_present,
+                     int                   vlan_present,
+                     int                   grh_present,
+                     int                   ip_version,
+                     int                   udp_present,
+                     int                   immediate_present,
+                     struct ib_ud_header *header);
 
 int ib_ud_header_pack(struct ib_ud_header *header,
                      void                *buf);
index a5889f1..2f8a65c 100644 (file)
@@ -42,6 +42,7 @@
  */
 #define IB_PMA_CLASS_CAP_ALLPORTSELECT  cpu_to_be16(1 << 8)
 #define IB_PMA_CLASS_CAP_EXT_WIDTH      cpu_to_be16(1 << 9)
+#define IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF cpu_to_be16(1 << 10)
 #define IB_PMA_CLASS_CAP_XMIT_WAIT      cpu_to_be16(1 << 12)
 
 #define IB_PMA_CLASS_PORT_INFO          cpu_to_be16(0x0001)
index 3019695..cdc1c81 100644 (file)
@@ -160,6 +160,7 @@ struct ib_sa_path_rec {
        int          ifindex;
        /* ignored in IB */
        struct net  *net;
+       enum ib_gid_type gid_type;
 };
 
 static inline struct net_device *ib_get_ndev_from_path(struct ib_sa_path_rec *rec)
@@ -402,6 +403,8 @@ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
  */
 int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
                             struct ib_sa_mcmember_rec *rec,
+                            struct net_device *ndev,
+                            enum ib_gid_type gid_type,
                             struct ib_ah_attr *ah_attr);
 
 /**
index 120da1d..284b00c 100644 (file)
 #include <linux/scatterlist.h>
 #include <linux/workqueue.h>
 #include <linux/socket.h>
+#include <linux/irq_poll.h>
 #include <uapi/linux/if_ether.h>
+#include <net/ipv6.h>
+#include <net/ip.h>
+#include <linux/string.h>
+#include <linux/slab.h>
 
 #include <linux/atomic.h>
 #include <linux/mmu_notifier.h>
 #include <asm/uaccess.h>
 
 extern struct workqueue_struct *ib_wq;
+extern struct workqueue_struct *ib_comp_wq;
 
 union ib_gid {
        u8      raw[16];
@@ -67,7 +73,17 @@ union ib_gid {
 
 extern union ib_gid zgid;
 
+enum ib_gid_type {
+       /* If link layer is Ethernet, this is RoCE V1 */
+       IB_GID_TYPE_IB        = 0,
+       IB_GID_TYPE_ROCE      = 0,
+       IB_GID_TYPE_ROCE_UDP_ENCAP = 1,
+       IB_GID_TYPE_SIZE
+};
+
+#define ROCE_V2_UDP_DPORT      4791
 struct ib_gid_attr {
+       enum ib_gid_type        gid_type;
        struct net_device       *ndev;
 };
 
@@ -98,6 +114,35 @@ enum rdma_protocol_type {
 __attribute_const__ enum rdma_transport_type
 rdma_node_get_transport(enum rdma_node_type node_type);
 
+enum rdma_network_type {
+       RDMA_NETWORK_IB,
+       RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB,
+       RDMA_NETWORK_IPV4,
+       RDMA_NETWORK_IPV6
+};
+
+static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type network_type)
+{
+       if (network_type == RDMA_NETWORK_IPV4 ||
+           network_type == RDMA_NETWORK_IPV6)
+               return IB_GID_TYPE_ROCE_UDP_ENCAP;
+
+       /* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */
+       return IB_GID_TYPE_IB;
+}
+
+static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type,
+                                                           union ib_gid *gid)
+{
+       if (gid_type == IB_GID_TYPE_IB)
+               return RDMA_NETWORK_IB;
+
+       if (ipv6_addr_v4mapped((struct in6_addr *)gid))
+               return RDMA_NETWORK_IPV4;
+       else
+               return RDMA_NETWORK_IPV6;
+}
+
 enum rdma_link_layer {
        IB_LINK_LAYER_UNSPECIFIED,
        IB_LINK_LAYER_INFINIBAND,
@@ -105,24 +150,32 @@ enum rdma_link_layer {
 };
 
 enum ib_device_cap_flags {
-       IB_DEVICE_RESIZE_MAX_WR         = 1,
-       IB_DEVICE_BAD_PKEY_CNTR         = (1<<1),
-       IB_DEVICE_BAD_QKEY_CNTR         = (1<<2),
-       IB_DEVICE_RAW_MULTI             = (1<<3),
-       IB_DEVICE_AUTO_PATH_MIG         = (1<<4),
-       IB_DEVICE_CHANGE_PHY_PORT       = (1<<5),
-       IB_DEVICE_UD_AV_PORT_ENFORCE    = (1<<6),
-       IB_DEVICE_CURR_QP_STATE_MOD     = (1<<7),
-       IB_DEVICE_SHUTDOWN_PORT         = (1<<8),
-       IB_DEVICE_INIT_TYPE             = (1<<9),
-       IB_DEVICE_PORT_ACTIVE_EVENT     = (1<<10),
-       IB_DEVICE_SYS_IMAGE_GUID        = (1<<11),
-       IB_DEVICE_RC_RNR_NAK_GEN        = (1<<12),
-       IB_DEVICE_SRQ_RESIZE            = (1<<13),
-       IB_DEVICE_N_NOTIFY_CQ           = (1<<14),
-       IB_DEVICE_LOCAL_DMA_LKEY        = (1<<15),
-       IB_DEVICE_RESERVED              = (1<<16), /* old SEND_W_INV */
-       IB_DEVICE_MEM_WINDOW            = (1<<17),
+       IB_DEVICE_RESIZE_MAX_WR                 = (1 << 0),
+       IB_DEVICE_BAD_PKEY_CNTR                 = (1 << 1),
+       IB_DEVICE_BAD_QKEY_CNTR                 = (1 << 2),
+       IB_DEVICE_RAW_MULTI                     = (1 << 3),
+       IB_DEVICE_AUTO_PATH_MIG                 = (1 << 4),
+       IB_DEVICE_CHANGE_PHY_PORT               = (1 << 5),
+       IB_DEVICE_UD_AV_PORT_ENFORCE            = (1 << 6),
+       IB_DEVICE_CURR_QP_STATE_MOD             = (1 << 7),
+       IB_DEVICE_SHUTDOWN_PORT                 = (1 << 8),
+       IB_DEVICE_INIT_TYPE                     = (1 << 9),
+       IB_DEVICE_PORT_ACTIVE_EVENT             = (1 << 10),
+       IB_DEVICE_SYS_IMAGE_GUID                = (1 << 11),
+       IB_DEVICE_RC_RNR_NAK_GEN                = (1 << 12),
+       IB_DEVICE_SRQ_RESIZE                    = (1 << 13),
+       IB_DEVICE_N_NOTIFY_CQ                   = (1 << 14),
+
+       /*
+        * This device supports a per-device lkey or stag that can be
+        * used without performing a memory registration for the local
+        * memory.  Note that ULPs should never check this flag, but
+        * instead use the local_dma_lkey field in the ib_pd structure,
+        * which will always contain a usable lkey.
+        */
+       IB_DEVICE_LOCAL_DMA_LKEY                = (1 << 15),
+       IB_DEVICE_RESERVED /* old SEND_W_INV */ = (1 << 16),
+       IB_DEVICE_MEM_WINDOW                    = (1 << 17),
        /*
         * Devices should set IB_DEVICE_UD_IP_SUM if they support
         * insertion of UDP and TCP checksum on outgoing UD IPoIB
@@ -130,18 +183,35 @@ enum ib_device_cap_flags {
         * incoming messages.  Setting this flag implies that the
         * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode.
         */
-       IB_DEVICE_UD_IP_CSUM            = (1<<18),
-       IB_DEVICE_UD_TSO                = (1<<19),
-       IB_DEVICE_XRC                   = (1<<20),
-       IB_DEVICE_MEM_MGT_EXTENSIONS    = (1<<21),
-       IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
-       IB_DEVICE_MEM_WINDOW_TYPE_2A    = (1<<23),
-       IB_DEVICE_MEM_WINDOW_TYPE_2B    = (1<<24),
-       IB_DEVICE_RC_IP_CSUM            = (1<<25),
-       IB_DEVICE_RAW_IP_CSUM           = (1<<26),
-       IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
-       IB_DEVICE_SIGNATURE_HANDOVER    = (1<<30),
-       IB_DEVICE_ON_DEMAND_PAGING      = (1<<31),
+       IB_DEVICE_UD_IP_CSUM                    = (1 << 18),
+       IB_DEVICE_UD_TSO                        = (1 << 19),
+       IB_DEVICE_XRC                           = (1 << 20),
+
+       /*
+        * This device supports the IB "base memory management extension",
+        * which includes support for fast registrations (IB_WR_REG_MR,
+        * IB_WR_LOCAL_INV and IB_WR_SEND_WITH_INV verbs).  This flag should
+        * also be set by any iWarp device which must support FRs to comply
+        * to the iWarp verbs spec.  iWarp devices also support the
+        * IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the
+        * stag.
+        */
+       IB_DEVICE_MEM_MGT_EXTENSIONS            = (1 << 21),
+       IB_DEVICE_BLOCK_MULTICAST_LOOPBACK      = (1 << 22),
+       IB_DEVICE_MEM_WINDOW_TYPE_2A            = (1 << 23),
+       IB_DEVICE_MEM_WINDOW_TYPE_2B            = (1 << 24),
+       IB_DEVICE_RC_IP_CSUM                    = (1 << 25),
+       IB_DEVICE_RAW_IP_CSUM                   = (1 << 26),
+       /*
+        * Devices should set IB_DEVICE_CROSS_CHANNEL if they
+        * support execution of WQEs that involve synchronization
+        * of I/O operations with a single completion queue managed
+        * by hardware.
+        */
+       IB_DEVICE_CROSS_CHANNEL         = (1 << 27),
+       IB_DEVICE_MANAGED_FLOW_STEERING         = (1 << 29),
+       IB_DEVICE_SIGNATURE_HANDOVER            = (1 << 30),
+       IB_DEVICE_ON_DEMAND_PAGING              = (1 << 31),
 };
 
 enum ib_signature_prot_cap {
@@ -184,6 +254,7 @@ struct ib_odp_caps {
 
 enum ib_cq_creation_flags {
        IB_CQ_FLAGS_TIMESTAMP_COMPLETION   = 1 << 0,
+       IB_CQ_FLAGS_IGNORE_OVERRUN         = 1 << 1,
 };
 
 struct ib_cq_init_attr {
@@ -393,6 +464,7 @@ union rdma_protocol_stats {
 #define RDMA_CORE_CAP_PROT_IB           0x00100000
 #define RDMA_CORE_CAP_PROT_ROCE         0x00200000
 #define RDMA_CORE_CAP_PROT_IWARP        0x00400000
+#define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
 
 #define RDMA_CORE_PORT_IBA_IB          (RDMA_CORE_CAP_PROT_IB  \
                                        | RDMA_CORE_CAP_IB_MAD \
@@ -405,6 +477,12 @@ union rdma_protocol_stats {
                                        | RDMA_CORE_CAP_IB_CM   \
                                        | RDMA_CORE_CAP_AF_IB   \
                                        | RDMA_CORE_CAP_ETH_AH)
+#define RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP                      \
+                                       (RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP \
+                                       | RDMA_CORE_CAP_IB_MAD  \
+                                       | RDMA_CORE_CAP_IB_CM   \
+                                       | RDMA_CORE_CAP_AF_IB   \
+                                       | RDMA_CORE_CAP_ETH_AH)
 #define RDMA_CORE_PORT_IWARP           (RDMA_CORE_CAP_PROT_IWARP \
                                        | RDMA_CORE_CAP_IW_CM)
 #define RDMA_CORE_PORT_INTEL_OPA       (RDMA_CORE_PORT_IBA_IB  \
@@ -519,6 +597,17 @@ struct ib_grh {
        union ib_gid    dgid;
 };
 
+union rdma_network_hdr {
+       struct ib_grh ibgrh;
+       struct {
+               /* The IB spec states that if it's IPv4, the header
+                * is located in the last 20 bytes of the header.
+                */
+               u8              reserved[20];
+               struct iphdr    roce4grh;
+       };
+};
+
 enum {
        IB_MULTICAST_QPN = 0xffffff
 };
@@ -734,7 +823,6 @@ enum ib_wc_opcode {
        IB_WC_RDMA_READ,
        IB_WC_COMP_SWAP,
        IB_WC_FETCH_ADD,
-       IB_WC_BIND_MW,
        IB_WC_LSO,
        IB_WC_LOCAL_INV,
        IB_WC_REG_MR,
@@ -755,10 +843,14 @@ enum ib_wc_flags {
        IB_WC_IP_CSUM_OK        = (1<<3),
        IB_WC_WITH_SMAC         = (1<<4),
        IB_WC_WITH_VLAN         = (1<<5),
+       IB_WC_WITH_NETWORK_HDR_TYPE     = (1<<6),
 };
 
 struct ib_wc {
-       u64                     wr_id;
+       union {
+               u64             wr_id;
+               struct ib_cqe   *wr_cqe;
+       };
        enum ib_wc_status       status;
        enum ib_wc_opcode       opcode;
        u32                     vendor_err;
@@ -777,6 +869,7 @@ struct ib_wc {
        u8                      port_num;       /* valid only for DR SMPs on switches */
        u8                      smac[ETH_ALEN];
        u16                     vlan_id;
+       u8                      network_hdr_type;
 };
 
 enum ib_cq_notify_flags {
@@ -866,6 +959,9 @@ enum ib_qp_type {
 enum ib_qp_create_flags {
        IB_QP_CREATE_IPOIB_UD_LSO               = 1 << 0,
        IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK   = 1 << 1,
+       IB_QP_CREATE_CROSS_CHANNEL              = 1 << 2,
+       IB_QP_CREATE_MANAGED_SEND               = 1 << 3,
+       IB_QP_CREATE_MANAGED_RECV               = 1 << 4,
        IB_QP_CREATE_NETIF_QP                   = 1 << 5,
        IB_QP_CREATE_SIGNATURE_EN               = 1 << 6,
        IB_QP_CREATE_USE_GFP_NOIO               = 1 << 7,
@@ -1027,7 +1123,6 @@ enum ib_wr_opcode {
        IB_WR_REG_MR,
        IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
        IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
-       IB_WR_BIND_MW,
        IB_WR_REG_SIG_MR,
        /* reserve values for low level drivers' internal use.
         * These values will not be used at all in the ib core layer.
@@ -1062,26 +1157,16 @@ struct ib_sge {
        u32     lkey;
 };
 
-/**
- * struct ib_mw_bind_info - Parameters for a memory window bind operation.
- * @mr: A memory region to bind the memory window to.
- * @addr: The address where the memory window should begin.
- * @length: The length of the memory window, in bytes.
- * @mw_access_flags: Access flags from enum ib_access_flags for the window.
- *
- * This struct contains the shared parameters for type 1 and type 2
- * memory window bind operations.
- */
-struct ib_mw_bind_info {
-       struct ib_mr   *mr;
-       u64             addr;
-       u64             length;
-       int             mw_access_flags;
+struct ib_cqe {
+       void (*done)(struct ib_cq *cq, struct ib_wc *wc);
 };
 
 struct ib_send_wr {
        struct ib_send_wr      *next;
-       u64                     wr_id;
+       union {
+               u64             wr_id;
+               struct ib_cqe   *wr_cqe;
+       };
        struct ib_sge          *sg_list;
        int                     num_sge;
        enum ib_wr_opcode       opcode;
@@ -1147,19 +1232,6 @@ static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr)
        return container_of(wr, struct ib_reg_wr, wr);
 }
 
-struct ib_bind_mw_wr {
-       struct ib_send_wr       wr;
-       struct ib_mw            *mw;
-       /* The new rkey for the memory window. */
-       u32                     rkey;
-       struct ib_mw_bind_info  bind_info;
-};
-
-static inline struct ib_bind_mw_wr *bind_mw_wr(struct ib_send_wr *wr)
-{
-       return container_of(wr, struct ib_bind_mw_wr, wr);
-}
-
 struct ib_sig_handover_wr {
        struct ib_send_wr       wr;
        struct ib_sig_attrs    *sig_attrs;
@@ -1175,7 +1247,10 @@ static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr)
 
 struct ib_recv_wr {
        struct ib_recv_wr      *next;
-       u64                     wr_id;
+       union {
+               u64             wr_id;
+               struct ib_cqe   *wr_cqe;
+       };
        struct ib_sge          *sg_list;
        int                     num_sge;
 };
@@ -1190,20 +1265,10 @@ enum ib_access_flags {
        IB_ACCESS_ON_DEMAND     = (1<<6),
 };
 
-struct ib_phys_buf {
-       u64      addr;
-       u64      size;
-};
-
-struct ib_mr_attr {
-       struct ib_pd    *pd;
-       u64             device_virt_addr;
-       u64             size;
-       int             mr_access_flags;
-       u32             lkey;
-       u32             rkey;
-};
-
+/*
+ * XXX: these are apparently used for ->rereg_user_mr, no idea why they
+ * are hidden here instead of a uapi header!
+ */
 enum ib_mr_rereg_flags {
        IB_MR_REREG_TRANS       = 1,
        IB_MR_REREG_PD          = (1<<1),
@@ -1211,18 +1276,6 @@ enum ib_mr_rereg_flags {
        IB_MR_REREG_SUPPORTED   = ((IB_MR_REREG_ACCESS << 1) - 1)
 };
 
-/**
- * struct ib_mw_bind - Parameters for a type 1 memory window bind operation.
- * @wr_id:      Work request id.
- * @send_flags: Flags from ib_send_flags enum.
- * @bind_info:  More parameters of the bind operation.
- */
-struct ib_mw_bind {
-       u64                    wr_id;
-       int                    send_flags;
-       struct ib_mw_bind_info bind_info;
-};
-
 struct ib_fmr_attr {
        int     max_pages;
        int     max_maps;
@@ -1307,6 +1360,12 @@ struct ib_ah {
 
 typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
 
+enum ib_poll_context {
+       IB_POLL_DIRECT,         /* caller context, no hw completions */
+       IB_POLL_SOFTIRQ,        /* poll from softirq context */
+       IB_POLL_WORKQUEUE,      /* poll from workqueue */
+};
+
 struct ib_cq {
        struct ib_device       *device;
        struct ib_uobject      *uobject;
@@ -1315,6 +1374,12 @@ struct ib_cq {
        void                   *cq_context;
        int                     cqe;
        atomic_t                usecnt; /* count number of work queues */
+       enum ib_poll_context    poll_ctx;
+       struct ib_wc            *wc;
+       union {
+               struct irq_poll         iop;
+               struct work_struct      work;
+       };
 };
 
 struct ib_srq {
@@ -1363,7 +1428,6 @@ struct ib_mr {
        u64                iova;
        u32                length;
        unsigned int       page_size;
-       atomic_t           usecnt; /* count number of MWs */
 };
 
 struct ib_mw {
@@ -1724,11 +1788,6 @@ struct ib_device {
                                                      int wc_cnt);
        struct ib_mr *             (*get_dma_mr)(struct ib_pd *pd,
                                                 int mr_access_flags);
-       struct ib_mr *             (*reg_phys_mr)(struct ib_pd *pd,
-                                                 struct ib_phys_buf *phys_buf_array,
-                                                 int num_phys_buf,
-                                                 int mr_access_flags,
-                                                 u64 *iova_start);
        struct ib_mr *             (*reg_user_mr)(struct ib_pd *pd,
                                                  u64 start, u64 length,
                                                  u64 virt_addr,
@@ -1741,8 +1800,6 @@ struct ib_device {
                                                    int mr_access_flags,
                                                    struct ib_pd *pd,
                                                    struct ib_udata *udata);
-       int                        (*query_mr)(struct ib_mr *mr,
-                                              struct ib_mr_attr *mr_attr);
        int                        (*dereg_mr)(struct ib_mr *mr);
        struct ib_mr *             (*alloc_mr)(struct ib_pd *pd,
                                               enum ib_mr_type mr_type,
@@ -1750,18 +1807,8 @@ struct ib_device {
        int                        (*map_mr_sg)(struct ib_mr *mr,
                                                struct scatterlist *sg,
                                                int sg_nents);
-       int                        (*rereg_phys_mr)(struct ib_mr *mr,
-                                                   int mr_rereg_mask,
-                                                   struct ib_pd *pd,
-                                                   struct ib_phys_buf *phys_buf_array,
-                                                   int num_phys_buf,
-                                                   int mr_access_flags,
-                                                   u64 *iova_start);
        struct ib_mw *             (*alloc_mw)(struct ib_pd *pd,
                                               enum ib_mw_type type);
-       int                        (*bind_mw)(struct ib_qp *qp,
-                                             struct ib_mw *mw,
-                                             struct ib_mw_bind *mw_bind);
        int                        (*dealloc_mw)(struct ib_mw *mw);
        struct ib_fmr *            (*alloc_fmr)(struct ib_pd *pd,
                                                int mr_access_flags,
@@ -1823,6 +1870,7 @@ struct ib_device {
        u16                          is_switch:1;
        u8                           node_type;
        u8                           phys_port_cnt;
+       struct ib_device_attr        attrs;
 
        /**
         * The following mandatory functions are used only at device
@@ -1888,6 +1936,31 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len
        return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
 }
 
+static inline bool ib_is_udata_cleared(struct ib_udata *udata,
+                                      size_t offset,
+                                      size_t len)
+{
+       const void __user *p = udata->inbuf + offset;
+       bool ret = false;
+       u8 *buf;
+
+       if (len > USHRT_MAX)
+               return false;
+
+       buf = kmalloc(len, GFP_KERNEL);
+       if (!buf)
+               return false;
+
+       if (copy_from_user(buf, p, len))
+               goto free;
+
+       ret = !memchr_inv(buf, 0, len);
+
+free:
+       kfree(buf);
+       return ret;
+}
+
 /**
  * ib_modify_qp_is_ok - Check that the supplied attribute mask
  * contains all required attributes and no attributes not allowed for
@@ -1912,9 +1985,6 @@ int ib_register_event_handler  (struct ib_event_handler *event_handler);
 int ib_unregister_event_handler(struct ib_event_handler *event_handler);
 void ib_dispatch_event(struct ib_event *event);
 
-int ib_query_device(struct ib_device *device,
-                   struct ib_device_attr *device_attr);
-
 int ib_query_port(struct ib_device *device,
                  u8 port_num, struct ib_port_attr *port_attr);
 
@@ -1967,6 +2037,17 @@ static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
 }
 
 static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num)
+{
+       return device->port_immutable[port_num].core_cap_flags &
+               (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
+}
+
+static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num)
+{
+       return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+}
+
+static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num)
 {
        return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE;
 }
@@ -1978,8 +2059,8 @@ static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_n
 
 static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
 {
-       return device->port_immutable[port_num].core_cap_flags &
-               (RDMA_CORE_CAP_PROT_IB | RDMA_CORE_CAP_PROT_ROCE);
+       return rdma_protocol_ib(device, port_num) ||
+               rdma_protocol_roce(device, port_num);
 }
 
 /**
@@ -2220,7 +2301,8 @@ int ib_modify_port(struct ib_device *device,
                   struct ib_port_modify *port_modify);
 
 int ib_find_gid(struct ib_device *device, union ib_gid *gid,
-               struct net_device *ndev, u8 *port_num, u16 *index);
+               enum ib_gid_type gid_type, struct net_device *ndev,
+               u8 *port_num, u16 *index);
 
 int ib_find_pkey(struct ib_device *device,
                 u8 port_num, u16 pkey, u16 *index);
@@ -2454,6 +2536,11 @@ static inline int ib_post_recv(struct ib_qp *qp,
        return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
 }
 
+struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
+               int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx);
+void ib_free_cq(struct ib_cq *cq);
+int ib_process_cq_direct(struct ib_cq *cq, int budget);
+
 /**
  * ib_create_cq - Creates a CQ on the specified device.
  * @device: The device on which to create the CQ.
@@ -2838,13 +2925,6 @@ static inline void ib_dma_free_coherent(struct ib_device *dev,
                dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
 }
 
-/**
- * ib_query_mr - Retrieves information about a specific memory region.
- * @mr: The memory region to retrieve information about.
- * @mr_attr: The attributes of the specified memory region.
- */
-int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
-
 /**
  * ib_dereg_mr - Deregisters a memory region and removes it from the
  *   HCA translation table.
@@ -2881,42 +2961,6 @@ static inline u32 ib_inc_rkey(u32 rkey)
        return ((rkey + 1) & mask) | (rkey & ~mask);
 }
 
-/**
- * ib_alloc_mw - Allocates a memory window.
- * @pd: The protection domain associated with the memory window.
- * @type: The type of the memory window (1 or 2).
- */
-struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-
-/**
- * ib_bind_mw - Posts a work request to the send queue of the specified
- *   QP, which binds the memory window to the given address range and
- *   remote access attributes.
- * @qp: QP to post the bind work request on.
- * @mw: The memory window to bind.
- * @mw_bind: Specifies information about the memory window, including
- *   its address range, remote access rights, and associated memory region.
- *
- * If there is no immediate error, the function will update the rkey member
- * of the mw parameter to its new value. The bind operation can still fail
- * asynchronously.
- */
-static inline int ib_bind_mw(struct ib_qp *qp,
-                            struct ib_mw *mw,
-                            struct ib_mw_bind *mw_bind)
-{
-       /* XXX reference counting in corresponding MR? */
-       return mw->device->bind_mw ?
-               mw->device->bind_mw(qp, mw, mw_bind) :
-               -ENOSYS;
-}
-
-/**
- * ib_dealloc_mw - Deallocates a memory window.
- * @mw: The memory window to deallocate.
- */
-int ib_dealloc_mw(struct ib_mw *mw);
-
 /**
  * ib_alloc_fmr - Allocates a unmapped fast memory region.
  * @pd: The protection domain associated with the unmapped region.
diff --git a/include/scsi/iser.h b/include/scsi/iser.h
new file mode 100644 (file)
index 0000000..2e678fa
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *     - Redistributions of source code must retain the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer.
+ *
+ *     - Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials
+ *       provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ISCSI_ISER_H
+#define ISCSI_ISER_H
+
+#define ISER_ZBVA_NOT_SUP              0x80
+#define ISER_SEND_W_INV_NOT_SUP                0x40
+#define ISERT_ZBVA_NOT_USED            0x80
+#define ISERT_SEND_W_INV_NOT_USED      0x40
+
+#define ISCSI_CTRL     0x10
+#define ISER_HELLO     0x20
+#define ISER_HELLORPLY 0x30
+
+#define ISER_VER       0x10
+#define ISER_WSV       0x08
+#define ISER_RSV       0x04
+
+/**
+ * struct iser_cm_hdr - iSER CM header (from iSER Annex A12)
+ *
+ * @flags:        flags support (zbva, send_w_inv)
+ * @rsvd:         reserved
+ */
+struct iser_cm_hdr {
+       u8      flags;
+       u8      rsvd[3];
+} __packed;
+
+/**
+ * struct iser_ctrl - iSER header of iSCSI control PDU
+ *
+ * @flags:        opcode and read/write valid bits
+ * @rsvd:         reserved
+ * @write_stag:   write rkey
+ * @write_va:     write virtual address
+ * @read_stag:    read rkey
+ * @read_va:      read virtual address
+ */
+struct iser_ctrl {
+       u8      flags;
+       u8      rsvd[3];
+       __be32  write_stag;
+       __be64  write_va;
+       __be32  read_stag;
+       __be64  read_va;
+} __packed;
+
+#endif /* ISCSI_ISER_H */
index fdabbb4..f730b91 100644 (file)
@@ -167,6 +167,10 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
 int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count);
 int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream,
                         unsigned char *buffer, int count);
+int __snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
+                             unsigned char *buffer, int count);
+int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream,
+                              int count);
 
 /* main midi functions */
 
index 7990469..c4d76ff 100644 (file)
@@ -104,6 +104,7 @@ struct snd_timer_instance {
                           int event,
                           struct timespec * tstamp,
                           unsigned long resolution);
+       void (*disconnect)(struct snd_timer_instance *timeri);
        void *callback_data;
        unsigned long ticks;            /* auto-load ticks when expired */
        unsigned long cticks;           /* current ticks */
index 594b4b2..4e4b2fa 100644 (file)
@@ -43,7 +43,7 @@ struct extent_status;
        { EXT4_GET_BLOCKS_METADATA_NOFAIL,      "METADATA_NOFAIL" },    \
        { EXT4_GET_BLOCKS_NO_NORMALIZE,         "NO_NORMALIZE" },       \
        { EXT4_GET_BLOCKS_KEEP_SIZE,            "KEEP_SIZE" },          \
-       { EXT4_GET_BLOCKS_NO_LOCK,              "NO_LOCK" })
+       { EXT4_GET_BLOCKS_ZERO,                 "ZERO" })
 
 #define show_mflags(flags) __print_flags(flags, "",    \
        { EXT4_MAP_NEW,         "N" },                  \
index 98feb1b..d6dfa05 100644 (file)
@@ -17,7 +17,7 @@ TRACE_EVENT(fence_annotate_wait_on,
 
        TP_STRUCT__entry(
                __string(driver, fence->ops->get_driver_name(fence))
-               __string(timeline, fence->ops->get_driver_name(fence))
+               __string(timeline, fence->ops->get_timeline_name(fence))
                __field(unsigned int, context)
                __field(unsigned int, seqno)
 
index 0f803d2..47c6212 100644 (file)
@@ -46,10 +46,10 @@ SCAN_STATUS
 
 TRACE_EVENT(mm_khugepaged_scan_pmd,
 
-       TP_PROTO(struct mm_struct *mm, unsigned long pfn, bool writable,
+       TP_PROTO(struct mm_struct *mm, struct page *page, bool writable,
                 bool referenced, int none_or_zero, int status),
 
-       TP_ARGS(mm, pfn, writable, referenced, none_or_zero, status),
+       TP_ARGS(mm, page, writable, referenced, none_or_zero, status),
 
        TP_STRUCT__entry(
                __field(struct mm_struct *, mm)
@@ -62,7 +62,7 @@ TRACE_EVENT(mm_khugepaged_scan_pmd,
 
        TP_fast_assign(
                __entry->mm = mm;
-               __entry->pfn = pfn;
+               __entry->pfn = page ? page_to_pfn(page) : -1;
                __entry->writable = writable;
                __entry->referenced = referenced;
                __entry->none_or_zero = none_or_zero;
@@ -104,10 +104,10 @@ TRACE_EVENT(mm_collapse_huge_page,
 
 TRACE_EVENT(mm_collapse_huge_page_isolate,
 
-       TP_PROTO(unsigned long pfn, int none_or_zero,
+       TP_PROTO(struct page *page, int none_or_zero,
                 bool referenced, bool  writable, int status),
 
-       TP_ARGS(pfn, none_or_zero, referenced, writable, status),
+       TP_ARGS(page, none_or_zero, referenced, writable, status),
 
        TP_STRUCT__entry(
                __field(unsigned long, pfn)
@@ -118,7 +118,7 @@ TRACE_EVENT(mm_collapse_huge_page_isolate,
        ),
 
        TP_fast_assign(
-               __entry->pfn = pfn;
+               __entry->pfn = page ? page_to_pfn(page) : -1;
                __entry->none_or_zero = none_or_zero;
                __entry->referenced = referenced;
                __entry->writable = writable;
index ff8f6c0..f95f25e 100644 (file)
@@ -15,7 +15,7 @@ struct softirq_action;
                         softirq_name(NET_TX)           \
                         softirq_name(NET_RX)           \
                         softirq_name(BLOCK)            \
-                        softirq_name(BLOCK_IOPOLL)     \
+                        softirq_name(IRQ_POLL)         \
                         softirq_name(TASKLET)          \
                         softirq_name(SCHED)            \
                         softirq_name(HRTIMER)          \
index 4cc989a..f95e1c4 100644 (file)
@@ -48,6 +48,8 @@ struct drm_etnaviv_timespec {
 #define ETNAVIV_PARAM_GPU_FEATURES_2                0x05
 #define ETNAVIV_PARAM_GPU_FEATURES_3                0x06
 #define ETNAVIV_PARAM_GPU_FEATURES_4                0x07
+#define ETNAVIV_PARAM_GPU_FEATURES_5                0x08
+#define ETNAVIV_PARAM_GPU_FEATURES_6                0x09
 
 #define ETNAVIV_PARAM_GPU_STREAM_COUNT              0x10
 #define ETNAVIV_PARAM_GPU_REGISTER_MAX              0x11
@@ -59,6 +61,7 @@ struct drm_etnaviv_timespec {
 #define ETNAVIV_PARAM_GPU_BUFFER_SIZE               0x17
 #define ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT         0x18
 #define ETNAVIV_PARAM_GPU_NUM_CONSTANTS             0x19
+#define ETNAVIV_PARAM_GPU_NUM_VARYINGS              0x1a
 
 #define ETNA_MAX_PIPES 4
 
index b8a5b3b..149bec8 100644 (file)
@@ -2,8 +2,11 @@
 #define _UAPI_LINUX_FS_H
 
 /*
- * This file has definitions for some important file table
- * structures etc.
+ * This file has definitions for some important file table structures
+ * and constants and structures used by various generic file system
+ * ioctl's.  Please do not make any changes in this file before
+ * sending patches for review to linux-fsdevel@vger.kernel.org and
+ * linux-api@vger.kernel.org.
  */
 
 #include <linux/limits.h>
@@ -146,6 +149,37 @@ struct inodes_stat_t {
 #define MS_MGC_VAL 0xC0ED0000
 #define MS_MGC_MSK 0xffff0000
 
+/*
+ * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR.
+ */
+struct fsxattr {
+       __u32           fsx_xflags;     /* xflags field value (get/set) */
+       __u32           fsx_extsize;    /* extsize field value (get/set)*/
+       __u32           fsx_nextents;   /* nextents field value (get)   */
+       __u32           fsx_projid;     /* project identifier (get/set) */
+       unsigned char   fsx_pad[12];
+};
+
+/*
+ * Flags for the fsx_xflags field
+ */
+#define FS_XFLAG_REALTIME      0x00000001      /* data in realtime volume */
+#define FS_XFLAG_PREALLOC      0x00000002      /* preallocated file extents */
+#define FS_XFLAG_IMMUTABLE     0x00000008      /* file cannot be modified */
+#define FS_XFLAG_APPEND                0x00000010      /* all writes append */
+#define FS_XFLAG_SYNC          0x00000020      /* all writes synchronous */
+#define FS_XFLAG_NOATIME       0x00000040      /* do not update access time */
+#define FS_XFLAG_NODUMP                0x00000080      /* do not include in backups */
+#define FS_XFLAG_RTINHERIT     0x00000100      /* create with rt bit set */
+#define FS_XFLAG_PROJINHERIT   0x00000200      /* create with parents projid */
+#define FS_XFLAG_NOSYMLINKS    0x00000400      /* disallow symlink creation */
+#define FS_XFLAG_EXTSIZE       0x00000800      /* extent size allocator hint */
+#define FS_XFLAG_EXTSZINHERIT  0x00001000      /* inherit inode extent size */
+#define FS_XFLAG_NODEFRAG      0x00002000      /* do not defragment */
+#define FS_XFLAG_FILESTREAM    0x00004000      /* use filestream allocator */
+#define FS_XFLAG_DAX           0x00008000      /* use DAX for IO */
+#define FS_XFLAG_HASATTR       0x80000000      /* no DIFLAG for this   */
+
 /* the read-only stuff doesn't really belong here, but any other place is
    probably as bad and I don't want to create yet another include file. */
 
@@ -188,7 +222,6 @@ struct inodes_stat_t {
 #define BLKSECDISCARD _IO(0x12,125)
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
-#define BLKDAXSET _IO(0x12,128)
 #define BLKDAXGET _IO(0x12,129)
 
 #define BMAP_IOCTL 1           /* obsolete - kept for compatibility */
@@ -210,9 +243,28 @@ struct inodes_stat_t {
 #define FS_IOC32_SETFLAGS              _IOW('f', 2, int)
 #define FS_IOC32_GETVERSION            _IOR('v', 1, int)
 #define FS_IOC32_SETVERSION            _IOW('v', 2, int)
+#define FS_IOC_FSGETXATTR              _IOR ('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR              _IOW ('X', 32, struct fsxattr)
 
 /*
  * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
+ *
+ * Note: for historical reasons, these flags were originally used and
+ * defined for use by ext2/ext3, and then other file systems started
+ * using these flags so they wouldn't need to write their own version
+ * of chattr/lsattr (which was shipped as part of e2fsprogs).  You
+ * should think twice before trying to use these flags in new
+ * contexts, or trying to assign these flags, since they are used both
+ * as the UAPI and the on-disk encoding for ext2/3/4.  Also, we are
+ * almost out of 32-bit flags.  :-)
+ *
+ * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from
+ * XFS to the generic FS level interface.  This uses a structure that
+ * has padding and hence has more room to grow, so it may be more
+ * appropriate for many new use cases.
+ *
+ * Please do not change these flags or interfaces before checking with
+ * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org.
  */
 #define        FS_SECRM_FL                     0x00000001 /* Secure deletion */
 #define        FS_UNRM_FL                      0x00000002 /* Undelete */
@@ -226,8 +278,8 @@ struct inodes_stat_t {
 #define FS_DIRTY_FL                    0x00000100
 #define FS_COMPRBLK_FL                 0x00000200 /* One or more compressed clusters */
 #define FS_NOCOMP_FL                   0x00000400 /* Don't compress */
-#define FS_ECOMPR_FL                   0x00000800 /* Compression error */
 /* End compression flags --- maybe not all used */
+#define FS_ENCRYPT_FL                  0x00000800 /* Encrypted file */
 #define FS_BTREE_FL                    0x00001000 /* btree format dir */
 #define FS_INDEX_FL                    0x00001000 /* hash-indexed directory */
 #define FS_IMAGIC_FL                   0x00002000 /* AFS directory */
@@ -235,9 +287,12 @@ struct inodes_stat_t {
 #define FS_NOTAIL_FL                   0x00008000 /* file tail should not be merged */
 #define FS_DIRSYNC_FL                  0x00010000 /* dirsync behaviour (directories only) */
 #define FS_TOPDIR_FL                   0x00020000 /* Top of directory hierarchies*/
+#define FS_HUGE_FILE_FL                        0x00040000 /* Reserved for ext4 */
 #define FS_EXTENT_FL                   0x00080000 /* Extents */
-#define FS_DIRECTIO_FL                 0x00100000 /* Use direct i/o */
+#define FS_EA_INODE_FL                 0x00200000 /* Inode used for large EA */
+#define FS_EOFBLOCKS_FL                        0x00400000 /* Reserved for ext4 */
 #define FS_NOCOW_FL                    0x00800000 /* Do not cow file */
+#define FS_INLINE_DATA_FL              0x10000000 /* Reserved for ext4 */
 #define FS_PROJINHERIT_FL              0x20000000 /* Create with parents projid */
 #define FS_RESERVED_FL                 0x80000000 /* reserved for ext2 lib */
 
index f4617cf..781c139 100644 (file)
@@ -795,7 +795,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
 
        ro = mnt_want_write(mnt);       /* we'll drop it in any case */
        error = 0;
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
        path.dentry = lookup_one_len(name->name, root, strlen(name->name));
        if (IS_ERR(path.dentry)) {
                error = PTR_ERR(path.dentry);
@@ -841,7 +841,7 @@ out_putfd:
                put_unused_fd(fd);
                fd = error;
        }
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
        if (!ro)
                mnt_drop_write(mnt);
 out_putname:
@@ -866,7 +866,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
        err = mnt_want_write(mnt);
        if (err)
                goto out_name;
-       mutex_lock_nested(&d_inode(mnt->mnt_root)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT);
        dentry = lookup_one_len(name->name, mnt->mnt_root,
                                strlen(name->name));
        if (IS_ERR(dentry)) {
@@ -884,7 +884,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
        dput(dentry);
 
 out_unlock:
-       mutex_unlock(&d_inode(mnt->mnt_root)->i_mutex);
+       inode_unlock(d_inode(mnt->mnt_root));
        if (inode)
                iput(inode);
        mnt_drop_write(mnt);
index b471e5a..cddd5b5 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1493,7 +1493,7 @@ out_rcu_wakeup:
        wake_up_sem_queue_do(&tasks);
 out_free:
        if (sem_io != fast_sem_io)
-               ipc_free(sem_io, sizeof(ushort)*nsems);
+               ipc_free(sem_io);
        return err;
 }
 
index 0f401d9..798cad1 100644 (file)
@@ -414,17 +414,12 @@ void *ipc_alloc(int size)
 /**
  * ipc_free - free ipc space
  * @ptr: pointer returned by ipc_alloc
- * @size: size of block
  *
- * Free a block created with ipc_alloc(). The caller must know the size
- * used in the allocation call.
+ * Free a block created with ipc_alloc().
  */
-void ipc_free(void *ptr, int size)
+void ipc_free(void *ptr)
 {
-       if (size > PAGE_SIZE)
-               vfree(ptr);
-       else
-               kfree(ptr);
+       kvfree(ptr);
 }
 
 /**
index 3a8a5a0..51f7ca5 100644 (file)
@@ -118,7 +118,7 @@ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
  * both function can sleep
  */
 void *ipc_alloc(int size);
-void ipc_free(void *ptr, int size);
+void ipc_free(void *ptr);
 
 /*
  * For allocation that need to be freed by RCU.
index 27c6046..f84f8d0 100644 (file)
@@ -95,7 +95,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa
        if (IS_ERR(dentry))
                return (void *)dentry; /* returning an error */
        inode = path.dentry->d_inode;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        audit_mark = kzalloc(sizeof(*audit_mark), GFP_KERNEL);
        if (unlikely(!audit_mark)) {
index 656c7e9..9f194aa 100644 (file)
@@ -364,7 +364,7 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent)
        struct dentry *d = kern_path_locked(watch->path, parent);
        if (IS_ERR(d))
                return PTR_ERR(d);
-       mutex_unlock(&d_backing_inode(parent->dentry)->i_mutex);
+       inode_unlock(d_backing_inode(parent->dentry));
        if (d_is_positive(d)) {
                /* update watch filter fields */
                watch->dev = d_backing_inode(d)->i_sb->s_dev;
index b0799bc..89ebbc4 100644 (file)
@@ -291,10 +291,13 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
 {
        struct perf_event *event;
        const struct perf_event_attr *attr;
+       struct file *file;
 
-       event = perf_event_get(fd);
-       if (IS_ERR(event))
-               return event;
+       file = perf_event_get(fd);
+       if (IS_ERR(file))
+               return file;
+
+       event = file->private_data;
 
        attr = perf_event_attrs(event);
        if (IS_ERR(attr))
@@ -304,24 +307,22 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
                goto err;
 
        if (attr->type == PERF_TYPE_RAW)
-               return event;
+               return file;
 
        if (attr->type == PERF_TYPE_HARDWARE)
-               return event;
+               return file;
 
        if (attr->type == PERF_TYPE_SOFTWARE &&
            attr->config == PERF_COUNT_SW_BPF_OUTPUT)
-               return event;
+               return file;
 err:
-       perf_event_release_kernel(event);
+       fput(file);
        return ERR_PTR(-EINVAL);
 }
 
 static void perf_event_fd_array_put_ptr(void *ptr)
 {
-       struct perf_event *event = ptr;
-
-       perf_event_release_kernel(event);
+       fput((struct file *)ptr);
 }
 
 static const struct bpf_map_ops perf_event_array_ops = {
index c095741..5946460 100644 (file)
@@ -49,8 +49,6 @@
 
 #include <asm/irq_regs.h>
 
-static struct workqueue_struct *perf_wq;
-
 typedef int (*remote_function_f)(void *);
 
 struct remote_function_call {
@@ -126,44 +124,181 @@ static int cpu_function_call(int cpu, remote_function_f func, void *info)
        return data.ret;
 }
 
-static void event_function_call(struct perf_event *event,
-                               int (*active)(void *),
-                               void (*inactive)(void *),
-                               void *data)
+static inline struct perf_cpu_context *
+__get_cpu_context(struct perf_event_context *ctx)
+{
+       return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
+}
+
+static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
+                         struct perf_event_context *ctx)
 {
+       raw_spin_lock(&cpuctx->ctx.lock);
+       if (ctx)
+               raw_spin_lock(&ctx->lock);
+}
+
+static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
+                           struct perf_event_context *ctx)
+{
+       if (ctx)
+               raw_spin_unlock(&ctx->lock);
+       raw_spin_unlock(&cpuctx->ctx.lock);
+}
+
+#define TASK_TOMBSTONE ((void *)-1L)
+
+static bool is_kernel_event(struct perf_event *event)
+{
+       return READ_ONCE(event->owner) == TASK_TOMBSTONE;
+}
+
+/*
+ * On task ctx scheduling...
+ *
+ * When !ctx->nr_events a task context will not be scheduled. This means
+ * we can disable the scheduler hooks (for performance) without leaving
+ * pending task ctx state.
+ *
+ * This however results in two special cases:
+ *
+ *  - removing the last event from a task ctx; this is relatively straight
+ *    forward and is done in __perf_remove_from_context.
+ *
+ *  - adding the first event to a task ctx; this is tricky because we cannot
+ *    rely on ctx->is_active and therefore cannot use event_function_call().
+ *    See perf_install_in_context().
+ *
+ * This is because we need a ctx->lock serialized variable (ctx->is_active)
+ * to reliably determine if a particular task/context is scheduled in. The
+ * task_curr() use in task_function_call() is racy in that a remote context
+ * switch is not a single atomic operation.
+ *
+ * As is, the situation is 'safe' because we set rq->curr before we do the
+ * actual context switch. This means that task_curr() will fail early, but
+ * we'll continue spinning on ctx->is_active until we've passed
+ * perf_event_task_sched_out().
+ *
+ * Without this ctx->lock serialized variable we could have a race where we find
+ * the task (and hence the context) would not be active while in fact they are.
+ *
+ * If ctx->nr_events, then ctx->is_active and cpuctx->task_ctx are set.
+ */
+
+typedef void (*event_f)(struct perf_event *, struct perf_cpu_context *,
+                       struct perf_event_context *, void *);
+
+struct event_function_struct {
+       struct perf_event *event;
+       event_f func;
+       void *data;
+};
+
+static int event_function(void *info)
+{
+       struct event_function_struct *efs = info;
+       struct perf_event *event = efs->event;
        struct perf_event_context *ctx = event->ctx;
-       struct task_struct *task = ctx->task;
+       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+       struct perf_event_context *task_ctx = cpuctx->task_ctx;
+       int ret = 0;
+
+       WARN_ON_ONCE(!irqs_disabled());
+
+       perf_ctx_lock(cpuctx, task_ctx);
+       /*
+        * Since we do the IPI call without holding ctx->lock things can have
+        * changed, double check we hit the task we set out to hit.
+        */
+       if (ctx->task) {
+               if (ctx->task != current) {
+                       ret = -EAGAIN;
+                       goto unlock;
+               }
+
+               /*
+                * We only use event_function_call() on established contexts,
+                * and event_function() is only ever called when active (or
+                * rather, we'll have bailed in task_function_call() or the
+                * above ctx->task != current test), therefore we must have
+                * ctx->is_active here.
+                */
+               WARN_ON_ONCE(!ctx->is_active);
+               /*
+                * And since we have ctx->is_active, cpuctx->task_ctx must
+                * match.
+                */
+               WARN_ON_ONCE(task_ctx != ctx);
+       } else {
+               WARN_ON_ONCE(&cpuctx->ctx != ctx);
+       }
+
+       efs->func(event, cpuctx, ctx, efs->data);
+unlock:
+       perf_ctx_unlock(cpuctx, task_ctx);
+
+       return ret;
+}
+
+static void event_function_local(struct perf_event *event, event_f func, void *data)
+{
+       struct event_function_struct efs = {
+               .event = event,
+               .func = func,
+               .data = data,
+       };
+
+       int ret = event_function(&efs);
+       WARN_ON_ONCE(ret);
+}
+
+static void event_function_call(struct perf_event *event, event_f func, void *data)
+{
+       struct perf_event_context *ctx = event->ctx;
+       struct task_struct *task = READ_ONCE(ctx->task); /* verified in event_function */
+       struct event_function_struct efs = {
+               .event = event,
+               .func = func,
+               .data = data,
+       };
+
+       if (!event->parent) {
+               /*
+                * If this is a !child event, we must hold ctx::mutex to
+                * stabilize the event->ctx relation. See
+                * perf_event_ctx_lock().
+                */
+               lockdep_assert_held(&ctx->mutex);
+       }
 
        if (!task) {
-               cpu_function_call(event->cpu, active, data);
+               cpu_function_call(event->cpu, event_function, &efs);
                return;
        }
 
 again:
-       if (!task_function_call(task, active, data))
+       if (task == TASK_TOMBSTONE)
+               return;
+
+       if (!task_function_call(task, event_function, &efs))
                return;
 
        raw_spin_lock_irq(&ctx->lock);
-       if (ctx->is_active) {
-               /*
-                * Reload the task pointer, it might have been changed by
-                * a concurrent perf_event_context_sched_out().
-                */
-               task = ctx->task;
-               raw_spin_unlock_irq(&ctx->lock);
-               goto again;
+       /*
+        * Reload the task pointer, it might have been changed by
+        * a concurrent perf_event_context_sched_out().
+        */
+       task = ctx->task;
+       if (task != TASK_TOMBSTONE) {
+               if (ctx->is_active) {
+                       raw_spin_unlock_irq(&ctx->lock);
+                       goto again;
+               }
+               func(event, NULL, ctx, data);
        }
-       inactive(data);
        raw_spin_unlock_irq(&ctx->lock);
 }
 
-#define EVENT_OWNER_KERNEL ((void *) -1)
-
-static bool is_kernel_event(struct perf_event *event)
-{
-       return event->owner == EVENT_OWNER_KERNEL;
-}
-
 #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
                       PERF_FLAG_FD_OUTPUT  |\
                       PERF_FLAG_PID_CGROUP |\
@@ -368,28 +503,6 @@ static inline u64 perf_event_clock(struct perf_event *event)
        return event->clock();
 }
 
-static inline struct perf_cpu_context *
-__get_cpu_context(struct perf_event_context *ctx)
-{
-       return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
-}
-
-static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
-                         struct perf_event_context *ctx)
-{
-       raw_spin_lock(&cpuctx->ctx.lock);
-       if (ctx)
-               raw_spin_lock(&ctx->lock);
-}
-
-static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
-                           struct perf_event_context *ctx)
-{
-       if (ctx)
-               raw_spin_unlock(&ctx->lock);
-       raw_spin_unlock(&cpuctx->ctx.lock);
-}
-
 #ifdef CONFIG_CGROUP_PERF
 
 static inline bool
@@ -579,13 +692,7 @@ static inline void perf_cgroup_sched_out(struct task_struct *task,
         * we are holding the rcu lock
         */
        cgrp1 = perf_cgroup_from_task(task, NULL);
-
-       /*
-        * next is NULL when called from perf_event_enable_on_exec()
-        * that will systematically cause a cgroup_switch()
-        */
-       if (next)
-               cgrp2 = perf_cgroup_from_task(next, NULL);
+       cgrp2 = perf_cgroup_from_task(next, NULL);
 
        /*
         * only schedule out current cgroup events if we know
@@ -611,8 +718,6 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
         * we are holding the rcu lock
         */
        cgrp1 = perf_cgroup_from_task(task, NULL);
-
-       /* prev can never be NULL */
        cgrp2 = perf_cgroup_from_task(prev, NULL);
 
        /*
@@ -917,7 +1022,7 @@ static void put_ctx(struct perf_event_context *ctx)
        if (atomic_dec_and_test(&ctx->refcount)) {
                if (ctx->parent_ctx)
                        put_ctx(ctx->parent_ctx);
-               if (ctx->task)
+               if (ctx->task && ctx->task != TASK_TOMBSTONE)
                        put_task_struct(ctx->task);
                call_rcu(&ctx->rcu_head, free_ctx);
        }
@@ -934,9 +1039,8 @@ static void put_ctx(struct perf_event_context *ctx)
  * perf_event_context::mutex nests and those are:
  *
  *  - perf_event_exit_task_context()   [ child , 0 ]
- *      __perf_event_exit_task()
- *        sync_child_event()
- *          put_event()                        [ parent, 1 ]
+ *      perf_event_exit_event()
+ *        put_event()                  [ parent, 1 ]
  *
  *  - perf_event_init_context()                [ parent, 0 ]
  *      inherit_task_group()
@@ -979,8 +1083,8 @@ static void put_ctx(struct perf_event_context *ctx)
  * Lock order:
  *     task_struct::perf_event_mutex
  *       perf_event_context::mutex
- *         perf_event_context::lock
  *         perf_event::child_mutex;
+ *           perf_event_context::lock
  *         perf_event::mmap_mutex
  *         mmap_sem
  */
@@ -1078,6 +1182,7 @@ static u64 primary_event_id(struct perf_event *event)
 
 /*
  * Get the perf_event_context for a task and lock it.
+ *
  * This has to cope with with the fact that until it is locked,
  * the context could get moved to another task.
  */
@@ -1118,9 +1223,12 @@ retry:
                        goto retry;
                }
 
-               if (!atomic_inc_not_zero(&ctx->refcount)) {
+               if (ctx->task == TASK_TOMBSTONE ||
+                   !atomic_inc_not_zero(&ctx->refcount)) {
                        raw_spin_unlock(&ctx->lock);
                        ctx = NULL;
+               } else {
+                       WARN_ON_ONCE(ctx->task != task);
                }
        }
        rcu_read_unlock();
@@ -1246,6 +1354,8 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
 static void
 list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 {
+       lockdep_assert_held(&ctx->lock);
+
        WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
        event->attach_state |= PERF_ATTACH_CONTEXT;
 
@@ -1448,11 +1558,14 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 
        if (is_cgroup_event(event)) {
                ctx->nr_cgroups--;
+               /*
+                * Because cgroup events are always per-cpu events, this will
+                * always be called from the right CPU.
+                */
                cpuctx = __get_cpu_context(ctx);
                /*
-                * if there are no more cgroup events
-                * then cler cgrp to avoid stale pointer
-                * in update_cgrp_time_from_cpuctx()
+                * If there are no more cgroup events then clear cgrp to avoid
+                * stale pointer in update_cgrp_time_from_cpuctx().
                 */
                if (!ctx->nr_cgroups)
                        cpuctx->cgrp = NULL;
@@ -1530,45 +1643,11 @@ out:
                perf_event__header_size(tmp);
 }
 
-/*
- * User event without the task.
- */
 static bool is_orphaned_event(struct perf_event *event)
 {
-       return event && !is_kernel_event(event) && !event->owner;
+       return event->state == PERF_EVENT_STATE_EXIT;
 }
 
-/*
- * Event has a parent but parent's task finished and it's
- * alive only because of children holding refference.
- */
-static bool is_orphaned_child(struct perf_event *event)
-{
-       return is_orphaned_event(event->parent);
-}
-
-static void orphans_remove_work(struct work_struct *work);
-
-static void schedule_orphans_remove(struct perf_event_context *ctx)
-{
-       if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
-               return;
-
-       if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
-               get_ctx(ctx);
-               ctx->orphans_remove_sched = true;
-       }
-}
-
-static int __init perf_workqueue_init(void)
-{
-       perf_wq = create_singlethread_workqueue("perf");
-       WARN(!perf_wq, "failed to create perf workqueue\n");
-       return perf_wq ? 0 : -1;
-}
-
-core_initcall(perf_workqueue_init);
-
 static inline int pmu_filter_match(struct perf_event *event)
 {
        struct pmu *pmu = event->pmu;
@@ -1629,9 +1708,6 @@ event_sched_out(struct perf_event *event,
        if (event->attr.exclusive || !cpuctx->active_oncpu)
                cpuctx->exclusive = 0;
 
-       if (is_orphaned_child(event))
-               schedule_orphans_remove(ctx);
-
        perf_pmu_enable(event->pmu);
 }
 
@@ -1655,21 +1731,8 @@ group_sched_out(struct perf_event *group_event,
                cpuctx->exclusive = 0;
 }
 
-struct remove_event {
-       struct perf_event *event;
-       bool detach_group;
-};
-
-static void ___perf_remove_from_context(void *info)
-{
-       struct remove_event *re = info;
-       struct perf_event *event = re->event;
-       struct perf_event_context *ctx = event->ctx;
-
-       if (re->detach_group)
-               perf_group_detach(event);
-       list_del_event(event, ctx);
-}
+#define DETACH_GROUP   0x01UL
+#define DETACH_STATE   0x02UL
 
 /*
  * Cross CPU call to remove a performance event
@@ -1677,33 +1740,33 @@ static void ___perf_remove_from_context(void *info)
  * We disable the event on the hardware level first. After that we
  * remove it from the context list.
  */
-static int __perf_remove_from_context(void *info)
+static void
+__perf_remove_from_context(struct perf_event *event,
+                          struct perf_cpu_context *cpuctx,
+                          struct perf_event_context *ctx,
+                          void *info)
 {
-       struct remove_event *re = info;
-       struct perf_event *event = re->event;
-       struct perf_event_context *ctx = event->ctx;
-       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+       unsigned long flags = (unsigned long)info;
 
-       raw_spin_lock(&ctx->lock);
        event_sched_out(event, cpuctx, ctx);
-       if (re->detach_group)
+       if (flags & DETACH_GROUP)
                perf_group_detach(event);
        list_del_event(event, ctx);
-       if (!ctx->nr_events && cpuctx->task_ctx == ctx) {
+       if (flags & DETACH_STATE)
+               event->state = PERF_EVENT_STATE_EXIT;
+
+       if (!ctx->nr_events && ctx->is_active) {
                ctx->is_active = 0;
-               cpuctx->task_ctx = NULL;
+               if (ctx->task) {
+                       WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+                       cpuctx->task_ctx = NULL;
+               }
        }
-       raw_spin_unlock(&ctx->lock);
-
-       return 0;
 }
 
 /*
  * Remove the event from a task's (or a CPU's) list of events.
  *
- * CPU events are removed with a smp call. For task events we only
- * call when the task is on a CPU.
- *
  * If event->ctx is a cloned context, callers must make sure that
  * every task struct that event->ctx->task could possibly point to
  * remains valid.  This is OK when called from perf_release since
@@ -1711,73 +1774,32 @@ static int __perf_remove_from_context(void *info)
  * When called from perf_event_exit_task, it's OK because the
  * context has been detached from its task.
  */
-static void perf_remove_from_context(struct perf_event *event, bool detach_group)
+static void perf_remove_from_context(struct perf_event *event, unsigned long flags)
 {
-       struct perf_event_context *ctx = event->ctx;
-       struct remove_event re = {
-               .event = event,
-               .detach_group = detach_group,
-       };
+       lockdep_assert_held(&event->ctx->mutex);
 
-       lockdep_assert_held(&ctx->mutex);
-
-       event_function_call(event, __perf_remove_from_context,
-                           ___perf_remove_from_context, &re);
+       event_function_call(event, __perf_remove_from_context, (void *)flags);
 }
 
 /*
  * Cross CPU call to disable a performance event
  */
-int __perf_event_disable(void *info)
-{
-       struct perf_event *event = info;
-       struct perf_event_context *ctx = event->ctx;
-       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-
-       /*
-        * If this is a per-task event, need to check whether this
-        * event's task is the current task on this cpu.
-        *
-        * Can trigger due to concurrent perf_event_context_sched_out()
-        * flipping contexts around.
-        */
-       if (ctx->task && cpuctx->task_ctx != ctx)
-               return -EINVAL;
-
-       raw_spin_lock(&ctx->lock);
-
-       /*
-        * If the event is on, turn it off.
-        * If it is in error state, leave it in error state.
-        */
-       if (event->state >= PERF_EVENT_STATE_INACTIVE) {
-               update_context_time(ctx);
-               update_cgrp_time_from_event(event);
-               update_group_times(event);
-               if (event == event->group_leader)
-                       group_sched_out(event, cpuctx, ctx);
-               else
-                       event_sched_out(event, cpuctx, ctx);
-               event->state = PERF_EVENT_STATE_OFF;
-       }
-
-       raw_spin_unlock(&ctx->lock);
-
-       return 0;
-}
-
-void ___perf_event_disable(void *info)
+static void __perf_event_disable(struct perf_event *event,
+                                struct perf_cpu_context *cpuctx,
+                                struct perf_event_context *ctx,
+                                void *info)
 {
-       struct perf_event *event = info;
+       if (event->state < PERF_EVENT_STATE_INACTIVE)
+               return;
 
-       /*
-        * Since we have the lock this context can't be scheduled
-        * in, so we can change the state safely.
-        */
-       if (event->state == PERF_EVENT_STATE_INACTIVE) {
-               update_group_times(event);
-               event->state = PERF_EVENT_STATE_OFF;
-       }
+       update_context_time(ctx);
+       update_cgrp_time_from_event(event);
+       update_group_times(event);
+       if (event == event->group_leader)
+               group_sched_out(event, cpuctx, ctx);
+       else
+               event_sched_out(event, cpuctx, ctx);
+       event->state = PERF_EVENT_STATE_OFF;
 }
 
 /*
@@ -1788,7 +1810,8 @@ void ___perf_event_disable(void *info)
  * remains valid.  This condition is satisifed when called through
  * perf_event_for_each_child or perf_event_for_each because they
  * hold the top-level event's child_mutex, so any descendant that
- * goes to exit will block in sync_child_event.
+ * goes to exit will block in perf_event_exit_event().
+ *
  * When called from perf_pending_event it's OK because event->ctx
  * is the current context on this CPU and preemption is disabled,
  * hence we can't get into perf_event_task_sched_out for this context.
@@ -1804,8 +1827,12 @@ static void _perf_event_disable(struct perf_event *event)
        }
        raw_spin_unlock_irq(&ctx->lock);
 
-       event_function_call(event, __perf_event_disable,
-                           ___perf_event_disable, event);
+       event_function_call(event, __perf_event_disable, NULL);
+}
+
+void perf_event_disable_local(struct perf_event *event)
+{
+       event_function_local(event, __perf_event_disable, NULL);
 }
 
 /*
@@ -1918,9 +1945,6 @@ event_sched_in(struct perf_event *event,
        if (event->attr.exclusive)
                cpuctx->exclusive = 1;
 
-       if (is_orphaned_child(event))
-               schedule_orphans_remove(ctx);
-
 out:
        perf_pmu_enable(event->pmu);
 
@@ -2039,7 +2063,8 @@ static void add_event_to_ctx(struct perf_event *event,
        event->tstamp_stopped = tstamp;
 }
 
-static void task_ctx_sched_out(struct perf_event_context *ctx);
+static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
+                              struct perf_event_context *ctx);
 static void
 ctx_sched_in(struct perf_event_context *ctx,
             struct perf_cpu_context *cpuctx,
@@ -2058,16 +2083,15 @@ static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
                ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
 }
 
-static void ___perf_install_in_context(void *info)
+static void ctx_resched(struct perf_cpu_context *cpuctx,
+                       struct perf_event_context *task_ctx)
 {
-       struct perf_event *event = info;
-       struct perf_event_context *ctx = event->ctx;
-
-       /*
-        * Since the task isn't running, its safe to add the event, us holding
-        * the ctx->lock ensures the task won't get scheduled in.
-        */
-       add_event_to_ctx(event, ctx);
+       perf_pmu_disable(cpuctx->ctx.pmu);
+       if (task_ctx)
+               task_ctx_sched_out(cpuctx, task_ctx);
+       cpu_ctx_sched_out(cpuctx, EVENT_ALL);
+       perf_event_sched_in(cpuctx, task_ctx, current);
+       perf_pmu_enable(cpuctx->ctx.pmu);
 }
 
 /*
@@ -2077,55 +2101,31 @@ static void ___perf_install_in_context(void *info)
  */
 static int  __perf_install_in_context(void *info)
 {
-       struct perf_event *event = info;
-       struct perf_event_context *ctx = event->ctx;
+       struct perf_event_context *ctx = info;
        struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
        struct perf_event_context *task_ctx = cpuctx->task_ctx;
-       struct task_struct *task = current;
-
-       perf_ctx_lock(cpuctx, task_ctx);
-       perf_pmu_disable(cpuctx->ctx.pmu);
 
-       /*
-        * If there was an active task_ctx schedule it out.
-        */
-       if (task_ctx)
-               task_ctx_sched_out(task_ctx);
-
-       /*
-        * If the context we're installing events in is not the
-        * active task_ctx, flip them.
-        */
-       if (ctx->task && task_ctx != ctx) {
-               if (task_ctx)
-                       raw_spin_unlock(&task_ctx->lock);
+       raw_spin_lock(&cpuctx->ctx.lock);
+       if (ctx->task) {
                raw_spin_lock(&ctx->lock);
+               /*
+                * If we hit the 'wrong' task, we've since scheduled and
+                * everything should be sorted, nothing to do!
+                */
                task_ctx = ctx;
-       }
+               if (ctx->task != current)
+                       goto unlock;
 
-       if (task_ctx) {
-               cpuctx->task_ctx = task_ctx;
-               task = task_ctx->task;
+               /*
+                * If task_ctx is set, it had better be to us.
+                */
+               WARN_ON_ONCE(cpuctx->task_ctx != ctx && cpuctx->task_ctx);
+       } else if (task_ctx) {
+               raw_spin_lock(&task_ctx->lock);
        }
 
-       cpu_ctx_sched_out(cpuctx, EVENT_ALL);
-
-       update_context_time(ctx);
-       /*
-        * update cgrp time only if current cgrp
-        * matches event->cgrp. Must be done before
-        * calling add_event_to_ctx()
-        */
-       update_cgrp_time_from_event(event);
-
-       add_event_to_ctx(event, ctx);
-
-       /*
-        * Schedule everything back in
-        */
-       perf_event_sched_in(cpuctx, task_ctx, task);
-
-       perf_pmu_enable(cpuctx->ctx.pmu);
+       ctx_resched(cpuctx, task_ctx);
+unlock:
        perf_ctx_unlock(cpuctx, task_ctx);
 
        return 0;
@@ -2133,27 +2133,54 @@ static int  __perf_install_in_context(void *info)
 
 /*
  * Attach a performance event to a context
- *
- * First we add the event to the list with the hardware enable bit
- * in event->hw_config cleared.
- *
- * If the event is attached to a task which is on a CPU we use a smp
- * call to enable it in the task context. The task might have been
- * scheduled away, but we check this in the smp call again.
  */
 static void
 perf_install_in_context(struct perf_event_context *ctx,
                        struct perf_event *event,
                        int cpu)
 {
+       struct task_struct *task = NULL;
+
        lockdep_assert_held(&ctx->mutex);
 
        event->ctx = ctx;
        if (event->cpu != -1)
                event->cpu = cpu;
 
-       event_function_call(event, __perf_install_in_context,
-                           ___perf_install_in_context, event);
+       /*
+        * Installing events is tricky because we cannot rely on ctx->is_active
+        * to be set in case this is the nr_events 0 -> 1 transition.
+        *
+        * So what we do is we add the event to the list here, which will allow
+        * a future context switch to DTRT and then send a racy IPI. If the IPI
+        * fails to hit the right task, this means a context switch must have
+        * happened and that will have taken care of business.
+        */
+       raw_spin_lock_irq(&ctx->lock);
+       task = ctx->task;
+       /*
+        * Worse, we cannot even rely on the ctx actually existing anymore. If
+        * between find_get_context() and perf_install_in_context() the task
+        * went through perf_event_exit_task() it's dead and we should not be
+        * adding new events.
+        */
+       if (task == TASK_TOMBSTONE) {
+               raw_spin_unlock_irq(&ctx->lock);
+               return;
+       }
+       update_context_time(ctx);
+       /*
+        * Update cgrp time only if current cgrp matches event->cgrp.
+        * Must be done before calling add_event_to_ctx().
+        */
+       update_cgrp_time_from_event(event);
+       add_event_to_ctx(event, ctx);
+       raw_spin_unlock_irq(&ctx->lock);
+
+       if (task)
+               task_function_call(task, __perf_install_in_context, ctx);
+       else
+               cpu_function_call(cpu, __perf_install_in_context, ctx);
 }
 
 /*
@@ -2180,43 +2207,30 @@ static void __perf_event_mark_enabled(struct perf_event *event)
 /*
  * Cross CPU call to enable a performance event
  */
-static int __perf_event_enable(void *info)
+static void __perf_event_enable(struct perf_event *event,
+                               struct perf_cpu_context *cpuctx,
+                               struct perf_event_context *ctx,
+                               void *info)
 {
-       struct perf_event *event = info;
-       struct perf_event_context *ctx = event->ctx;
        struct perf_event *leader = event->group_leader;
-       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-       int err;
+       struct perf_event_context *task_ctx;
 
-       /*
-        * There's a time window between 'ctx->is_active' check
-        * in perf_event_enable function and this place having:
-        *   - IRQs on
-        *   - ctx->lock unlocked
-        *
-        * where the task could be killed and 'ctx' deactivated
-        * by perf_event_exit_task.
-        */
-       if (!ctx->is_active)
-               return -EINVAL;
+       if (event->state >= PERF_EVENT_STATE_INACTIVE ||
+           event->state <= PERF_EVENT_STATE_ERROR)
+               return;
 
-       raw_spin_lock(&ctx->lock);
        update_context_time(ctx);
-
-       if (event->state >= PERF_EVENT_STATE_INACTIVE)
-               goto unlock;
-
-       /*
-        * set current task's cgroup time reference point
-        */
-       perf_cgroup_set_timestamp(current, ctx);
-
        __perf_event_mark_enabled(event);
 
+       if (!ctx->is_active)
+               return;
+
        if (!event_filter_match(event)) {
-               if (is_cgroup_event(event))
+               if (is_cgroup_event(event)) {
+                       perf_cgroup_set_timestamp(current, ctx); // XXX ?
                        perf_cgroup_defer_enabled(event);
-               goto unlock;
+               }
+               return;
        }
 
        /*
@@ -2224,41 +2238,13 @@ static int __perf_event_enable(void *info)
         * then don't put it on unless the group is on.
         */
        if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
-               goto unlock;
-
-       if (!group_can_go_on(event, cpuctx, 1)) {
-               err = -EEXIST;
-       } else {
-               if (event == leader)
-                       err = group_sched_in(event, cpuctx, ctx);
-               else
-                       err = event_sched_in(event, cpuctx, ctx);
-       }
-
-       if (err) {
-               /*
-                * If this event can't go on and it's part of a
-                * group, then the whole group has to come off.
-                */
-               if (leader != event) {
-                       group_sched_out(leader, cpuctx, ctx);
-                       perf_mux_hrtimer_restart(cpuctx);
-               }
-               if (leader->attr.pinned) {
-                       update_group_times(leader);
-                       leader->state = PERF_EVENT_STATE_ERROR;
-               }
-       }
-
-unlock:
-       raw_spin_unlock(&ctx->lock);
+               return;
 
-       return 0;
-}
+       task_ctx = cpuctx->task_ctx;
+       if (ctx->task)
+               WARN_ON_ONCE(task_ctx != ctx);
 
-void ___perf_event_enable(void *info)
-{
-       __perf_event_mark_enabled((struct perf_event *)info);
+       ctx_resched(cpuctx, task_ctx);
 }
 
 /*
@@ -2275,7 +2261,8 @@ static void _perf_event_enable(struct perf_event *event)
        struct perf_event_context *ctx = event->ctx;
 
        raw_spin_lock_irq(&ctx->lock);
-       if (event->state >= PERF_EVENT_STATE_INACTIVE) {
+       if (event->state >= PERF_EVENT_STATE_INACTIVE ||
+           event->state <  PERF_EVENT_STATE_ERROR) {
                raw_spin_unlock_irq(&ctx->lock);
                return;
        }
@@ -2291,8 +2278,7 @@ static void _perf_event_enable(struct perf_event *event)
                event->state = PERF_EVENT_STATE_OFF;
        raw_spin_unlock_irq(&ctx->lock);
 
-       event_function_call(event, __perf_event_enable,
-                           ___perf_event_enable, event);
+       event_function_call(event, __perf_event_enable, NULL);
 }
 
 /*
@@ -2342,12 +2328,27 @@ static void ctx_sched_out(struct perf_event_context *ctx,
                          struct perf_cpu_context *cpuctx,
                          enum event_type_t event_type)
 {
-       struct perf_event *event;
        int is_active = ctx->is_active;
+       struct perf_event *event;
 
-       ctx->is_active &= ~event_type;
-       if (likely(!ctx->nr_events))
+       lockdep_assert_held(&ctx->lock);
+
+       if (likely(!ctx->nr_events)) {
+               /*
+                * See __perf_remove_from_context().
+                */
+               WARN_ON_ONCE(ctx->is_active);
+               if (ctx->task)
+                       WARN_ON_ONCE(cpuctx->task_ctx);
                return;
+       }
+
+       ctx->is_active &= ~event_type;
+       if (ctx->task) {
+               WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+               if (!ctx->is_active)
+                       cpuctx->task_ctx = NULL;
+       }
 
        update_context_time(ctx);
        update_cgrp_time_from_cpuctx(cpuctx);
@@ -2518,17 +2519,21 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
                raw_spin_lock(&ctx->lock);
                raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
                if (context_equiv(ctx, next_ctx)) {
-                       /*
-                        * XXX do we need a memory barrier of sorts
-                        * wrt to rcu_dereference() of perf_event_ctxp
-                        */
-                       task->perf_event_ctxp[ctxn] = next_ctx;
-                       next->perf_event_ctxp[ctxn] = ctx;
-                       ctx->task = next;
-                       next_ctx->task = task;
+                       WRITE_ONCE(ctx->task, next);
+                       WRITE_ONCE(next_ctx->task, task);
 
                        swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
 
+                       /*
+                        * RCU_INIT_POINTER here is safe because we've not
+                        * modified the ctx and the above modifications of
+                        * ctx->task and ctx->task_ctx_data are immaterial
+                        * since those values are always verified under
+                        * ctx->lock which we're now holding.
+                        */
+                       RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], next_ctx);
+                       RCU_INIT_POINTER(next->perf_event_ctxp[ctxn], ctx);
+
                        do_switch = 0;
 
                        perf_event_sync_stat(ctx, next_ctx);
@@ -2541,8 +2546,7 @@ unlock:
 
        if (do_switch) {
                raw_spin_lock(&ctx->lock);
-               ctx_sched_out(ctx, cpuctx, EVENT_ALL);
-               cpuctx->task_ctx = NULL;
+               task_ctx_sched_out(cpuctx, ctx);
                raw_spin_unlock(&ctx->lock);
        }
 }
@@ -2637,10 +2641,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
                perf_cgroup_sched_out(task, next);
 }
 
-static void task_ctx_sched_out(struct perf_event_context *ctx)
+static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
+                              struct perf_event_context *ctx)
 {
-       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-
        if (!cpuctx->task_ctx)
                return;
 
@@ -2648,7 +2651,6 @@ static void task_ctx_sched_out(struct perf_event_context *ctx)
                return;
 
        ctx_sched_out(ctx, cpuctx, EVENT_ALL);
-       cpuctx->task_ctx = NULL;
 }
 
 /*
@@ -2725,13 +2727,22 @@ ctx_sched_in(struct perf_event_context *ctx,
             enum event_type_t event_type,
             struct task_struct *task)
 {
-       u64 now;
        int is_active = ctx->is_active;
+       u64 now;
+
+       lockdep_assert_held(&ctx->lock);
 
-       ctx->is_active |= event_type;
        if (likely(!ctx->nr_events))
                return;
 
+       ctx->is_active |= event_type;
+       if (ctx->task) {
+               if (!is_active)
+                       cpuctx->task_ctx = ctx;
+               else
+                       WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+       }
+
        now = perf_clock();
        ctx->timestamp = now;
        perf_cgroup_set_timestamp(task, ctx);
@@ -2773,12 +2784,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
         * cpu flexible, task flexible.
         */
        cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-
-       if (ctx->nr_events)
-               cpuctx->task_ctx = ctx;
-
-       perf_event_sched_in(cpuctx, cpuctx->task_ctx, task);
-
+       perf_event_sched_in(cpuctx, ctx, task);
        perf_pmu_enable(ctx->pmu);
        perf_ctx_unlock(cpuctx, ctx);
 }
@@ -2800,6 +2806,16 @@ void __perf_event_task_sched_in(struct task_struct *prev,
        struct perf_event_context *ctx;
        int ctxn;
 
+       /*
+        * If cgroup events exist on this CPU, then we need to check if we have
+        * to switch in PMU state; cgroup events are system-wide mode only.
+        *
+        * Since cgroup events are CPU events, we must schedule these in before
+        * we schedule in the task events.
+        */
+       if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
+               perf_cgroup_sched_in(prev, task);
+
        for_each_task_context_nr(ctxn) {
                ctx = task->perf_event_ctxp[ctxn];
                if (likely(!ctx))
@@ -2807,13 +2823,6 @@ void __perf_event_task_sched_in(struct task_struct *prev,
 
                perf_event_context_sched_in(ctx, task);
        }
-       /*
-        * if cgroup events exist on this CPU, then we need
-        * to check if we have to switch in PMU state.
-        * cgroup event are system-wide mode only
-        */
-       if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
-               perf_cgroup_sched_in(prev, task);
 
        if (atomic_read(&nr_switch_events))
                perf_event_switch(task, prev, true);
@@ -3099,46 +3108,30 @@ static int event_enable_on_exec(struct perf_event *event,
 static void perf_event_enable_on_exec(int ctxn)
 {
        struct perf_event_context *ctx, *clone_ctx = NULL;
+       struct perf_cpu_context *cpuctx;
        struct perf_event *event;
        unsigned long flags;
        int enabled = 0;
-       int ret;
 
        local_irq_save(flags);
        ctx = current->perf_event_ctxp[ctxn];
        if (!ctx || !ctx->nr_events)
                goto out;
 
-       /*
-        * We must ctxsw out cgroup events to avoid conflict
-        * when invoking perf_task_event_sched_in() later on
-        * in this function. Otherwise we end up trying to
-        * ctxswin cgroup events which are already scheduled
-        * in.
-        */
-       perf_cgroup_sched_out(current, NULL);
-
-       raw_spin_lock(&ctx->lock);
-       task_ctx_sched_out(ctx);
-
-       list_for_each_entry(event, &ctx->event_list, event_entry) {
-               ret = event_enable_on_exec(event, ctx);
-               if (ret)
-                       enabled = 1;
-       }
+       cpuctx = __get_cpu_context(ctx);
+       perf_ctx_lock(cpuctx, ctx);
+       list_for_each_entry(event, &ctx->event_list, event_entry)
+               enabled |= event_enable_on_exec(event, ctx);
 
        /*
-        * Unclone this context if we enabled any event.
+        * Unclone and reschedule this context if we enabled any event.
         */
-       if (enabled)
+       if (enabled) {
                clone_ctx = unclone_ctx(ctx);
+               ctx_resched(cpuctx, ctx);
+       }
+       perf_ctx_unlock(cpuctx, ctx);
 
-       raw_spin_unlock(&ctx->lock);
-
-       /*
-        * Also calls ctxswin for cgroup events, if any:
-        */
-       perf_event_context_sched_in(ctx, ctx->task);
 out:
        local_irq_restore(flags);
 
@@ -3334,7 +3327,6 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
        INIT_LIST_HEAD(&ctx->flexible_groups);
        INIT_LIST_HEAD(&ctx->event_list);
        atomic_set(&ctx->refcount, 1);
-       INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
 }
 
 static struct perf_event_context *
@@ -3521,11 +3513,13 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
 
 static void unaccount_event(struct perf_event *event)
 {
+       bool dec = false;
+
        if (event->parent)
                return;
 
        if (event->attach_state & PERF_ATTACH_TASK)
-               static_key_slow_dec_deferred(&perf_sched_events);
+               dec = true;
        if (event->attr.mmap || event->attr.mmap_data)
                atomic_dec(&nr_mmap_events);
        if (event->attr.comm)
@@ -3535,12 +3529,15 @@ static void unaccount_event(struct perf_event *event)
        if (event->attr.freq)
                atomic_dec(&nr_freq_events);
        if (event->attr.context_switch) {
-               static_key_slow_dec_deferred(&perf_sched_events);
+               dec = true;
                atomic_dec(&nr_switch_events);
        }
        if (is_cgroup_event(event))
-               static_key_slow_dec_deferred(&perf_sched_events);
+               dec = true;
        if (has_branch_stack(event))
+               dec = true;
+
+       if (dec)
                static_key_slow_dec_deferred(&perf_sched_events);
 
        unaccount_event_cpu(event, event->cpu);
@@ -3556,7 +3553,7 @@ static void unaccount_event(struct perf_event *event)
  *  3) two matching events on the same context.
  *
  * The former two cases are handled in the allocation path (perf_event_alloc(),
- * __free_event()), the latter -- before the first perf_install_in_context().
+ * _free_event()), the latter -- before the first perf_install_in_context().
  */
 static int exclusive_event_init(struct perf_event *event)
 {
@@ -3631,29 +3628,6 @@ static bool exclusive_event_installable(struct perf_event *event,
        return true;
 }
 
-static void __free_event(struct perf_event *event)
-{
-       if (!event->parent) {
-               if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
-                       put_callchain_buffers();
-       }
-
-       perf_event_free_bpf_prog(event);
-
-       if (event->destroy)
-               event->destroy(event);
-
-       if (event->ctx)
-               put_ctx(event->ctx);
-
-       if (event->pmu) {
-               exclusive_event_destroy(event);
-               module_put(event->pmu->module);
-       }
-
-       call_rcu(&event->rcu_head, free_event_rcu);
-}
-
 static void _free_event(struct perf_event *event)
 {
        irq_work_sync(&event->pending);
@@ -3675,7 +3649,25 @@ static void _free_event(struct perf_event *event)
        if (is_cgroup_event(event))
                perf_detach_cgroup(event);
 
-       __free_event(event);
+       if (!event->parent) {
+               if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+                       put_callchain_buffers();
+       }
+
+       perf_event_free_bpf_prog(event);
+
+       if (event->destroy)
+               event->destroy(event);
+
+       if (event->ctx)
+               put_ctx(event->ctx);
+
+       if (event->pmu) {
+               exclusive_event_destroy(event);
+               module_put(event->pmu->module);
+       }
+
+       call_rcu(&event->rcu_head, free_event_rcu);
 }
 
 /*
@@ -3702,14 +3694,13 @@ static void perf_remove_from_owner(struct perf_event *event)
        struct task_struct *owner;
 
        rcu_read_lock();
-       owner = ACCESS_ONCE(event->owner);
        /*
-        * Matches the smp_wmb() in perf_event_exit_task(). If we observe
-        * !owner it means the list deletion is complete and we can indeed
-        * free this event, otherwise we need to serialize on
+        * Matches the smp_store_release() in perf_event_exit_task(). If we
+        * observe !owner it means the list deletion is complete and we can
+        * indeed free this event, otherwise we need to serialize on
         * owner->perf_event_mutex.
         */
-       smp_read_barrier_depends();
+       owner = lockless_dereference(event->owner);
        if (owner) {
                /*
                 * Since delayed_put_task_struct() also drops the last
@@ -3737,8 +3728,10 @@ static void perf_remove_from_owner(struct perf_event *event)
                 * ensured they're done, and we can proceed with freeing the
                 * event.
                 */
-               if (event->owner)
+               if (event->owner) {
                        list_del_init(&event->owner_entry);
+                       smp_store_release(&event->owner, NULL);
+               }
                mutex_unlock(&owner->perf_event_mutex);
                put_task_struct(owner);
        }
@@ -3746,36 +3739,98 @@ static void perf_remove_from_owner(struct perf_event *event)
 
 static void put_event(struct perf_event *event)
 {
-       struct perf_event_context *ctx;
-
        if (!atomic_long_dec_and_test(&event->refcount))
                return;
 
+       _free_event(event);
+}
+
+/*
+ * Kill an event dead; while event::refcount will preserve the event
+ * object, it will not preserve its functionality. Once the last 'user'
+ * gives up the object, we'll destroy the thing.
+ */
+int perf_event_release_kernel(struct perf_event *event)
+{
+       struct perf_event_context *ctx;
+       struct perf_event *child, *tmp;
+
        if (!is_kernel_event(event))
                perf_remove_from_owner(event);
 
+       ctx = perf_event_ctx_lock(event);
+       WARN_ON_ONCE(ctx->parent_ctx);
+       perf_remove_from_context(event, DETACH_GROUP | DETACH_STATE);
+       perf_event_ctx_unlock(event, ctx);
+
        /*
-        * There are two ways this annotation is useful:
+        * At this point we must have event->state == PERF_EVENT_STATE_EXIT,
+        * either from the above perf_remove_from_context() or through
+        * perf_event_exit_event().
         *
-        *  1) there is a lock recursion from perf_event_exit_task
-        *     see the comment there.
+        * Therefore, anybody acquiring event->child_mutex after the below
+        * loop _must_ also see this, most importantly inherit_event() which
+        * will avoid placing more children on the list.
         *
-        *  2) there is a lock-inversion with mmap_sem through
-        *     perf_read_group(), which takes faults while
-        *     holding ctx->mutex, however this is called after
-        *     the last filedesc died, so there is no possibility
-        *     to trigger the AB-BA case.
+        * Thus this guarantees that we will in fact observe and kill _ALL_
+        * child events.
         */
-       ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING);
-       WARN_ON_ONCE(ctx->parent_ctx);
-       perf_remove_from_context(event, true);
-       perf_event_ctx_unlock(event, ctx);
+       WARN_ON_ONCE(event->state != PERF_EVENT_STATE_EXIT);
 
-       _free_event(event);
-}
+again:
+       mutex_lock(&event->child_mutex);
+       list_for_each_entry(child, &event->child_list, child_list) {
 
-int perf_event_release_kernel(struct perf_event *event)
-{
+               /*
+                * Cannot change, child events are not migrated, see the
+                * comment with perf_event_ctx_lock_nested().
+                */
+               ctx = lockless_dereference(child->ctx);
+               /*
+                * Since child_mutex nests inside ctx::mutex, we must jump
+                * through hoops. We start by grabbing a reference on the ctx.
+                *
+                * Since the event cannot get freed while we hold the
+                * child_mutex, the context must also exist and have a !0
+                * reference count.
+                */
+               get_ctx(ctx);
+
+               /*
+                * Now that we have a ctx ref, we can drop child_mutex, and
+                * acquire ctx::mutex without fear of it going away. Then we
+                * can re-acquire child_mutex.
+                */
+               mutex_unlock(&event->child_mutex);
+               mutex_lock(&ctx->mutex);
+               mutex_lock(&event->child_mutex);
+
+               /*
+                * Now that we hold ctx::mutex and child_mutex, revalidate our
+                * state: if child is still the first entry, it didn't get freed
+                * behind our back and we can go ahead and free it ourselves.
+                */
+               tmp = list_first_entry_or_null(&event->child_list,
+                                              struct perf_event, child_list);
+               if (tmp == child) {
+                       perf_remove_from_context(child, DETACH_GROUP);
+                       list_del(&child->child_list);
+                       free_event(child);
+                       /*
+                        * This matches the refcount bump in inherit_event();
+                        * this can't be the last reference.
+                        */
+                       put_event(event);
+               }
+
+               mutex_unlock(&event->child_mutex);
+               mutex_unlock(&ctx->mutex);
+               put_ctx(ctx);
+               goto again;
+       }
+       mutex_unlock(&event->child_mutex);
+
+       /* Must be the last reference */
        put_event(event);
        return 0;
 }
@@ -3786,46 +3841,10 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
  */
 static int perf_release(struct inode *inode, struct file *file)
 {
-       put_event(file->private_data);
+       perf_event_release_kernel(file->private_data);
        return 0;
 }
 
-/*
- * Remove all orphanes events from the context.
- */
-static void orphans_remove_work(struct work_struct *work)
-{
-       struct perf_event_context *ctx;
-       struct perf_event *event, *tmp;
-
-       ctx = container_of(work, struct perf_event_context,
-                          orphans_remove.work);
-
-       mutex_lock(&ctx->mutex);
-       list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
-               struct perf_event *parent_event = event->parent;
-
-               if (!is_orphaned_child(event))
-                       continue;
-
-               perf_remove_from_context(event, true);
-
-               mutex_lock(&parent_event->child_mutex);
-               list_del_init(&event->child_list);
-               mutex_unlock(&parent_event->child_mutex);
-
-               free_event(event);
-               put_event(parent_event);
-       }
-
-       raw_spin_lock_irq(&ctx->lock);
-       ctx->orphans_remove_sched = false;
-       raw_spin_unlock_irq(&ctx->lock);
-       mutex_unlock(&ctx->mutex);
-
-       put_ctx(ctx);
-}
-
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
        struct perf_event *child;
@@ -4054,7 +4073,7 @@ static void _perf_event_reset(struct perf_event *event)
 /*
  * Holding the top-level event's child_mutex means that any
  * descendant process that has inherited this event will block
- * in sync_child_event if it goes to exit, thus satisfying the
+ * in perf_event_exit_event() if it goes to exit, thus satisfying the
  * task existence requirements of perf_event_enable/disable.
  */
 static void perf_event_for_each_child(struct perf_event *event,
@@ -4086,36 +4105,14 @@ static void perf_event_for_each(struct perf_event *event,
                perf_event_for_each_child(sibling, func);
 }
 
-struct period_event {
-       struct perf_event *event;
-       u64 value;
-};
-
-static void ___perf_event_period(void *info)
-{
-       struct period_event *pe = info;
-       struct perf_event *event = pe->event;
-       u64 value = pe->value;
-
-       if (event->attr.freq) {
-               event->attr.sample_freq = value;
-       } else {
-               event->attr.sample_period = value;
-               event->hw.sample_period = value;
-       }
-
-       local64_set(&event->hw.period_left, 0);
-}
-
-static int __perf_event_period(void *info)
+static void __perf_event_period(struct perf_event *event,
+                               struct perf_cpu_context *cpuctx,
+                               struct perf_event_context *ctx,
+                               void *info)
 {
-       struct period_event *pe = info;
-       struct perf_event *event = pe->event;
-       struct perf_event_context *ctx = event->ctx;
-       u64 value = pe->value;
+       u64 value = *((u64 *)info);
        bool active;
 
-       raw_spin_lock(&ctx->lock);
        if (event->attr.freq) {
                event->attr.sample_freq = value;
        } else {
@@ -4135,14 +4132,10 @@ static int __perf_event_period(void *info)
                event->pmu->start(event, PERF_EF_RELOAD);
                perf_pmu_enable(ctx->pmu);
        }
-       raw_spin_unlock(&ctx->lock);
-
-       return 0;
 }
 
 static int perf_event_period(struct perf_event *event, u64 __user *arg)
 {
-       struct period_event pe = { .event = event, };
        u64 value;
 
        if (!is_sampling_event(event))
@@ -4157,10 +4150,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
        if (event->attr.freq && value > sysctl_perf_event_sample_rate)
                return -EINVAL;
 
-       pe.value = value;
-
-       event_function_call(event, __perf_event_period,
-                           ___perf_event_period, &pe);
+       event_function_call(event, __perf_event_period, &value);
 
        return 0;
 }
@@ -4872,9 +4862,9 @@ static int perf_fasync(int fd, struct file *filp, int on)
        struct perf_event *event = filp->private_data;
        int retval;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        retval = fasync_helper(fd, filp, on, &event->fasync);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (retval < 0)
                return retval;
@@ -4932,7 +4922,7 @@ static void perf_pending_event(struct irq_work *entry)
 
        if (event->pending_disable) {
                event->pending_disable = 0;
-               __perf_event_disable(event);
+               perf_event_disable_local(event);
        }
 
        if (event->pending_wakeup) {
@@ -7753,11 +7743,13 @@ static void account_event_cpu(struct perf_event *event, int cpu)
 
 static void account_event(struct perf_event *event)
 {
+       bool inc = false;
+
        if (event->parent)
                return;
 
        if (event->attach_state & PERF_ATTACH_TASK)
-               static_key_slow_inc(&perf_sched_events.key);
+               inc = true;
        if (event->attr.mmap || event->attr.mmap_data)
                atomic_inc(&nr_mmap_events);
        if (event->attr.comm)
@@ -7770,11 +7762,14 @@ static void account_event(struct perf_event *event)
        }
        if (event->attr.context_switch) {
                atomic_inc(&nr_switch_events);
-               static_key_slow_inc(&perf_sched_events.key);
+               inc = true;
        }
        if (has_branch_stack(event))
-               static_key_slow_inc(&perf_sched_events.key);
+               inc = true;
        if (is_cgroup_event(event))
+               inc = true;
+
+       if (inc)
                static_key_slow_inc(&perf_sched_events.key);
 
        account_event_cpu(event, event->cpu);
@@ -8422,11 +8417,11 @@ SYSCALL_DEFINE5(perf_event_open,
                 * See perf_event_ctx_lock() for comments on the details
                 * of swizzling perf_event::ctx.
                 */
-               perf_remove_from_context(group_leader, false);
+               perf_remove_from_context(group_leader, 0);
 
                list_for_each_entry(sibling, &group_leader->sibling_list,
                                    group_entry) {
-                       perf_remove_from_context(sibling, false);
+                       perf_remove_from_context(sibling, 0);
                        put_ctx(gctx);
                }
 
@@ -8479,6 +8474,8 @@ SYSCALL_DEFINE5(perf_event_open,
        perf_event__header_size(event);
        perf_event__id_header_size(event);
 
+       event->owner = current;
+
        perf_install_in_context(ctx, event, event->cpu);
        perf_unpin_context(ctx);
 
@@ -8488,8 +8485,6 @@ SYSCALL_DEFINE5(perf_event_open,
 
        put_online_cpus();
 
-       event->owner = current;
-
        mutex_lock(&current->perf_event_mutex);
        list_add_tail(&event->owner_entry, &current->perf_event_list);
        mutex_unlock(&current->perf_event_mutex);
@@ -8556,7 +8551,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
        }
 
        /* Mark owner so we could distinguish it from user events. */
-       event->owner = EVENT_OWNER_KERNEL;
+       event->owner = TASK_TOMBSTONE;
 
        account_event(event);
 
@@ -8606,7 +8601,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
        mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
        list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
                                 event_entry) {
-               perf_remove_from_context(event, false);
+               perf_remove_from_context(event, 0);
                unaccount_event_cpu(event, src_cpu);
                put_ctx(src_ctx);
                list_add(&event->migrate_entry, &events);
@@ -8673,33 +8668,15 @@ static void sync_child_event(struct perf_event *child_event,
                     &parent_event->child_total_time_enabled);
        atomic64_add(child_event->total_time_running,
                     &parent_event->child_total_time_running);
-
-       /*
-        * Remove this event from the parent's list
-        */
-       WARN_ON_ONCE(parent_event->ctx->parent_ctx);
-       mutex_lock(&parent_event->child_mutex);
-       list_del_init(&child_event->child_list);
-       mutex_unlock(&parent_event->child_mutex);
-
-       /*
-        * Make sure user/parent get notified, that we just
-        * lost one event.
-        */
-       perf_event_wakeup(parent_event);
-
-       /*
-        * Release the parent event, if this was the last
-        * reference to it.
-        */
-       put_event(parent_event);
 }
 
 static void
-__perf_event_exit_task(struct perf_event *child_event,
-                        struct perf_event_context *child_ctx,
-                        struct task_struct *child)
+perf_event_exit_event(struct perf_event *child_event,
+                     struct perf_event_context *child_ctx,
+                     struct task_struct *child)
 {
+       struct perf_event *parent_event = child_event->parent;
+
        /*
         * Do not destroy the 'original' grouping; because of the context
         * switch optimization the original events could've ended up in a
@@ -8712,57 +8689,86 @@ __perf_event_exit_task(struct perf_event *child_event,
         * Do destroy all inherited groups, we don't care about those
         * and being thorough is better.
         */
-       perf_remove_from_context(child_event, !!child_event->parent);
+       raw_spin_lock_irq(&child_ctx->lock);
+       WARN_ON_ONCE(child_ctx->is_active);
+
+       if (parent_event)
+               perf_group_detach(child_event);
+       list_del_event(child_event, child_ctx);
+       child_event->state = PERF_EVENT_STATE_EXIT; /* see perf_event_release_kernel() */
+       raw_spin_unlock_irq(&child_ctx->lock);
 
        /*
-        * It can happen that the parent exits first, and has events
-        * that are still around due to the child reference. These
-        * events need to be zapped.
+        * Parent events are governed by their filedesc, retain them.
         */
-       if (child_event->parent) {
-               sync_child_event(child_event, child);
-               free_event(child_event);
-       } else {
-               child_event->state = PERF_EVENT_STATE_EXIT;
+       if (!parent_event) {
                perf_event_wakeup(child_event);
+               return;
        }
+       /*
+        * Child events can be cleaned up.
+        */
+
+       sync_child_event(child_event, child);
+
+       /*
+        * Remove this event from the parent's list
+        */
+       WARN_ON_ONCE(parent_event->ctx->parent_ctx);
+       mutex_lock(&parent_event->child_mutex);
+       list_del_init(&child_event->child_list);
+       mutex_unlock(&parent_event->child_mutex);
+
+       /*
+        * Kick perf_poll() for is_event_hup().
+        */
+       perf_event_wakeup(parent_event);
+       free_event(child_event);
+       put_event(parent_event);
 }
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
-       struct perf_event *child_event, *next;
        struct perf_event_context *child_ctx, *clone_ctx = NULL;
-       unsigned long flags;
+       struct perf_event *child_event, *next;
+
+       WARN_ON_ONCE(child != current);
 
-       if (likely(!child->perf_event_ctxp[ctxn]))
+       child_ctx = perf_pin_task_context(child, ctxn);
+       if (!child_ctx)
                return;
 
-       local_irq_save(flags);
        /*
-        * We can't reschedule here because interrupts are disabled,
-        * and either child is current or it is a task that can't be
-        * scheduled, so we are now safe from rescheduling changing
-        * our context.
+        * In order to reduce the amount of trickiness in ctx tear-down, we hold
+        * ctx::mutex over the entire thing. This serializes against almost
+        * everything that wants to access the ctx.
+        *
+        * The exception is sys_perf_event_open() /
+        * perf_event_create_kernel_counter() which does find_get_context()
+        * without ctx::mutex (it cannot because of the move_group double mutex
+        * lock thing). See the comments in perf_install_in_context().
         */
-       child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]);
+       mutex_lock(&child_ctx->mutex);
 
        /*
-        * Take the context lock here so that if find_get_context is
-        * reading child->perf_event_ctxp, we wait until it has
-        * incremented the context's refcount before we do put_ctx below.
+        * In a single ctx::lock section, de-schedule the events and detach the
+        * context from the task such that we cannot ever get it scheduled back
+        * in.
         */
-       raw_spin_lock(&child_ctx->lock);
-       task_ctx_sched_out(child_ctx);
-       child->perf_event_ctxp[ctxn] = NULL;
+       raw_spin_lock_irq(&child_ctx->lock);
+       task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx);
 
        /*
-        * If this context is a clone; unclone it so it can't get
-        * swapped to another process while we're removing all
-        * the events from it.
+        * Now that the context is inactive, destroy the task <-> ctx relation
+        * and mark the context dead.
         */
+       RCU_INIT_POINTER(child->perf_event_ctxp[ctxn], NULL);
+       put_ctx(child_ctx); /* cannot be last */
+       WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
+       put_task_struct(current); /* cannot be last */
+
        clone_ctx = unclone_ctx(child_ctx);
-       update_context_time(child_ctx);
-       raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
+       raw_spin_unlock_irq(&child_ctx->lock);
 
        if (clone_ctx)
                put_ctx(clone_ctx);
@@ -8774,20 +8780,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
         */
        perf_event_task(child, child_ctx, 0);
 
-       /*
-        * We can recurse on the same lock type through:
-        *
-        *   __perf_event_exit_task()
-        *     sync_child_event()
-        *       put_event()
-        *         mutex_lock(&ctx->mutex)
-        *
-        * But since its the parent context it won't be the same instance.
-        */
-       mutex_lock(&child_ctx->mutex);
-
        list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
-               __perf_event_exit_task(child_event, child_ctx, child);
+               perf_event_exit_event(child_event, child_ctx, child);
 
        mutex_unlock(&child_ctx->mutex);
 
@@ -8812,8 +8806,7 @@ void perf_event_exit_task(struct task_struct *child)
                 * the owner, closes a race against perf_release() where
                 * we need to serialize on the owner->perf_event_mutex.
                 */
-               smp_wmb();
-               event->owner = NULL;
+               smp_store_release(&event->owner, NULL);
        }
        mutex_unlock(&child->perf_event_mutex);
 
@@ -8896,21 +8889,20 @@ void perf_event_delayed_put(struct task_struct *task)
                WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
 }
 
-struct perf_event *perf_event_get(unsigned int fd)
+struct file *perf_event_get(unsigned int fd)
 {
-       int err;
-       struct fd f;
-       struct perf_event *event;
+       struct file *file;
 
-       err = perf_fget_light(fd, &f);
-       if (err)
-               return ERR_PTR(err);
+       file = fget_raw(fd);
+       if (!file)
+               return ERR_PTR(-EBADF);
 
-       event = f.file->private_data;
-       atomic_long_inc(&event->refcount);
-       fdput(f);
+       if (file->f_op != &perf_fops) {
+               fput(file);
+               return ERR_PTR(-EBADF);
+       }
 
-       return event;
+       return file;
 }
 
 const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
@@ -8953,8 +8945,16 @@ inherit_event(struct perf_event *parent_event,
        if (IS_ERR(child_event))
                return child_event;
 
+       /*
+        * is_orphaned_event() and list_add_tail(&parent_event->child_list)
+        * must be under the same lock in order to serialize against
+        * perf_event_release_kernel(), such that either we must observe
+        * is_orphaned_event() or they will observe us on the child_list.
+        */
+       mutex_lock(&parent_event->child_mutex);
        if (is_orphaned_event(parent_event) ||
            !atomic_long_inc_not_zero(&parent_event->refcount)) {
+               mutex_unlock(&parent_event->child_mutex);
                free_event(child_event);
                return NULL;
        }
@@ -9002,8 +9002,6 @@ inherit_event(struct perf_event *parent_event,
        /*
         * Link this into the parent event's child list
         */
-       WARN_ON_ONCE(parent_event->ctx->parent_ctx);
-       mutex_lock(&parent_event->child_mutex);
        list_add_tail(&child_event->child_list, &parent_event->child_list);
        mutex_unlock(&parent_event->child_mutex);
 
@@ -9221,13 +9219,14 @@ static void perf_event_init_cpu(int cpu)
 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
 static void __perf_event_exit_context(void *__info)
 {
-       struct remove_event re = { .detach_group = true };
        struct perf_event_context *ctx = __info;
+       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+       struct perf_event *event;
 
-       rcu_read_lock();
-       list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
-               __perf_remove_from_context(&re);
-       rcu_read_unlock();
+       raw_spin_lock(&ctx->lock);
+       list_for_each_entry(event, &ctx->event_list, event_entry)
+               __perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
+       raw_spin_unlock(&ctx->lock);
 }
 
 static void perf_event_exit_cpu_context(int cpu)
index 92ce5f4..3f8cb1e 100644 (file)
@@ -444,7 +444,7 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
         * current task.
         */
        if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
-               __perf_event_disable(bp);
+               perf_event_disable_local(bp);
        else
                perf_event_disable(bp);
 
index adfdc05..1faad2c 100644 (file)
@@ -459,6 +459,25 @@ static void rb_free_aux_page(struct ring_buffer *rb, int idx)
        __free_page(page);
 }
 
+static void __rb_free_aux(struct ring_buffer *rb)
+{
+       int pg;
+
+       if (rb->aux_priv) {
+               rb->free_aux(rb->aux_priv);
+               rb->free_aux = NULL;
+               rb->aux_priv = NULL;
+       }
+
+       if (rb->aux_nr_pages) {
+               for (pg = 0; pg < rb->aux_nr_pages; pg++)
+                       rb_free_aux_page(rb, pg);
+
+               kfree(rb->aux_pages);
+               rb->aux_nr_pages = 0;
+       }
+}
+
 int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
                 pgoff_t pgoff, int nr_pages, long watermark, int flags)
 {
@@ -547,30 +566,11 @@ out:
        if (!ret)
                rb->aux_pgoff = pgoff;
        else
-               rb_free_aux(rb);
+               __rb_free_aux(rb);
 
        return ret;
 }
 
-static void __rb_free_aux(struct ring_buffer *rb)
-{
-       int pg;
-
-       if (rb->aux_priv) {
-               rb->free_aux(rb->aux_priv);
-               rb->free_aux = NULL;
-               rb->aux_priv = NULL;
-       }
-
-       if (rb->aux_nr_pages) {
-               for (pg = 0; pg < rb->aux_nr_pages; pg++)
-                       rb_free_aux_page(rb, pg);
-
-               kfree(rb->aux_pages);
-               rb->aux_nr_pages = 0;
-       }
-}
-
 void rb_free_aux(struct ring_buffer *rb)
 {
        if (atomic_dec_and_test(&rb->aux_refcount))
index 0773f2b..5d6ce64 100644 (file)
@@ -1191,7 +1191,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
        if (pi_state->owner != current)
                return -EINVAL;
 
-       raw_spin_lock(&pi_state->pi_mutex.wait_lock);
+       raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
        new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
        /*
@@ -1217,22 +1217,22 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
        else if (curval != uval)
                ret = -EINVAL;
        if (ret) {
-               raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+               raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
                return ret;
        }
 
-       raw_spin_lock_irq(&pi_state->owner->pi_lock);
+       raw_spin_lock(&pi_state->owner->pi_lock);
        WARN_ON(list_empty(&pi_state->list));
        list_del_init(&pi_state->list);
-       raw_spin_unlock_irq(&pi_state->owner->pi_lock);
+       raw_spin_unlock(&pi_state->owner->pi_lock);
 
-       raw_spin_lock_irq(&new_owner->pi_lock);
+       raw_spin_lock(&new_owner->pi_lock);
        WARN_ON(!list_empty(&pi_state->list));
        list_add(&pi_state->list, &new_owner->pi_state_list);
        pi_state->owner = new_owner;
-       raw_spin_unlock_irq(&new_owner->pi_lock);
+       raw_spin_unlock(&new_owner->pi_lock);
 
-       raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+       raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
 
        deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
 
@@ -2127,11 +2127,11 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
                 * we returned due to timeout or signal without taking the
                 * rt_mutex. Too late.
                 */
-               raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
+               raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
                owner = rt_mutex_owner(&q->pi_state->pi_mutex);
                if (!owner)
                        owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
-               raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
+               raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
                ret = fixup_pi_state_owner(uaddr, q, owner);
                goto out;
        }
index a302cf9..57bff78 100644 (file)
@@ -138,7 +138,8 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
        unsigned int flags = 0, irq = desc->irq_data.irq;
        struct irqaction *action = desc->action;
 
-       do {
+       /* action might have become NULL since we dropped the lock */
+       while (action) {
                irqreturn_t res;
 
                trace_irq_handler_entry(irq, action);
@@ -173,7 +174,7 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
 
                retval |= res;
                action = action->next;
-       } while (action);
+       }
 
        add_interrupt_randomness(irq, flags);
 
index 6e655f7..3e56d2f 100644 (file)
@@ -575,10 +575,15 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
        unsigned int type = IRQ_TYPE_NONE;
        int virq;
 
-       if (fwspec->fwnode)
-               domain = irq_find_matching_fwnode(fwspec->fwnode, DOMAIN_BUS_ANY);
-       else
+       if (fwspec->fwnode) {
+               domain = irq_find_matching_fwnode(fwspec->fwnode,
+                                                 DOMAIN_BUS_WIRED);
+               if (!domain)
+                       domain = irq_find_matching_fwnode(fwspec->fwnode,
+                                                         DOMAIN_BUS_ANY);
+       } else {
                domain = irq_default_domain;
+       }
 
        if (!domain) {
                pr_warn("no irq domain found for %s !\n",
index 8251e75..3e74660 100644 (file)
@@ -99,13 +99,14 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
  * 2) Drop lock->wait_lock
  * 3) Try to unlock the lock with cmpxchg
  */
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+                                       unsigned long flags)
        __releases(lock->wait_lock)
 {
        struct task_struct *owner = rt_mutex_owner(lock);
 
        clear_rt_mutex_waiters(lock);
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
        /*
         * If a new waiter comes in between the unlock and the cmpxchg
         * we have two situations:
@@ -147,11 +148,12 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 /*
  * Simple slow path only version: lock->owner is protected by lock->wait_lock.
  */
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+                                       unsigned long flags)
        __releases(lock->wait_lock)
 {
        lock->owner = NULL;
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
        return true;
 }
 #endif
@@ -433,7 +435,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        int ret = 0, depth = 0;
        struct rt_mutex *lock;
        bool detect_deadlock;
-       unsigned long flags;
        bool requeue = true;
 
        detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
@@ -476,7 +477,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        /*
         * [1] Task cannot go away as we did a get_task() before !
         */
-       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       raw_spin_lock_irq(&task->pi_lock);
 
        /*
         * [2] Get the waiter on which @task is blocked on.
@@ -560,7 +561,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
         * operations.
         */
        if (!raw_spin_trylock(&lock->wait_lock)) {
-               raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+               raw_spin_unlock_irq(&task->pi_lock);
                cpu_relax();
                goto retry;
        }
@@ -591,7 +592,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                /*
                 * No requeue[7] here. Just release @task [8]
                 */
-               raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+               raw_spin_unlock(&task->pi_lock);
                put_task_struct(task);
 
                /*
@@ -599,14 +600,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                 * If there is no owner of the lock, end of chain.
                 */
                if (!rt_mutex_owner(lock)) {
-                       raw_spin_unlock(&lock->wait_lock);
+                       raw_spin_unlock_irq(&lock->wait_lock);
                        return 0;
                }
 
                /* [10] Grab the next task, i.e. owner of @lock */
                task = rt_mutex_owner(lock);
                get_task_struct(task);
-               raw_spin_lock_irqsave(&task->pi_lock, flags);
+               raw_spin_lock(&task->pi_lock);
 
                /*
                 * No requeue [11] here. We just do deadlock detection.
@@ -621,8 +622,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                top_waiter = rt_mutex_top_waiter(lock);
 
                /* [13] Drop locks */
-               raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock(&task->pi_lock);
+               raw_spin_unlock_irq(&lock->wait_lock);
 
                /* If owner is not blocked, end of chain. */
                if (!next_lock)
@@ -643,7 +644,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        rt_mutex_enqueue(lock, waiter);
 
        /* [8] Release the task */
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+       raw_spin_unlock(&task->pi_lock);
        put_task_struct(task);
 
        /*
@@ -661,14 +662,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                 */
                if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
                        wake_up_process(rt_mutex_top_waiter(lock)->task);
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock_irq(&lock->wait_lock);
                return 0;
        }
 
        /* [10] Grab the next task, i.e. the owner of @lock */
        task = rt_mutex_owner(lock);
        get_task_struct(task);
-       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       raw_spin_lock(&task->pi_lock);
 
        /* [11] requeue the pi waiters if necessary */
        if (waiter == rt_mutex_top_waiter(lock)) {
@@ -722,8 +723,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        top_waiter = rt_mutex_top_waiter(lock);
 
        /* [13] Drop the locks */
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock(&task->pi_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        /*
         * Make the actual exit decisions [12], based on the stored
@@ -746,7 +747,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        goto again;
 
  out_unlock_pi:
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+       raw_spin_unlock_irq(&task->pi_lock);
  out_put_task:
        put_task_struct(task);
 
@@ -756,7 +757,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 /*
  * Try to take an rt-mutex
  *
- * Must be called with lock->wait_lock held.
+ * Must be called with lock->wait_lock held and interrupts disabled
  *
  * @lock:   The lock to be acquired.
  * @task:   The task which wants to acquire the lock
@@ -766,8 +767,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
                                struct rt_mutex_waiter *waiter)
 {
-       unsigned long flags;
-
        /*
         * Before testing whether we can acquire @lock, we set the
         * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
@@ -852,7 +851,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
         * case, but conditionals are more expensive than a redundant
         * store.
         */
-       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       raw_spin_lock(&task->pi_lock);
        task->pi_blocked_on = NULL;
        /*
         * Finish the lock acquisition. @task is the new owner. If
@@ -861,7 +860,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
         */
        if (rt_mutex_has_waiters(lock))
                rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+       raw_spin_unlock(&task->pi_lock);
 
 takeit:
        /* We got the lock. */
@@ -883,7 +882,7 @@ takeit:
  *
  * Prepare waiter and propagate pi chain
  *
- * This must be called with lock->wait_lock held.
+ * This must be called with lock->wait_lock held and interrupts disabled
  */
 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
                                   struct rt_mutex_waiter *waiter,
@@ -894,7 +893,6 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
        struct rt_mutex_waiter *top_waiter = waiter;
        struct rt_mutex *next_lock;
        int chain_walk = 0, res;
-       unsigned long flags;
 
        /*
         * Early deadlock detection. We really don't want the task to
@@ -908,7 +906,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
        if (owner == task)
                return -EDEADLK;
 
-       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       raw_spin_lock(&task->pi_lock);
        __rt_mutex_adjust_prio(task);
        waiter->task = task;
        waiter->lock = lock;
@@ -921,12 +919,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 
        task->pi_blocked_on = waiter;
 
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+       raw_spin_unlock(&task->pi_lock);
 
        if (!owner)
                return 0;
 
-       raw_spin_lock_irqsave(&owner->pi_lock, flags);
+       raw_spin_lock(&owner->pi_lock);
        if (waiter == rt_mutex_top_waiter(lock)) {
                rt_mutex_dequeue_pi(owner, top_waiter);
                rt_mutex_enqueue_pi(owner, waiter);
@@ -941,7 +939,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
        /* Store the lock on which owner is blocked or NULL */
        next_lock = task_blocked_on_lock(owner);
 
-       raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+       raw_spin_unlock(&owner->pi_lock);
        /*
         * Even if full deadlock detection is on, if the owner is not
         * blocked itself, we can avoid finding this out in the chain
@@ -957,12 +955,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
         */
        get_task_struct(owner);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
                                         next_lock, waiter, task);
 
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irq(&lock->wait_lock);
 
        return res;
 }
@@ -971,15 +969,14 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  * Remove the top waiter from the current tasks pi waiter tree and
  * queue it up.
  *
- * Called with lock->wait_lock held.
+ * Called with lock->wait_lock held and interrupts disabled.
  */
 static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
                                    struct rt_mutex *lock)
 {
        struct rt_mutex_waiter *waiter;
-       unsigned long flags;
 
-       raw_spin_lock_irqsave(&current->pi_lock, flags);
+       raw_spin_lock(&current->pi_lock);
 
        waiter = rt_mutex_top_waiter(lock);
 
@@ -1001,7 +998,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
         */
        lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
 
-       raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+       raw_spin_unlock(&current->pi_lock);
 
        wake_q_add(wake_q, waiter->task);
 }
@@ -1009,7 +1006,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
 /*
  * Remove a waiter from a lock and give up
  *
- * Must be called with lock->wait_lock held and
+ * Must be called with lock->wait_lock held and interrupts disabled. I must
  * have just failed to try_to_take_rt_mutex().
  */
 static void remove_waiter(struct rt_mutex *lock,
@@ -1018,12 +1015,11 @@ static void remove_waiter(struct rt_mutex *lock,
        bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
        struct task_struct *owner = rt_mutex_owner(lock);
        struct rt_mutex *next_lock;
-       unsigned long flags;
 
-       raw_spin_lock_irqsave(&current->pi_lock, flags);
+       raw_spin_lock(&current->pi_lock);
        rt_mutex_dequeue(lock, waiter);
        current->pi_blocked_on = NULL;
-       raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+       raw_spin_unlock(&current->pi_lock);
 
        /*
         * Only update priority if the waiter was the highest priority
@@ -1032,7 +1028,7 @@ static void remove_waiter(struct rt_mutex *lock,
        if (!owner || !is_top_waiter)
                return;
 
-       raw_spin_lock_irqsave(&owner->pi_lock, flags);
+       raw_spin_lock(&owner->pi_lock);
 
        rt_mutex_dequeue_pi(owner, waiter);
 
@@ -1044,7 +1040,7 @@ static void remove_waiter(struct rt_mutex *lock,
        /* Store the lock on which owner is blocked or NULL */
        next_lock = task_blocked_on_lock(owner);
 
-       raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+       raw_spin_unlock(&owner->pi_lock);
 
        /*
         * Don't walk the chain, if the owner task is not blocked
@@ -1056,12 +1052,12 @@ static void remove_waiter(struct rt_mutex *lock,
        /* gets dropped in rt_mutex_adjust_prio_chain()! */
        get_task_struct(owner);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
                                   next_lock, NULL, current);
 
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irq(&lock->wait_lock);
 }
 
 /*
@@ -1097,11 +1093,11 @@ void rt_mutex_adjust_pi(struct task_struct *task)
  * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
  * @lock:               the rt_mutex to take
  * @state:              the state the task should block in (TASK_INTERRUPTIBLE
- *                      or TASK_UNINTERRUPTIBLE)
+ *                      or TASK_UNINTERRUPTIBLE)
  * @timeout:            the pre-initialized and started timer, or NULL for none
  * @waiter:             the pre-initialized rt_mutex_waiter
  *
- * lock->wait_lock must be held by the caller.
+ * Must be called with lock->wait_lock held and interrupts disabled
  */
 static int __sched
 __rt_mutex_slowlock(struct rt_mutex *lock, int state,
@@ -1129,13 +1125,13 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
                                break;
                }
 
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock_irq(&lock->wait_lock);
 
                debug_rt_mutex_print_deadlock(waiter);
 
                schedule();
 
-               raw_spin_lock(&lock->wait_lock);
+               raw_spin_lock_irq(&lock->wait_lock);
                set_current_state(state);
        }
 
@@ -1172,17 +1168,26 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
                  enum rtmutex_chainwalk chwalk)
 {
        struct rt_mutex_waiter waiter;
+       unsigned long flags;
        int ret = 0;
 
        debug_rt_mutex_init_waiter(&waiter);
        RB_CLEAR_NODE(&waiter.pi_tree_entry);
        RB_CLEAR_NODE(&waiter.tree_entry);
 
-       raw_spin_lock(&lock->wait_lock);
+       /*
+        * Technically we could use raw_spin_[un]lock_irq() here, but this can
+        * be called in early boot if the cmpxchg() fast path is disabled
+        * (debug, no architecture support). In this case we will acquire the
+        * rtmutex with lock->wait_lock held. But we cannot unconditionally
+        * enable interrupts in that early boot case. So we need to use the
+        * irqsave/restore variants.
+        */
+       raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
        /* Try to acquire the lock again: */
        if (try_to_take_rt_mutex(lock, current, NULL)) {
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
                return 0;
        }
 
@@ -1211,7 +1216,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
         */
        fixup_rt_mutex_waiters(lock);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
        /* Remove pending timer: */
        if (unlikely(timeout))
@@ -1227,6 +1232,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
  */
 static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
 {
+       unsigned long flags;
        int ret;
 
        /*
@@ -1238,10 +1244,10 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
                return 0;
 
        /*
-        * The mutex has currently no owner. Lock the wait lock and
-        * try to acquire the lock.
+        * The mutex has currently no owner. Lock the wait lock and try to
+        * acquire the lock. We use irqsave here to support early boot calls.
         */
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
        ret = try_to_take_rt_mutex(lock, current, NULL);
 
@@ -1251,7 +1257,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
         */
        fixup_rt_mutex_waiters(lock);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
        return ret;
 }
@@ -1263,7 +1269,10 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
 static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
                                        struct wake_q_head *wake_q)
 {
-       raw_spin_lock(&lock->wait_lock);
+       unsigned long flags;
+
+       /* irqsave required to support early boot calls */
+       raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
        debug_rt_mutex_unlock(lock);
 
@@ -1302,10 +1311,10 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
         */
        while (!rt_mutex_has_waiters(lock)) {
                /* Drops lock->wait_lock ! */
-               if (unlock_rt_mutex_safe(lock) == true)
+               if (unlock_rt_mutex_safe(lock, flags) == true)
                        return false;
                /* Relock the rtmutex and try again */
-               raw_spin_lock(&lock->wait_lock);
+               raw_spin_lock_irqsave(&lock->wait_lock, flags);
        }
 
        /*
@@ -1316,7 +1325,7 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
         */
        mark_wakeup_next_waiter(wake_q, lock);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
        /* check PI boosting */
        return true;
@@ -1596,10 +1605,10 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
 {
        int ret;
 
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irq(&lock->wait_lock);
 
        if (try_to_take_rt_mutex(lock, task, NULL)) {
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock_irq(&lock->wait_lock);
                return 1;
        }
 
@@ -1620,7 +1629,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
        if (unlikely(ret))
                remove_waiter(lock, waiter);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        debug_rt_mutex_print_deadlock(waiter);
 
@@ -1668,7 +1677,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
 {
        int ret;
 
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irq(&lock->wait_lock);
 
        set_current_state(TASK_INTERRUPTIBLE);
 
@@ -1684,7 +1693,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
         */
        fixup_rt_mutex_waiters(lock);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        return ret;
 }
index e517a16..70ee377 100644 (file)
@@ -150,7 +150,7 @@ void devm_memunmap(struct device *dev, void *addr)
 }
 EXPORT_SYMBOL(devm_memunmap);
 
-pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
+pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
 {
        return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
 }
@@ -183,7 +183,11 @@ EXPORT_SYMBOL(put_zone_device_page);
 
 static void pgmap_radix_release(struct resource *res)
 {
-       resource_size_t key;
+       resource_size_t key, align_start, align_size, align_end;
+
+       align_start = res->start & ~(SECTION_SIZE - 1);
+       align_size = ALIGN(resource_size(res), SECTION_SIZE);
+       align_end = align_start + align_size - 1;
 
        mutex_lock(&pgmap_lock);
        for (key = res->start; key <= res->end; key += SECTION_SIZE)
@@ -226,12 +230,11 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
                percpu_ref_put(pgmap->ref);
        }
 
-       pgmap_radix_release(res);
-
        /* pages are dead and unused, undo the arch mapping */
        align_start = res->start & ~(SECTION_SIZE - 1);
        align_size = ALIGN(resource_size(res), SECTION_SIZE);
        arch_remove_memory(align_start, align_size);
+       pgmap_radix_release(res);
        dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
                        "%s: failed to free all reserved pages\n", __func__);
 }
@@ -267,7 +270,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 {
        int is_ram = region_intersects(res->start, resource_size(res),
                        "System RAM");
-       resource_size_t key, align_start, align_size;
+       resource_size_t key, align_start, align_size, align_end;
        struct dev_pagemap *pgmap;
        struct page_map *page_map;
        unsigned long pfn;
@@ -309,7 +312,10 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 
        mutex_lock(&pgmap_lock);
        error = 0;
-       for (key = res->start; key <= res->end; key += SECTION_SIZE) {
+       align_start = res->start & ~(SECTION_SIZE - 1);
+       align_size = ALIGN(resource_size(res), SECTION_SIZE);
+       align_end = align_start + align_size - 1;
+       for (key = align_start; key <= align_end; key += SECTION_SIZE) {
                struct dev_pagemap *dup;
 
                rcu_read_lock();
@@ -336,8 +342,6 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
        if (nid < 0)
                nid = numa_mem_id();
 
-       align_start = res->start & ~(SECTION_SIZE - 1);
-       align_size = ALIGN(resource_size(res), SECTION_SIZE);
        error = arch_add_memory(nid, align_start, align_size, true);
        if (error)
                goto err_add_memory;
index 8358f46..9537da3 100644 (file)
@@ -303,6 +303,9 @@ struct load_info {
        struct _ddebug *debug;
        unsigned int num_debug;
        bool sig_ok;
+#ifdef CONFIG_KALLSYMS
+       unsigned long mod_kallsyms_init_off;
+#endif
        struct {
                unsigned int sym, str, mod, vers, info, pcpu;
        } index;
@@ -2480,10 +2483,21 @@ static void layout_symtab(struct module *mod, struct load_info *info)
        strsect->sh_flags |= SHF_ALLOC;
        strsect->sh_entsize = get_offset(mod, &mod->init_layout.size, strsect,
                                         info->index.str) | INIT_OFFSET_MASK;
-       mod->init_layout.size = debug_align(mod->init_layout.size);
        pr_debug("\t%s\n", info->secstrings + strsect->sh_name);
+
+       /* We'll tack temporary mod_kallsyms on the end. */
+       mod->init_layout.size = ALIGN(mod->init_layout.size,
+                                     __alignof__(struct mod_kallsyms));
+       info->mod_kallsyms_init_off = mod->init_layout.size;
+       mod->init_layout.size += sizeof(struct mod_kallsyms);
+       mod->init_layout.size = debug_align(mod->init_layout.size);
 }
 
+/*
+ * We use the full symtab and strtab which layout_symtab arranged to
+ * be appended to the init section.  Later we switch to the cut-down
+ * core-only ones.
+ */
 static void add_kallsyms(struct module *mod, const struct load_info *info)
 {
        unsigned int i, ndst;
@@ -2492,29 +2506,34 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
        char *s;
        Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
 
-       mod->symtab = (void *)symsec->sh_addr;
-       mod->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
+       /* Set up to point into init section. */
+       mod->kallsyms = mod->init_layout.base + info->mod_kallsyms_init_off;
+
+       mod->kallsyms->symtab = (void *)symsec->sh_addr;
+       mod->kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
        /* Make sure we get permanent strtab: don't use info->strtab. */
-       mod->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
+       mod->kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
 
        /* Set types up while we still have access to sections. */
-       for (i = 0; i < mod->num_symtab; i++)
-               mod->symtab[i].st_info = elf_type(&mod->symtab[i], info);
-
-       mod->core_symtab = dst = mod->core_layout.base + info->symoffs;
-       mod->core_strtab = s = mod->core_layout.base + info->stroffs;
-       src = mod->symtab;
-       for (ndst = i = 0; i < mod->num_symtab; i++) {
+       for (i = 0; i < mod->kallsyms->num_symtab; i++)
+               mod->kallsyms->symtab[i].st_info
+                       = elf_type(&mod->kallsyms->symtab[i], info);
+
+       /* Now populate the cut down core kallsyms for after init. */
+       mod->core_kallsyms.symtab = dst = mod->core_layout.base + info->symoffs;
+       mod->core_kallsyms.strtab = s = mod->core_layout.base + info->stroffs;
+       src = mod->kallsyms->symtab;
+       for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) {
                if (i == 0 ||
                    is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum,
                                   info->index.pcpu)) {
                        dst[ndst] = src[i];
-                       dst[ndst++].st_name = s - mod->core_strtab;
-                       s += strlcpy(s, &mod->strtab[src[i].st_name],
+                       dst[ndst++].st_name = s - mod->core_kallsyms.strtab;
+                       s += strlcpy(s, &mod->kallsyms->strtab[src[i].st_name],
                                     KSYM_NAME_LEN) + 1;
                }
        }
-       mod->core_num_syms = ndst;
+       mod->core_kallsyms.num_symtab = ndst;
 }
 #else
 static inline void layout_symtab(struct module *mod, struct load_info *info)
@@ -3263,9 +3282,8 @@ static noinline int do_init_module(struct module *mod)
        module_put(mod);
        trim_init_extable(mod);
 #ifdef CONFIG_KALLSYMS
-       mod->num_symtab = mod->core_num_syms;
-       mod->symtab = mod->core_symtab;
-       mod->strtab = mod->core_strtab;
+       /* Switch to core kallsyms now init is done: kallsyms may be walking! */
+       rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms);
 #endif
        mod_tree_remove_init(mod);
        disable_ro_nx(&mod->init_layout);
@@ -3496,7 +3514,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
 
        /* Module is ready to execute: parsing args may do that. */
        after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
-                                 -32768, 32767, NULL,
+                                 -32768, 32767, mod,
                                  unknown_module_param_cb);
        if (IS_ERR(after_dashes)) {
                err = PTR_ERR(after_dashes);
@@ -3627,6 +3645,11 @@ static inline int is_arm_mapping_symbol(const char *str)
               && (str[2] == '\0' || str[2] == '.');
 }
 
+static const char *symname(struct mod_kallsyms *kallsyms, unsigned int symnum)
+{
+       return kallsyms->strtab + kallsyms->symtab[symnum].st_name;
+}
+
 static const char *get_ksymbol(struct module *mod,
                               unsigned long addr,
                               unsigned long *size,
@@ -3634,6 +3657,7 @@ static const char *get_ksymbol(struct module *mod,
 {
        unsigned int i, best = 0;
        unsigned long nextval;
+       struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
 
        /* At worse, next value is at end of module */
        if (within_module_init(addr, mod))
@@ -3643,32 +3667,32 @@ static const char *get_ksymbol(struct module *mod,
 
        /* Scan for closest preceding symbol, and next symbol. (ELF
           starts real symbols at 1). */
-       for (i = 1; i < mod->num_symtab; i++) {
-               if (mod->symtab[i].st_shndx == SHN_UNDEF)
+       for (i = 1; i < kallsyms->num_symtab; i++) {
+               if (kallsyms->symtab[i].st_shndx == SHN_UNDEF)
                        continue;
 
                /* We ignore unnamed symbols: they're uninformative
                 * and inserted at a whim. */
-               if (mod->symtab[i].st_value <= addr
-                   && mod->symtab[i].st_value > mod->symtab[best].st_value
-                   && *(mod->strtab + mod->symtab[i].st_name) != '\0'
-                   && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name))
+               if (*symname(kallsyms, i) == '\0'
+                   || is_arm_mapping_symbol(symname(kallsyms, i)))
+                       continue;
+
+               if (kallsyms->symtab[i].st_value <= addr
+                   && kallsyms->symtab[i].st_value > kallsyms->symtab[best].st_value)
                        best = i;
-               if (mod->symtab[i].st_value > addr
-                   && mod->symtab[i].st_value < nextval
-                   && *(mod->strtab + mod->symtab[i].st_name) != '\0'
-                   && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name))
-                       nextval = mod->symtab[i].st_value;
+               if (kallsyms->symtab[i].st_value > addr
+                   && kallsyms->symtab[i].st_value < nextval)
+                       nextval = kallsyms->symtab[i].st_value;
        }
 
        if (!best)
                return NULL;
 
        if (size)
-               *size = nextval - mod->symtab[best].st_value;
+               *size = nextval - kallsyms->symtab[best].st_value;
        if (offset)
-               *offset = addr - mod->symtab[best].st_value;
-       return mod->strtab + mod->symtab[best].st_name;
+               *offset = addr - kallsyms->symtab[best].st_value;
+       return symname(kallsyms, best);
 }
 
 /* For kallsyms to ask for address resolution.  NULL means not found.  Careful
@@ -3758,19 +3782,21 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 
        preempt_disable();
        list_for_each_entry_rcu(mod, &modules, list) {
+               struct mod_kallsyms *kallsyms;
+
                if (mod->state == MODULE_STATE_UNFORMED)
                        continue;
-               if (symnum < mod->num_symtab) {
-                       *value = mod->symtab[symnum].st_value;
-                       *type = mod->symtab[symnum].st_info;
-                       strlcpy(name, mod->strtab + mod->symtab[symnum].st_name,
-                               KSYM_NAME_LEN);
+               kallsyms = rcu_dereference_sched(mod->kallsyms);
+               if (symnum < kallsyms->num_symtab) {
+                       *value = kallsyms->symtab[symnum].st_value;
+                       *type = kallsyms->symtab[symnum].st_info;
+                       strlcpy(name, symname(kallsyms, symnum), KSYM_NAME_LEN);
                        strlcpy(module_name, mod->name, MODULE_NAME_LEN);
                        *exported = is_exported(name, *value, mod);
                        preempt_enable();
                        return 0;
                }
-               symnum -= mod->num_symtab;
+               symnum -= kallsyms->num_symtab;
        }
        preempt_enable();
        return -ERANGE;
@@ -3779,11 +3805,12 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 static unsigned long mod_find_symname(struct module *mod, const char *name)
 {
        unsigned int i;
+       struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
 
-       for (i = 0; i < mod->num_symtab; i++)
-               if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0 &&
-                   mod->symtab[i].st_info != 'U')
-                       return mod->symtab[i].st_value;
+       for (i = 0; i < kallsyms->num_symtab; i++)
+               if (strcmp(name, symname(kallsyms, i)) == 0 &&
+                   kallsyms->symtab[i].st_info != 'U')
+                       return kallsyms->symtab[i].st_value;
        return 0;
 }
 
@@ -3822,11 +3849,14 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
        module_assert_mutex();
 
        list_for_each_entry(mod, &modules, list) {
+               /* We hold module_mutex: no need for rcu_dereference_sched */
+               struct mod_kallsyms *kallsyms = mod->kallsyms;
+
                if (mod->state == MODULE_STATE_UNFORMED)
                        continue;
-               for (i = 0; i < mod->num_symtab; i++) {
-                       ret = fn(data, mod->strtab + mod->symtab[i].st_name,
-                                mod, mod->symtab[i].st_value);
+               for (i = 0; i < kallsyms->num_symtab; i++) {
+                       ret = fn(data, symname(kallsyms, i),
+                                mod, kallsyms->symtab[i].st_value);
                        if (ret != 0)
                                return ret;
                }
index f4ad91b..4d73a83 100644 (file)
@@ -588,7 +588,7 @@ void __init pidhash_init(void)
 
 void __init pidmap_init(void)
 {
-       /* Veryify no one has done anything silly */
+       /* Verify no one has done anything silly: */
        BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);
 
        /* bump default and minimum pid_max based on number of cpus */
index 02e8dfa..68d3ebc 100644 (file)
@@ -235,7 +235,7 @@ config PM_TRACE_RTC
 
 config APM_EMULATION
        tristate "Advanced Power Management Emulation"
-       depends on PM && SYS_SUPPORTS_APM_EMULATION
+       depends on SYS_SUPPORTS_APM_EMULATION
        help
          APM is a BIOS specification for saving power using several different
          techniques. This is mostly useful for battery powered laptops with
index 0b4570c..074994b 100644 (file)
@@ -1133,7 +1133,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
        if (!desc->count)
                return 0;
 
-       mutex_lock(&file_inode(filp)->i_mutex);
+       inode_lock(file_inode(filp));
        do {
                if (!relay_file_read_avail(buf, *ppos))
                        break;
@@ -1153,7 +1153,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
                        *ppos = relay_file_read_end_pos(buf, read_start, ret);
                }
        } while (desc->count && ret);
-       mutex_unlock(&file_inode(filp)->i_mutex);
+       inode_unlock(file_inode(filp));
 
        return desc->written;
 }
index 44253ad..9503d59 100644 (file)
@@ -222,9 +222,9 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
 
        /* Ensure the static_key remains in a consistent state */
        inode = file_inode(filp);
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        i = sched_feat_set(cmp);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (i == __SCHED_FEAT_NR)
                return -EINVAL;
 
@@ -6840,7 +6840,7 @@ static void sched_init_numa(void)
 
                        sched_domains_numa_masks[i][j] = mask;
 
-                       for (k = 0; k < nr_node_ids; k++) {
+                       for_each_node(k) {
                                if (node_distance(j, k) > sched_domains_numa_distance[i])
                                        continue;
 
index 1926606..56b7d4b 100644 (file)
@@ -1220,8 +1220,6 @@ static void task_numa_assign(struct task_numa_env *env,
 {
        if (env->best_task)
                put_task_struct(env->best_task);
-       if (p)
-               get_task_struct(p);
 
        env->best_task = p;
        env->best_imp = imp;
@@ -1289,20 +1287,30 @@ static void task_numa_compare(struct task_numa_env *env,
        long imp = env->p->numa_group ? groupimp : taskimp;
        long moveimp = imp;
        int dist = env->dist;
+       bool assigned = false;
 
        rcu_read_lock();
 
        raw_spin_lock_irq(&dst_rq->lock);
        cur = dst_rq->curr;
        /*
-        * No need to move the exiting task, and this ensures that ->curr
-        * wasn't reaped and thus get_task_struct() in task_numa_assign()
-        * is safe under RCU read lock.
-        * Note that rcu_read_lock() itself can't protect from the final
-        * put_task_struct() after the last schedule().
+        * No need to move the exiting task or idle task.
         */
        if ((cur->flags & PF_EXITING) || is_idle_task(cur))
                cur = NULL;
+       else {
+               /*
+                * Hold a reference on the task_struct here to keep the
+                * p->numa_faults access in the task_weight safe, since the
+                * numa_faults could already be freed in the following path:
+                * finish_task_switch()
+                *     --> put_task_struct()
+                *         --> __put_task_struct()
+                *             --> task_numa_free()
+                */
+               get_task_struct(cur);
+       }
+
        raw_spin_unlock_irq(&dst_rq->lock);
 
        /*
@@ -1386,6 +1394,7 @@ balance:
                 */
                if (!load_too_imbalanced(src_load, dst_load, env)) {
                        imp = moveimp - 1;
+                       put_task_struct(cur);
                        cur = NULL;
                        goto assign;
                }
@@ -1411,9 +1420,16 @@ balance:
                env->dst_cpu = select_idle_sibling(env->p, env->dst_cpu);
 
 assign:
+       assigned = true;
        task_numa_assign(env, cur, imp);
 unlock:
        rcu_read_unlock();
+       /*
+        * cur was not handed over to task_numa_assign(): drop the
+        * task_struct reference taken under dst_rq->lock.
+        */
+       if (cur && !assigned)
+               put_task_struct(cur);
 }
 
 static void task_numa_find_cpu(struct task_numa_env *env,
index de0e786..544a713 100644 (file)
@@ -162,7 +162,7 @@ static void cpuidle_idle_call(void)
         */
        if (idle_should_freeze()) {
                entered_state = cpuidle_enter_freeze(drv, dev);
-               if (entered_state >= 0) {
+               if (entered_state > 0) {
                        local_irq_enable();
                        goto exit_idle;
                }
index 580ac2d..15a1795 100644 (file)
@@ -316,24 +316,24 @@ static inline void seccomp_sync_threads(void)
                put_seccomp_filter(thread);
                smp_store_release(&thread->seccomp.filter,
                                  caller->seccomp.filter);
+
+               /*
+                * Don't let an unprivileged task work around
+                * the no_new_privs restriction by creating
+                * a thread that sets it up, enters seccomp,
+                * then dies.
+                */
+               if (task_no_new_privs(caller))
+                       task_set_no_new_privs(thread);
+
                /*
                 * Opt the other thread into seccomp if needed.
                 * As threads are considered to be trust-realm
                 * equivalent (see ptrace_may_access), it is safe to
                 * allow one thread to transition the other.
                 */
-               if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
-                       /*
-                        * Don't let an unprivileged task work around
-                        * the no_new_privs restriction by creating
-                        * a thread that sets it up, enters seccomp,
-                        * then dies.
-                        */
-                       if (task_no_new_privs(caller))
-                               task_set_no_new_privs(thread);
-
+               if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
                        seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
-               }
        }
 }
 
index f3f1f7a..0508544 100644 (file)
@@ -3508,8 +3508,10 @@ static int sigsuspend(sigset_t *set)
        current->saved_sigmask = current->blocked;
        set_current_blocked(set);
 
-       __set_current_state(TASK_INTERRUPTIBLE);
-       schedule();
+       while (!signal_pending(current)) {
+               __set_current_state(TASK_INTERRUPTIBLE);
+               schedule();
+       }
        set_restore_sigmask();
        return -ERESTARTNOHAND;
 }
index 9142036..97715fd 100644 (file)
@@ -1757,6 +1757,20 @@ static struct ctl_table fs_table[] = {
                .proc_handler   = &pipe_proc_fn,
                .extra1         = &pipe_min_size,
        },
+       {
+               .procname       = "pipe-user-pages-hard",
+               .data           = &pipe_user_pages_hard,
+               .maxlen         = sizeof(pipe_user_pages_hard),
+               .mode           = 0644,
+               .proc_handler   = proc_doulongvec_minmax,
+       },
+       {
+               .procname       = "pipe-user-pages-soft",
+               .data           = &pipe_user_pages_soft,
+               .maxlen         = sizeof(pipe_user_pages_soft),
+               .mode           = 0644,
+               .proc_handler   = proc_doulongvec_minmax,
+       },
        { }
 };
 
index 435b885..fa909f9 100644 (file)
@@ -897,10 +897,10 @@ static int enqueue_hrtimer(struct hrtimer *timer,
  */
 static void __remove_hrtimer(struct hrtimer *timer,
                             struct hrtimer_clock_base *base,
-                            unsigned long newstate, int reprogram)
+                            u8 newstate, int reprogram)
 {
        struct hrtimer_cpu_base *cpu_base = base->cpu_base;
-       unsigned int state = timer->state;
+       u8 state = timer->state;
 
        timer->state = newstate;
        if (!(state & HRTIMER_STATE_ENQUEUED))
@@ -930,7 +930,7 @@ static inline int
 remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
 {
        if (hrtimer_is_queued(timer)) {
-               unsigned long state = timer->state;
+               u8 state = timer->state;
                int reprogram;
 
                /*
@@ -954,6 +954,22 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest
        return 0;
 }
 
+static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
+                                           const enum hrtimer_mode mode)
+{
+#ifdef CONFIG_TIME_LOW_RES
+       /*
+        * CONFIG_TIME_LOW_RES indicates that the system has no way to return
+        * granular time values. For relative timers we add hrtimer_resolution
+        * (i.e. one jiffy) to prevent short timeouts.
+        */
+       timer->is_rel = mode & HRTIMER_MODE_REL;
+       if (timer->is_rel)
+               tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
+#endif
+       return tim;
+}
+
 /**
  * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
  * @timer:     the timer to be added
@@ -974,19 +990,10 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
        /* Remove an active timer from the queue: */
        remove_hrtimer(timer, base, true);
 
-       if (mode & HRTIMER_MODE_REL) {
+       if (mode & HRTIMER_MODE_REL)
                tim = ktime_add_safe(tim, base->get_time());
-               /*
-                * CONFIG_TIME_LOW_RES is a temporary way for architectures
-                * to signal that they simply return xtime in
-                * do_gettimeoffset(). In this case we want to round up by
-                * resolution when starting a relative timer, to avoid short
-                * timeouts. This will go away with the GTOD framework.
-                */
-#ifdef CONFIG_TIME_LOW_RES
-               tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
-#endif
-       }
+
+       tim = hrtimer_update_lowres(timer, tim, mode);
 
        hrtimer_set_expires_range_ns(timer, tim, delta_ns);
 
@@ -1074,19 +1081,23 @@ EXPORT_SYMBOL_GPL(hrtimer_cancel);
 /**
  * hrtimer_get_remaining - get remaining time for the timer
  * @timer:     the timer to read
+ * @adjust:    adjust relative timers when CONFIG_TIME_LOW_RES=y
  */
-ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
+ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust)
 {
        unsigned long flags;
        ktime_t rem;
 
        lock_hrtimer_base(timer, &flags);
-       rem = hrtimer_expires_remaining(timer);
+       if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust)
+               rem = hrtimer_expires_remaining_adjusted(timer);
+       else
+               rem = hrtimer_expires_remaining(timer);
        unlock_hrtimer_base(timer, &flags);
 
        return rem;
 }
-EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
+EXPORT_SYMBOL_GPL(__hrtimer_get_remaining);
 
 #ifdef CONFIG_NO_HZ_COMMON
 /**
@@ -1219,6 +1230,14 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
        timer_stats_account_hrtimer(timer);
        fn = timer->function;
 
+       /*
+        * Clear the 'is relative' flag for the TIME_LOW_RES case. If the
+        * timer is restarted with a period then it becomes an absolute
+        * timer. If it's not restarted it does not matter.
+        */
+       if (IS_ENABLED(CONFIG_TIME_LOW_RES))
+               timer->is_rel = false;
+
        /*
         * Because we run timers from hardirq context, there is no chance
         * they get migrated to another cpu, therefore its safe to unlock
index 8d262b4..1d5c720 100644 (file)
@@ -26,7 +26,7 @@
  */
 static struct timeval itimer_get_remtime(struct hrtimer *timer)
 {
-       ktime_t rem = hrtimer_get_remaining(timer);
+       ktime_t rem = __hrtimer_get_remaining(timer, true);
 
        /*
         * Racy but safe: if the itimer expires after the above
index 36f2ca0..6df8927 100644 (file)
@@ -685,8 +685,18 @@ int ntp_validate_timex(struct timex *txc)
                if (!capable(CAP_SYS_TIME))
                        return -EPERM;
 
-               if (!timeval_inject_offset_valid(&txc->time))
-                       return -EINVAL;
+               if (txc->modes & ADJ_NANO) {
+                       struct timespec ts;
+
+                       ts.tv_sec = txc->time.tv_sec;
+                       ts.tv_nsec = txc->time.tv_usec;
+                       if (!timespec_inject_offset_valid(&ts))
+                               return -EINVAL;
+
+               } else {
+                       if (!timeval_inject_offset_valid(&txc->time))
+                               return -EINVAL;
+               }
        }
 
        /*
index 31d11ac..f2826c3 100644 (file)
@@ -760,7 +760,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
            (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
                timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
 
-       remaining = ktime_sub(hrtimer_get_expires(timer), now);
+       remaining = __hrtimer_expires_remaining_adjusted(timer, now);
        /* Return 0 only, when the timer is expired and not pending */
        if (remaining.tv64 <= 0) {
                /*
index 9d7a053..0b17424 100644 (file)
  */
 static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
 
-/*
- * The time, when the last jiffy update happened. Protected by jiffies_lock.
- */
-static ktime_t last_jiffies_update;
-
 struct tick_sched *tick_get_tick_sched(int cpu)
 {
        return &per_cpu(tick_cpu_sched, cpu);
 }
 
+#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
+/*
+ * The time, when the last jiffy update happened. Protected by jiffies_lock.
+ */
+static ktime_t last_jiffies_update;
+
 /*
  * Must be called with interrupts disabled !
  */
@@ -151,6 +152,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
        update_process_times(user_mode(regs));
        profile_tick(CPU_PROFILING);
 }
+#endif
 
 #ifdef CONFIG_NO_HZ_FULL
 cpumask_var_t tick_nohz_full_mask;
@@ -993,9 +995,9 @@ static void tick_nohz_switch_to_nohz(void)
        /* Get the next period */
        next = tick_init_jiffy_update();
 
-       hrtimer_forward_now(&ts->sched_timer, tick_period);
        hrtimer_set_expires(&ts->sched_timer, next);
-       tick_program_event(next, 1);
+       hrtimer_forward_now(&ts->sched_timer, tick_period);
+       tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
        tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
 }
 
index f75e35b..ba7d8b2 100644 (file)
@@ -69,7 +69,7 @@ print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
        print_name_offset(m, taddr);
        SEQ_printf(m, ", ");
        print_name_offset(m, timer->function);
-       SEQ_printf(m, ", S:%02lx", timer->state);
+       SEQ_printf(m, ", S:%02x", timer->state);
 #ifdef CONFIG_TIMER_STATS
        SEQ_printf(m, ", ");
        print_name_offset(m, timer->start_site);
index 45dd798..326a75e 100644 (file)
@@ -191,14 +191,17 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
        struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct perf_event *event;
+       struct file *file;
 
        if (unlikely(index >= array->map.max_entries))
                return -E2BIG;
 
-       event = (struct perf_event *)array->ptrs[index];
-       if (!event)
+       file = (struct file *)array->ptrs[index];
+       if (unlikely(!file))
                return -ENOENT;
 
+       event = file->private_data;
+
        /* make sure event is local and doesn't have pmu::count */
        if (event->oncpu != smp_processor_id() ||
            event->pmu->count)
@@ -228,6 +231,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
        void *data = (void *) (long) r4;
        struct perf_sample_data sample_data;
        struct perf_event *event;
+       struct file *file;
        struct perf_raw_record raw = {
                .size = size,
                .data = data,
@@ -236,10 +240,12 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
        if (unlikely(index >= array->map.max_entries))
                return -E2BIG;
 
-       event = (struct perf_event *)array->ptrs[index];
-       if (unlikely(!event))
+       file = (struct file *)array->ptrs[index];
+       if (unlikely(!file))
                return -ENOENT;
 
+       event = file->private_data;
+
        if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
                     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
                return -EINVAL;
index 87fb980..d929340 100644 (file)
@@ -1751,7 +1751,7 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
 {
        __buffer_unlock_commit(buffer, event);
 
-       ftrace_trace_stack(tr, buffer, flags, 6, pc, regs);
+       ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
        ftrace_trace_userstack(buffer, flags, pc);
 }
 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
index dda9e67..202df6c 100644 (file)
@@ -125,6 +125,13 @@ check_stack(unsigned long ip, unsigned long *stack)
                        break;
        }
 
+       /*
+        * Some archs may not have the passed in ip in the dump.
+        * If that happens, we need to show everything.
+        */
+       if (i == stack_trace_max.nr_entries)
+               i = 0;
+
        /*
         * Now find where in the stack these are.
         */
index 61a0264..7ff5dc7 100644 (file)
@@ -301,7 +301,23 @@ static DEFINE_SPINLOCK(wq_mayday_lock);    /* protects wq->maydays list */
 static LIST_HEAD(workqueues);          /* PR: list of all workqueues */
 static bool workqueue_freezing;                /* PL: have wqs started freezing? */
 
-static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */
+/* PL: allowable cpus for unbound wqs and work items */
+static cpumask_var_t wq_unbound_cpumask;
+
+/* CPU where unbound work was last round robin scheduled from this CPU */
+static DEFINE_PER_CPU(int, wq_rr_cpu_last);
+
+/*
+ * Local execution of unbound work items is no longer guaranteed.  The
+ * following always forces round-robin CPU selection on unbound work items
+ * to uncover usages which depend on it.
+ */
+#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
+static bool wq_debug_force_rr_cpu = true;
+#else
+static bool wq_debug_force_rr_cpu = false;
+#endif
+module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
 
 /* the per-cpu worker pools */
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
@@ -570,6 +586,16 @@ static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
                                                  int node)
 {
        assert_rcu_or_wq_mutex_or_pool_mutex(wq);
+
+       /*
+        * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
+        * delayed item is pending.  The plan is to keep CPU -> NODE
+        * mapping valid and stable across CPU on/offlines.  Once that
+        * happens, this workaround can be removed.
+        */
+       if (unlikely(node == NUMA_NO_NODE))
+               return wq->dfl_pwq;
+
        return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
 }
 
@@ -1298,6 +1324,39 @@ static bool is_chained_work(struct workqueue_struct *wq)
        return worker && worker->current_pwq->wq == wq;
 }
 
+/*
+ * When queueing an unbound work item to a wq, prefer local CPU if allowed
+ * by wq_unbound_cpumask.  Otherwise, round robin among the allowed ones to
+ * avoid perturbing sensitive tasks.
+ */
+static int wq_select_unbound_cpu(int cpu)
+{
+       static bool printed_dbg_warning;
+       int new_cpu;
+
+       if (likely(!wq_debug_force_rr_cpu)) {
+               if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
+                       return cpu;
+       } else if (!printed_dbg_warning) {
+               pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
+               printed_dbg_warning = true;
+       }
+
+       if (cpumask_empty(wq_unbound_cpumask))
+               return cpu;
+
+       new_cpu = __this_cpu_read(wq_rr_cpu_last);
+       new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
+       if (unlikely(new_cpu >= nr_cpu_ids)) {
+               new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
+               if (unlikely(new_cpu >= nr_cpu_ids))
+                       return cpu;
+       }
+       __this_cpu_write(wq_rr_cpu_last, new_cpu);
+
+       return new_cpu;
+}
+
 static void __queue_work(int cpu, struct workqueue_struct *wq,
                         struct work_struct *work)
 {
@@ -1323,7 +1382,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
                return;
 retry:
        if (req_cpu == WORK_CPU_UNBOUND)
-               cpu = raw_smp_processor_id();
+               cpu = wq_select_unbound_cpu(raw_smp_processor_id());
 
        /* pwq which will be used unless @work is executing elsewhere */
        if (!(wq->flags & WQ_UNBOUND))
@@ -1464,13 +1523,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
        timer_stats_timer_set_start_info(&dwork->timer);
 
        dwork->wq = wq;
-       /* timer isn't guaranteed to run in this cpu, record earlier */
-       if (cpu == WORK_CPU_UNBOUND)
-               cpu = raw_smp_processor_id();
        dwork->cpu = cpu;
        timer->expires = jiffies + delay;
 
-       add_timer_on(timer, cpu);
+       if (unlikely(cpu != WORK_CPU_UNBOUND))
+               add_timer_on(timer, cpu);
+       else
+               add_timer(timer);
 }
 
 /**
@@ -2355,7 +2414,8 @@ static void check_flush_dependency(struct workqueue_struct *target_wq,
        WARN_ONCE(current->flags & PF_MEMALLOC,
                  "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
                  current->pid, current->comm, target_wq->name, target_func);
-       WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
+       WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
+                             (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
                  "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
                  worker->current_pwq->wq->name, worker->current_func,
                  target_wq->name, target_func);
index 5a0c1c8..133ebc0 100644 (file)
@@ -210,9 +210,11 @@ config RANDOM32_SELFTEST
 # compression support is select'ed if needed
 #
 config 842_COMPRESS
+       select CRC32
        tristate
 
 config 842_DECOMPRESS
+       select CRC32
        tristate
 
 config ZLIB_INFLATE
@@ -475,6 +477,11 @@ config DDR
          information. This data is useful for drivers handling
          DDR SDRAM controllers.
 
+config IRQ_POLL
+       bool "IRQ polling library"
+       help
+         Helper library to implement interrupt mitigation using polling.
+
 config MPILIB
        tristate
        select CLZ_TAB
index ecb9e75..8bfd1ac 100644 (file)
@@ -1400,6 +1400,21 @@ config RCU_EQS_DEBUG
 
 endmenu # "RCU Debugging"
 
+config DEBUG_WQ_FORCE_RR_CPU
+       bool "Force round-robin CPU selection for unbound work items"
+       depends on DEBUG_KERNEL
+       default n
+       help
+         Workqueue used to implicitly guarantee that work items queued
+         without explicit CPU specified are put on the local CPU.  This
+         guarantee is no longer true and, while the local CPU is still
+         preferred, work items may be put on foreign CPUs.  Kernel
+         parameter "workqueue.debug_force_rr_cpu" is added to force
+         round-robin CPU selection to flush out usages which depend on the
+         now broken guarantee.  This config option enables the debug
+         feature by default.  When enabled, memory and cache locality will
+         be impacted.
+
 config DEBUG_BLOCK_EXT_DEVT
         bool "Force extended block device numbers and spread them"
        depends on DEBUG_KERNEL
index 2d4bc33..a7c26a4 100644 (file)
@@ -165,6 +165,7 @@ obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o
 
 obj-$(CONFIG_SG_SPLIT) += sg_split.o
 obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
+obj-$(CONFIG_IRQ_POLL) += irq_poll.o
 
 libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
               fdt_empty_tree.o
index 547f7f9..519b5a1 100644 (file)
@@ -21,7 +21,7 @@
 #define ODEBUG_HASH_BITS       14
 #define ODEBUG_HASH_SIZE       (1 << ODEBUG_HASH_BITS)
 
-#define ODEBUG_POOL_SIZE       512
+#define ODEBUG_POOL_SIZE       1024
 #define ODEBUG_POOL_MIN_LEVEL  256
 
 #define ODEBUG_CHUNK_SHIFT     PAGE_SHIFT
index 6745c62..c30d07e 100644 (file)
@@ -25,6 +25,7 @@ static atomic_t dump_lock = ATOMIC_INIT(-1);
 
 asmlinkage __visible void dump_stack(void)
 {
+       unsigned long flags;
        int was_locked;
        int old;
        int cpu;
@@ -33,9 +34,8 @@ asmlinkage __visible void dump_stack(void)
         * Permit this cpu to perform nested stack dumps while serialising
         * against other CPUs
         */
-       preempt_disable();
-
 retry:
+       local_irq_save(flags);
        cpu = smp_processor_id();
        old = atomic_cmpxchg(&dump_lock, -1, cpu);
        if (old == -1) {
@@ -43,6 +43,7 @@ retry:
        } else if (old == cpu) {
                was_locked = 1;
        } else {
+               local_irq_restore(flags);
                cpu_relax();
                goto retry;
        }
@@ -52,7 +53,7 @@ retry:
        if (!was_locked)
                atomic_set(&dump_lock, -1);
 
-       preempt_enable();
+       local_irq_restore(flags);
 }
 #else
 asmlinkage __visible void dump_stack(void)
diff --git a/lib/irq_poll.c b/lib/irq_poll.c
new file mode 100644 (file)
index 0000000..836f7db
--- /dev/null
@@ -0,0 +1,222 @@
+/*
+ * Functions related to interrupt-poll handling in the block layer. This
+ * is similar to NAPI for network devices.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bio.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+#include <linux/irq_poll.h>
+#include <linux/delay.h>
+
+static unsigned int irq_poll_budget __read_mostly = 256;
+
+static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
+
+/**
+ * irq_poll_sched - Schedule a run of the iopoll handler
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     Add this irq_poll structure to the pending poll list and trigger the
+ *     raise of the irq poll softirq (IRQ_POLL_SOFTIRQ).
+ **/
+void irq_poll_sched(struct irq_poll *iop)
+{
+       unsigned long flags;
+
+       if (test_bit(IRQ_POLL_F_DISABLE, &iop->state))
+               return;
+       if (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state))
+               return;
+
+       local_irq_save(flags);
+       list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
+       __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL(irq_poll_sched);
+
+/**
+ * __irq_poll_complete - Mark this @iop as un-polled again
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     See irq_poll_complete(). This function must be called with interrupts
+ *     disabled.
+ **/
+static void __irq_poll_complete(struct irq_poll *iop)
+{
+       list_del(&iop->list);
+       smp_mb__before_atomic();
+       clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state);
+}
+
+/**
+ * irq_poll_complete - Mark this @iop as un-polled again
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     If a driver consumes less than the assigned budget in its run of the
+ *     iopoll handler, it'll end the polled mode by calling this function. The
+ *     iopoll handler will not be invoked again before irq_poll_sched()
+ *     is called.
+ **/
+void irq_poll_complete(struct irq_poll *iop)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       __irq_poll_complete(iop);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL(irq_poll_complete);
+
+static void irq_poll_softirq(struct softirq_action *h)
+{
+       struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll);
+       int rearm = 0, budget = irq_poll_budget;
+       unsigned long start_time = jiffies;
+
+       local_irq_disable();
+
+       while (!list_empty(list)) {
+               struct irq_poll *iop;
+               int work, weight;
+
+               /*
+                * If softirq window is exhausted then punt.
+                */
+               if (budget <= 0 || time_after(jiffies, start_time)) {
+                       rearm = 1;
+                       break;
+               }
+
+               local_irq_enable();
+
+               /* Even though interrupts have been re-enabled, this
+                * access is safe because interrupts can only add new
+                * entries to the tail of this list, and only ->poll()
+                * calls can remove this head entry from the list.
+                */
+               iop = list_entry(list->next, struct irq_poll, list);
+
+               weight = iop->weight;
+               work = 0;
+               if (test_bit(IRQ_POLL_F_SCHED, &iop->state))
+                       work = iop->poll(iop, weight);
+
+               budget -= work;
+
+               local_irq_disable();
+
+               /*
+                * Drivers must not modify the iopoll state, if they
+                * consume their assigned weight (or more, some drivers can't
+                * easily just stop processing, they have to complete an
+                * entire mask of commands). In such cases this code
+                * still "owns" the iopoll instance and therefore can
+                * move the instance around on the list at-will.
+                */
+               if (work >= weight) {
+                       if (test_bit(IRQ_POLL_F_DISABLE, &iop->state))
+                               __irq_poll_complete(iop);
+                       else
+                               list_move_tail(&iop->list, list);
+               }
+       }
+
+       if (rearm)
+               __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
+
+       local_irq_enable();
+}
+
+/**
+ * irq_poll_disable - Disable iopoll on this @iop
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     Disable io polling and wait for any pending callbacks to have completed.
+ **/
+void irq_poll_disable(struct irq_poll *iop)
+{
+       set_bit(IRQ_POLL_F_DISABLE, &iop->state);
+       while (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state))
+               msleep(1);
+       clear_bit(IRQ_POLL_F_DISABLE, &iop->state);
+}
+EXPORT_SYMBOL(irq_poll_disable);
+
+/**
+ * irq_poll_enable - Enable iopoll on this @iop
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     Enable iopoll on this @iop. Note that the handler run will not be
+ *     scheduled, it will only mark it as active.
+ **/
+void irq_poll_enable(struct irq_poll *iop)
+{
+       BUG_ON(!test_bit(IRQ_POLL_F_SCHED, &iop->state));
+       smp_mb__before_atomic();
+       clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state);
+}
+EXPORT_SYMBOL(irq_poll_enable);
+
+/**
+ * irq_poll_init - Initialize this @iop
+ * @iop:      The parent iopoll structure
+ * @weight:   The default weight (or command completion budget)
+ * @poll_fn:  The handler to invoke
+ *
+ * Description:
+ *     Initialize and enable this irq_poll structure.
+ **/
+void irq_poll_init(struct irq_poll *iop, int weight, irq_poll_fn *poll_fn)
+{
+       memset(iop, 0, sizeof(*iop));
+       INIT_LIST_HEAD(&iop->list);
+       iop->weight = weight;
+       iop->poll = poll_fn;
+}
+EXPORT_SYMBOL(irq_poll_init);
+
+static int irq_poll_cpu_notify(struct notifier_block *self,
+                                unsigned long action, void *hcpu)
+{
+       /*
+        * If a CPU goes away, splice its entries to the current CPU
+        * and trigger a run of the softirq
+        */
+       if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+               int cpu = (unsigned long) hcpu;
+
+               local_irq_disable();
+               list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
+                                this_cpu_ptr(&blk_cpu_iopoll));
+               __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
+               local_irq_enable();
+       }
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block irq_poll_cpu_notifier = {
+       .notifier_call  = irq_poll_cpu_notify,
+};
+
+static __init int irq_poll_setup(void)
+{
+       int i;
+
+       for_each_possible_cpu(i)
+               INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
+
+       open_softirq(IRQ_POLL_SOFTIRQ, irq_poll_softirq);
+       register_hotcpu_notifier(&irq_poll_cpu_notifier);
+       return 0;
+}
+subsys_initcall(irq_poll_setup);
index 31ce853..74a54b7 100644 (file)
@@ -75,3 +75,4 @@ module_exit(libcrc32c_mod_fini);
 MODULE_AUTHOR("Clay Haapala <chaapala@cisco.com>");
 MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations");
 MODULE_LICENSE("GPL");
+MODULE_SOFTDEP("pre: crc32c");
index fcf5d98..6b79e90 100644 (file)
@@ -1019,9 +1019,13 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
                return 0;
 
        radix_tree_for_each_slot(slot, root, &iter, first_index) {
-               results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
+               results[ret] = rcu_dereference_raw(*slot);
                if (!results[ret])
                        continue;
+               if (radix_tree_is_indirect_ptr(results[ret])) {
+                       slot = radix_tree_iter_retry(&iter);
+                       continue;
+               }
                if (++ret == max_items)
                        break;
        }
@@ -1098,9 +1102,13 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
                return 0;
 
        radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
-               results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
+               results[ret] = rcu_dereference_raw(*slot);
                if (!results[ret])
                        continue;
+               if (radix_tree_is_indirect_ptr(results[ret])) {
+                       slot = radix_tree_iter_retry(&iter);
+                       continue;
+               }
                if (++ret == max_items)
                        break;
        }
index 40e03ea..2c5de86 100644 (file)
@@ -49,7 +49,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
                if (rs->missed)
                        printk(KERN_WARNING "%s: %d callbacks suppressed\n",
                                func, rs->missed);
-               rs->begin   = 0;
+               rs->begin   = jiffies;
                rs->printed = 0;
                rs->missed  = 0;
        }
index bafa993..004fc70 100644 (file)
@@ -598,9 +598,9 @@ EXPORT_SYMBOL(sg_miter_next);
  *
  * Description:
  *   Stops mapping iterator @miter.  @miter should have been started
- *   started using sg_miter_start().  A stopped iteration can be
- *   resumed by calling sg_miter_next() on it.  This is useful when
- *   resources (kmap) need to be released during iteration.
+ *   using sg_miter_start().  A stopped iteration can be resumed by
+ *   calling sg_miter_next() on it.  This is useful when resources (kmap)
+ *   need to be released during iteration.
  *
  * Context:
  *   Preemption disabled if the SG_MITER_ATOMIC is set.  Don't care
index 98866a7..25b5cbf 100644 (file)
@@ -327,36 +327,67 @@ out:
 }
 
 #define string_get_size_maxbuf 16
-#define test_string_get_size_one(size, blk_size, units, exp_result)            \
+#define test_string_get_size_one(size, blk_size, exp_result10, exp_result2)    \
        do {                                                                   \
-               BUILD_BUG_ON(sizeof(exp_result) >= string_get_size_maxbuf);    \
-               __test_string_get_size((size), (blk_size), (units),            \
-                                      (exp_result));                          \
+               BUILD_BUG_ON(sizeof(exp_result10) >= string_get_size_maxbuf);  \
+               BUILD_BUG_ON(sizeof(exp_result2) >= string_get_size_maxbuf);   \
+               __test_string_get_size((size), (blk_size), (exp_result10),     \
+                                      (exp_result2));                         \
        } while (0)
 
 
-static __init void __test_string_get_size(const u64 size, const u64 blk_size,
-                                         const enum string_size_units units,
-                                         const char *exp_result)
+static __init void test_string_get_size_check(const char *units,
+                                             const char *exp,
+                                             char *res,
+                                             const u64 size,
+                                             const u64 blk_size)
 {
-       char buf[string_get_size_maxbuf];
-
-       string_get_size(size, blk_size, units, buf, sizeof(buf));
-       if (!memcmp(buf, exp_result, strlen(exp_result) + 1))
+       if (!memcmp(res, exp, strlen(exp) + 1))
                return;
 
-       buf[sizeof(buf) - 1] = '\0';
-       pr_warn("Test 'test_string_get_size_one' failed!\n");
-       pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %d\n",
+       res[string_get_size_maxbuf - 1] = '\0';
+
+       pr_warn("Test 'test_string_get_size' failed!\n");
+       pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %s)\n",
                size, blk_size, units);
-       pr_warn("expected: '%s', got '%s'\n", exp_result, buf);
+       pr_warn("expected: '%s', got '%s'\n", exp, res);
+}
+
+static __init void __test_string_get_size(const u64 size, const u64 blk_size,
+                                         const char *exp_result10,
+                                         const char *exp_result2)
+{
+       char buf10[string_get_size_maxbuf];
+       char buf2[string_get_size_maxbuf];
+
+       string_get_size(size, blk_size, STRING_UNITS_10, buf10, sizeof(buf10));
+       string_get_size(size, blk_size, STRING_UNITS_2, buf2, sizeof(buf2));
+
+       test_string_get_size_check("STRING_UNITS_10", exp_result10, buf10,
+                                  size, blk_size);
+
+       test_string_get_size_check("STRING_UNITS_2", exp_result2, buf2,
+                                  size, blk_size);
 }
 
 static __init void test_string_get_size(void)
 {
-       test_string_get_size_one(16384, 512, STRING_UNITS_2, "8.00 MiB");
-       test_string_get_size_one(8192, 4096, STRING_UNITS_10, "32.7 MB");
-       test_string_get_size_one(1, 512, STRING_UNITS_10, "512 B");
+       /* small values */
+       test_string_get_size_one(0, 512, "0 B", "0 B");
+       test_string_get_size_one(1, 512, "512 B", "512 B");
+       test_string_get_size_one(1100, 1, "1.10 kB", "1.07 KiB");
+
+       /* normal values */
+       test_string_get_size_one(16384, 512, "8.39 MB", "8.00 MiB");
+       test_string_get_size_one(500118192, 512, "256 GB", "238 GiB");
+       test_string_get_size_one(8192, 4096, "33.6 MB", "32.0 MiB");
+
+       /* weird block sizes */
+       test_string_get_size_one(3000, 1900, "5.70 MB", "5.44 MiB");
+
+       /* huge values */
+       test_string_get_size_one(U64_MAX, 4096, "75.6 ZB", "64.0 ZiB");
+       test_string_get_size_one(4096, U64_MAX, "75.6 ZB", "64.0 ZiB");
 }
 
 static int __init test_string_helpers_init(void)
index 97a4e06..03cbfa0 100644 (file)
@@ -624,7 +624,7 @@ config ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
        bool
 
 config DEFERRED_STRUCT_PAGE_INIT
-       bool "Defer initialisation of struct pages to kswapd"
+       bool "Defer initialisation of struct pages to kthreads"
        default n
        depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
        depends on MEMORY_HOTPLUG
@@ -633,9 +633,10 @@ config DEFERRED_STRUCT_PAGE_INIT
          single thread. On very large machines this can take a considerable
          amount of time. If this option is set, large machines will bring up
          a subset of memmap at boot and then initialise the rest in parallel
-         when kswapd starts. This has a potential performance impact on
-         processes running early in the lifetime of the systemm until kswapd
-         finishes the initialisation.
+         by starting a one-off "pgdatinitX" kernel thread for each node X. This
+         has a potential performance impact on processes running early in the
+         lifetime of the system until these kthreads finish the
+         initialisation.
 
 config IDLE_PAGE_TRACKING
        bool "Enable idle page tracking"
index cc5d29d..926c76d 100644 (file)
@@ -989,7 +989,7 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout)
                 * here rather than calling cond_resched().
                 */
                if (current->flags & PF_WQ_WORKER)
-                       schedule_timeout(1);
+                       schedule_timeout_uninterruptible(1);
                else
                        cond_resched();
 
index 8fc5081..ba5d8f3 100644 (file)
@@ -22,7 +22,7 @@
  * cleancache_ops is set by cleancache_register_ops to contain the pointers
  * to the cleancache "backend" implementation functions.
  */
-static struct cleancache_ops *cleancache_ops __read_mostly;
+static const struct cleancache_ops *cleancache_ops __read_mostly;
 
 /*
  * Counters available via /sys/kernel/debug/cleancache (if debugfs is
@@ -49,7 +49,7 @@ static void cleancache_register_ops_sb(struct super_block *sb, void *unused)
 /*
  * Register operations for cleancache. Returns 0 on success.
  */
-int cleancache_register_ops(struct cleancache_ops *ops)
+int cleancache_register_ops(const struct cleancache_ops *ops)
 {
        if (cmpxchg(&cleancache_ops, NULL, ops))
                return -EBUSY;
index 847ee43..bc94386 100644 (file)
@@ -11,6 +11,7 @@
  */
 #include <linux/export.h>
 #include <linux/compiler.h>
+#include <linux/dax.h>
 #include <linux/fs.h>
 #include <linux/uaccess.h>
 #include <linux/capability.h>
@@ -123,9 +124,9 @@ static void page_cache_tree_delete(struct address_space *mapping,
        __radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot);
 
        if (shadow) {
-               mapping->nrshadows++;
+               mapping->nrexceptional++;
                /*
-                * Make sure the nrshadows update is committed before
+                * Make sure the nrexceptional update is committed before
                 * the nrpages update so that final truncate racing
                 * with reclaim does not see both counters 0 at the
                 * same time and miss a shadow entry.
@@ -481,6 +482,12 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 {
        int err = 0;
 
+       if (dax_mapping(mapping) && mapping->nrexceptional) {
+               err = dax_writeback_mapping_range(mapping, lstart, lend);
+               if (err)
+                       return err;
+       }
+
        if (mapping->nrpages) {
                err = __filemap_fdatawrite_range(mapping, lstart, lend,
                                                 WB_SYNC_ALL);
@@ -579,9 +586,13 @@ static int page_cache_tree_insert(struct address_space *mapping,
                p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
                if (!radix_tree_exceptional_entry(p))
                        return -EEXIST;
+
+               if (WARN_ON(dax_mapping(mapping)))
+                       return -EINVAL;
+
                if (shadowp)
                        *shadowp = p;
-               mapping->nrshadows--;
+               mapping->nrexceptional--;
                if (node)
                        workingset_node_shadows_dec(node);
        }
@@ -1245,9 +1256,9 @@ repeat:
                        if (radix_tree_deref_retry(page))
                                goto restart;
                        /*
-                        * A shadow entry of a recently evicted page,
-                        * or a swap entry from shmem/tmpfs.  Return
-                        * it without attempting to raise page count.
+                        * A shadow entry of a recently evicted page, a swap
+                        * entry from shmem/tmpfs or a DAX entry.  Return it
+                        * without attempting to raise page count.
                         */
                        goto export;
                }
@@ -1494,6 +1505,74 @@ repeat:
 }
 EXPORT_SYMBOL(find_get_pages_tag);
 
+/**
+ * find_get_entries_tag - find and return entries that match @tag
+ * @mapping:   the address_space to search
+ * @start:     the starting page cache index
+ * @tag:       the tag index
+ * @nr_entries:        the maximum number of entries
+ * @entries:   where the resulting entries are placed
+ * @indices:   the cache indices corresponding to the entries in @entries
+ *
+ * Like find_get_entries, except we only return entries which are tagged with
+ * @tag.
+ */
+unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
+                       int tag, unsigned int nr_entries,
+                       struct page **entries, pgoff_t *indices)
+{
+       void **slot;
+       unsigned int ret = 0;
+       struct radix_tree_iter iter;
+
+       if (!nr_entries)
+               return 0;
+
+       rcu_read_lock();
+restart:
+       radix_tree_for_each_tagged(slot, &mapping->page_tree,
+                                  &iter, start, tag) {
+               struct page *page;
+repeat:
+               page = radix_tree_deref_slot(slot);
+               if (unlikely(!page))
+                       continue;
+               if (radix_tree_exception(page)) {
+                       if (radix_tree_deref_retry(page)) {
+                               /*
+                                * Transient condition which can only trigger
+                                * when entry at index 0 moves out of or back
+                                * to root: none yet gotten, safe to restart.
+                                */
+                               goto restart;
+                       }
+
+                       /*
+                        * A shadow entry of a recently evicted page, a swap
+                        * entry from shmem/tmpfs or a DAX entry.  Return it
+                        * without attempting to raise page count.
+                        */
+                       goto export;
+               }
+               if (!page_cache_get_speculative(page))
+                       goto repeat;
+
+               /* Has the page moved? */
+               if (unlikely(page != *slot)) {
+                       page_cache_release(page);
+                       goto repeat;
+               }
+export:
+               indices[ret] = iter.index;
+               entries[ret] = page;
+               if (++ret == nr_entries)
+                       break;
+       }
+       rcu_read_unlock();
+       return ret;
+}
+EXPORT_SYMBOL(find_get_entries_tag);
+
 /*
  * CD/DVDs are error prone. When a medium error occurs, the driver may fail
  * a _large_ part of the i/o request. Imagine the worst scenario:
@@ -2684,11 +2763,11 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        struct inode *inode = file->f_mapping->host;
        ssize_t ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = generic_write_checks(iocb, from);
        if (ret > 0)
                ret = __generic_file_write_iter(iocb, from);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (ret > 0) {
                ssize_t err;
index b64a361..7bf19ff 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -430,10 +430,8 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
                         * Anon pages in shared mappings are surprising: now
                         * just reject it.
                         */
-                       if (!is_cow_mapping(vm_flags)) {
-                               WARN_ON_ONCE(vm_flags & VM_MAYWRITE);
+                       if (!is_cow_mapping(vm_flags))
                                return -EFAULT;
-                       }
                }
        } else if (!(vm_flags & VM_READ)) {
                if (!(gup_flags & FOLL_FORCE))
index 8ad5802..08fc0ba 100644 (file)
@@ -138,9 +138,6 @@ static struct khugepaged_scan khugepaged_scan = {
        .mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head),
 };
 
-static DEFINE_SPINLOCK(split_queue_lock);
-static LIST_HEAD(split_queue);
-static unsigned long split_queue_len;
 static struct shrinker deferred_split_shrinker;
 
 static void set_recommended_min_free_kbytes(void)
@@ -861,7 +858,8 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
                return false;
        entry = mk_pmd(zero_page, vma->vm_page_prot);
        entry = pmd_mkhuge(entry);
-       pgtable_trans_huge_deposit(mm, pmd, pgtable);
+       if (pgtable)
+               pgtable_trans_huge_deposit(mm, pmd, pgtable);
        set_pmd_at(mm, haddr, pmd, entry);
        atomic_long_inc(&mm->nr_ptes);
        return true;
@@ -1039,13 +1037,15 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
        spinlock_t *dst_ptl, *src_ptl;
        struct page *src_page;
        pmd_t pmd;
-       pgtable_t pgtable;
+       pgtable_t pgtable = NULL;
        int ret;
 
-       ret = -ENOMEM;
-       pgtable = pte_alloc_one(dst_mm, addr);
-       if (unlikely(!pgtable))
-               goto out;
+       if (!vma_is_dax(vma)) {
+               ret = -ENOMEM;
+               pgtable = pte_alloc_one(dst_mm, addr);
+               if (unlikely(!pgtable))
+                       goto out;
+       }
 
        dst_ptl = pmd_lock(dst_mm, dst_pmd);
        src_ptl = pmd_lockptr(src_mm, src_pmd);
@@ -1076,7 +1076,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                goto out_unlock;
        }
 
-       if (pmd_trans_huge(pmd)) {
+       if (!vma_is_dax(vma)) {
                /* thp accounting separate from pmd_devmap accounting */
                src_page = pmd_page(pmd);
                VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
@@ -1560,7 +1560,8 @@ int madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
        struct mm_struct *mm = tlb->mm;
        int ret = 0;
 
-       if (!pmd_trans_huge_lock(pmd, vma, &ptl))
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (!ptl)
                goto out_unlocked;
 
        orig_pmd = *pmd;
@@ -1627,7 +1628,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
        pmd_t orig_pmd;
        spinlock_t *ptl;
 
-       if (!__pmd_trans_huge_lock(pmd, vma, &ptl))
+       ptl = __pmd_trans_huge_lock(pmd, vma);
+       if (!ptl)
                return 0;
        /*
         * For architectures like ppc64 we look at deposited pgtable
@@ -1690,7 +1692,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
         * We don't have to worry about the ordering of src and dst
         * ptlocks because exclusive mmap_sem prevents deadlock.
         */
-       if (__pmd_trans_huge_lock(old_pmd, vma, &old_ptl)) {
+       old_ptl = __pmd_trans_huge_lock(old_pmd, vma);
+       if (old_ptl) {
                new_ptl = pmd_lockptr(mm, new_pmd);
                if (new_ptl != old_ptl)
                        spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
@@ -1724,7 +1727,8 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
        spinlock_t *ptl;
        int ret = 0;
 
-       if (__pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = __pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                pmd_t entry;
                bool preserve_write = prot_numa && pmd_write(*pmd);
                ret = 1;
@@ -1760,14 +1764,14 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
  * Note that if it returns true, this routine returns without unlocking page
  * table lock. So callers must unlock it.
  */
-bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
-               spinlock_t **ptl)
+spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
 {
-       *ptl = pmd_lock(vma->vm_mm, pmd);
+       spinlock_t *ptl;
+       ptl = pmd_lock(vma->vm_mm, pmd);
        if (likely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
-               return true;
-       spin_unlock(*ptl);
-       return false;
+               return ptl;
+       spin_unlock(ptl);
+       return NULL;
 }
 
 #define VM_NO_THP (VM_SPECIAL | VM_HUGETLB | VM_SHARED | VM_MAYSHARE)
@@ -2068,7 +2072,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
        if (likely(writable)) {
                if (likely(referenced)) {
                        result = SCAN_SUCCEED;
-                       trace_mm_collapse_huge_page_isolate(page_to_pfn(page), none_or_zero,
+                       trace_mm_collapse_huge_page_isolate(page, none_or_zero,
                                                            referenced, writable, result);
                        return 1;
                }
@@ -2078,7 +2082,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 
 out:
        release_pte_pages(pte, _pte);
-       trace_mm_collapse_huge_page_isolate(page_to_pfn(page), none_or_zero,
+       trace_mm_collapse_huge_page_isolate(page, none_or_zero,
                                            referenced, writable, result);
        return 0;
 }
@@ -2576,7 +2580,7 @@ out_unmap:
                collapse_huge_page(mm, address, hpage, vma, node);
        }
 out:
-       trace_mm_khugepaged_scan_pmd(mm, page_to_pfn(page), writable, referenced,
+       trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
                                     none_or_zero, result);
        return ret;
 }
@@ -3354,6 +3358,7 @@ int total_mapcount(struct page *page)
 int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
        struct page *head = compound_head(page);
+       struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
        struct anon_vma *anon_vma;
        int count, mapcount, ret;
        bool mlocked;
@@ -3397,19 +3402,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                lru_add_drain();
 
        /* Prevent deferred_split_scan() touching ->_count */
-       spin_lock_irqsave(&split_queue_lock, flags);
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
        count = page_count(head);
        mapcount = total_mapcount(head);
        if (!mapcount && count == 1) {
                if (!list_empty(page_deferred_list(head))) {
-                       split_queue_len--;
+                       pgdata->split_queue_len--;
                        list_del(page_deferred_list(head));
                }
-               spin_unlock_irqrestore(&split_queue_lock, flags);
+               spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
                __split_huge_page(page, list);
                ret = 0;
        } else if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
-               spin_unlock_irqrestore(&split_queue_lock, flags);
+               spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
                pr_alert("total_mapcount: %u, page_count(): %u\n",
                                mapcount, count);
                if (PageTail(page))
@@ -3417,7 +3422,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                dump_page(page, "total_mapcount(head) > 0");
                BUG();
        } else {
-               spin_unlock_irqrestore(&split_queue_lock, flags);
+               spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
                unfreeze_page(anon_vma, head);
                ret = -EBUSY;
        }
@@ -3432,64 +3437,65 @@ out:
 
 void free_transhuge_page(struct page *page)
 {
+       struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
        unsigned long flags;
 
-       spin_lock_irqsave(&split_queue_lock, flags);
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
        if (!list_empty(page_deferred_list(page))) {
-               split_queue_len--;
+               pgdata->split_queue_len--;
                list_del(page_deferred_list(page));
        }
-       spin_unlock_irqrestore(&split_queue_lock, flags);
+       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
        free_compound_page(page);
 }
 
 void deferred_split_huge_page(struct page *page)
 {
+       struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
        unsigned long flags;
 
        VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 
-       spin_lock_irqsave(&split_queue_lock, flags);
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
        if (list_empty(page_deferred_list(page))) {
-               list_add_tail(page_deferred_list(page), &split_queue);
-               split_queue_len++;
+               list_add_tail(page_deferred_list(page), &pgdata->split_queue);
+               pgdata->split_queue_len++;
        }
-       spin_unlock_irqrestore(&split_queue_lock, flags);
+       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 }
 
 static unsigned long deferred_split_count(struct shrinker *shrink,
                struct shrink_control *sc)
 {
-       /*
-        * Split a page from split_queue will free up at least one page,
-        * at most HPAGE_PMD_NR - 1. We don't track exact number.
-        * Let's use HPAGE_PMD_NR / 2 as ballpark.
-        */
-       return ACCESS_ONCE(split_queue_len) * HPAGE_PMD_NR / 2;
+       struct pglist_data *pgdata = NODE_DATA(sc->nid);
+       return ACCESS_ONCE(pgdata->split_queue_len);
 }
 
 static unsigned long deferred_split_scan(struct shrinker *shrink,
                struct shrink_control *sc)
 {
+       struct pglist_data *pgdata = NODE_DATA(sc->nid);
        unsigned long flags;
        LIST_HEAD(list), *pos, *next;
        struct page *page;
        int split = 0;
 
-       spin_lock_irqsave(&split_queue_lock, flags);
-       list_splice_init(&split_queue, &list);
-
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
        /* Take pin on all head pages to avoid freeing them under us */
-       list_for_each_safe(pos, next, &list) {
+       list_for_each_safe(pos, next, &pgdata->split_queue) {
                page = list_entry((void *)pos, struct page, mapping);
                page = compound_head(page);
-               /* race with put_compound_page() */
-               if (!get_page_unless_zero(page)) {
+               if (get_page_unless_zero(page)) {
+                       list_move(page_deferred_list(page), &list);
+               } else {
+                       /* We lost race with put_compound_page() */
                        list_del_init(page_deferred_list(page));
-                       split_queue_len--;
+                       pgdata->split_queue_len--;
                }
+               if (!--sc->nr_to_scan)
+                       break;
        }
-       spin_unlock_irqrestore(&split_queue_lock, flags);
+       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 
        list_for_each_safe(pos, next, &list) {
                page = list_entry((void *)pos, struct page, mapping);
@@ -3501,17 +3507,24 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
                put_page(page);
        }
 
-       spin_lock_irqsave(&split_queue_lock, flags);
-       list_splice_tail(&list, &split_queue);
-       spin_unlock_irqrestore(&split_queue_lock, flags);
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+       list_splice_tail(&list, &pgdata->split_queue);
+       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 
-       return split * HPAGE_PMD_NR / 2;
+       /*
+        * Stop shrinker if we didn't split any page, but the queue is empty.
+        * This can happen if pages were freed under us.
+        */
+       if (!split && list_empty(&pgdata->split_queue))
+               return SHRINK_STOP;
+       return split;
 }
 
 static struct shrinker deferred_split_shrinker = {
        .count_objects = deferred_split_count,
        .scan_objects = deferred_split_scan,
        .seeks = DEFAULT_SEEKS,
+       .flags = SHRINKER_NUMA_AWARE,
 };
 
 #ifdef CONFIG_DEBUG_FS
index 12908dc..06ae13e 100644 (file)
@@ -1001,7 +1001,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
                ((node = hstate_next_node_to_free(hs, mask)) || 1);     \
                nr_nodes--)
 
-#if defined(CONFIG_CMA) && defined(CONFIG_X86_64)
+#if defined(CONFIG_X86_64) && ((defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA))
 static void destroy_compound_gigantic_page(struct page *page,
                                        unsigned int order)
 {
@@ -1214,8 +1214,8 @@ void free_huge_page(struct page *page)
 
        set_page_private(page, 0);
        page->mapping = NULL;
-       BUG_ON(page_count(page));
-       BUG_ON(page_mapcount(page));
+       VM_BUG_ON_PAGE(page_count(page), page);
+       VM_BUG_ON_PAGE(page_mapcount(page), page);
        restore_reserve = PagePrivate(page);
        ClearPagePrivate(page);
 
@@ -1286,6 +1286,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned int order)
                set_page_count(p, 0);
                set_compound_head(p, page);
        }
+       atomic_set(compound_mapcount_ptr(page), -1);
 }
 
 /*
index ed8b5ff..a38a21e 100644 (file)
@@ -216,6 +216,37 @@ static inline bool is_cow_mapping(vm_flags_t flags)
        return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 }
 
+/*
+ * These three helpers classify VMAs for virtual memory accounting.
+ */
+
+/*
+ * Executable code area - executable, not writable, not stack
+ */
+static inline bool is_exec_mapping(vm_flags_t flags)
+{
+       return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC;
+}
+
+/*
+ * Stack area - automatically grows in one direction
+ *
+ * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous:
+ * do_mmap() forbids all other combinations.
+ */
+static inline bool is_stack_mapping(vm_flags_t flags)
+{
+       return (flags & VM_STACK) == VM_STACK;
+}
+
+/*
+ * Data area - private, writable, not stack
+ */
+static inline bool is_data_mapping(vm_flags_t flags)
+{
+       return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
+}
+
 /* mm/util.c */
 void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
                struct vm_area_struct *prev, struct rb_node *rb_parent);
index d2ed81e..dd79899 100644 (file)
@@ -1448,7 +1448,7 @@ void __init __memblock_free_late(phys_addr_t base, phys_addr_t size)
  * Remaining API functions
  */
 
-phys_addr_t __init memblock_phys_mem_size(void)
+phys_addr_t __init_memblock memblock_phys_mem_size(void)
 {
        return memblock.memory.total_size;
 }
index ca052f2..d06cae2 100644 (file)
@@ -4638,7 +4638,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
        pte_t *pte;
        spinlock_t *ptl;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
                        mc.precharge += HPAGE_PMD_NR;
                spin_unlock(ptl);
@@ -4826,7 +4827,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
        union mc_target target;
        struct page *page;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                if (mc.precharge < HPAGE_PMD_NR) {
                        spin_unlock(ptl);
                        return 0;
index 30991f8..635451a 100644 (file)
@@ -1591,10 +1591,15 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
         * than insert_pfn).  If a zero_pfn were inserted into a VM_MIXEDMAP
         * without pte special, it would there be refcounted as a normal page.
         */
-       if (!HAVE_PTE_SPECIAL && pfn_t_valid(pfn)) {
+       if (!HAVE_PTE_SPECIAL && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) {
                struct page *page;
 
-               page = pfn_t_to_page(pfn);
+               /*
+                * At this point we are committed to insert_page()
+                * regardless of whether the caller specified flags that
+                * result in pfn_t_has_page() == false.
+                */
+               page = pfn_to_page(pfn_t_to_pfn(pfn));
                return insert_page(vma, addr, page, vma->vm_page_prot);
        }
        return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
@@ -2232,11 +2237,6 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
 
        page_cache_get(old_page);
 
-       /*
-        * Only catch write-faults on shared writable pages,
-        * read-only shared pages can get COWed by
-        * get_user_pages(.write=1, .force=1).
-        */
        if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
                int tmp;
 
index 27d1354..4c4187c 100644 (file)
@@ -548,8 +548,7 @@ retry:
                        goto retry;
                }
 
-               if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
-                       migrate_page_add(page, qp->pagelist, flags);
+               migrate_page_add(page, qp->pagelist, flags);
        }
        pte_unmap_unlock(pte - 1, ptl);
        cond_resched();
@@ -625,7 +624,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
        unsigned long endvma = vma->vm_end;
        unsigned long flags = qp->flags;
 
-       if (vma->vm_flags & VM_PFNMAP)
+       if (!vma_migratable(vma))
                return 1;
 
        if (endvma > end)
@@ -644,16 +643,13 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 
        if (flags & MPOL_MF_LAZY) {
                /* Similar to task_numa_work, skip inaccessible VMAs */
-               if (vma_migratable(vma) &&
-                       vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))
+               if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))
                        change_prot_numa(vma, start, endvma);
                return 1;
        }
 
-       if ((flags & MPOL_MF_STRICT) ||
-           ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
-            vma_migratable(vma)))
-               /* queue pages from current vma */
+       /* queue pages from current vma */
+       if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
                return 0;
        return 1;
 }
index 2a565ed..563f320 100644 (file)
@@ -117,7 +117,8 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
        unsigned char *vec = walk->private;
        int nr = (end - addr) >> PAGE_SHIFT;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                memset(vec, 1, nr);
                spin_unlock(ptl);
                goto out;
index e1e2b12..96f0010 100644 (file)
@@ -175,7 +175,7 @@ static void __munlock_isolation_failed(struct page *page)
  */
 unsigned int munlock_vma_page(struct page *page)
 {
-       unsigned int nr_pages;
+       int nr_pages;
        struct zone *zone = page_zone(page);
 
        /* For try_to_munlock() and to serialize with page migration */
index 84b1262..2f2415a 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -42,6 +42,7 @@
 #include <linux/memory.h>
 #include <linux/printk.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/moduleparam.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -69,6 +70,8 @@ const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
 int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
 #endif
 
+static bool ignore_rlimit_data = true;
+core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
 
 static void unmap_region(struct mm_struct *mm,
                struct vm_area_struct *vma, struct vm_area_struct *prev,
@@ -387,8 +390,9 @@ static long vma_compute_subtree_gap(struct vm_area_struct *vma)
 }
 
 #ifdef CONFIG_DEBUG_VM_RB
-static int browse_rb(struct rb_root *root)
+static int browse_rb(struct mm_struct *mm)
 {
+       struct rb_root *root = &mm->mm_rb;
        int i = 0, j, bug = 0;
        struct rb_node *nd, *pn = NULL;
        unsigned long prev = 0, pend = 0;
@@ -411,12 +415,14 @@ static int browse_rb(struct rb_root *root)
                                  vma->vm_start, vma->vm_end);
                        bug = 1;
                }
+               spin_lock(&mm->page_table_lock);
                if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
                        pr_emerg("free gap %lx, correct %lx\n",
                               vma->rb_subtree_gap,
                               vma_compute_subtree_gap(vma));
                        bug = 1;
                }
+               spin_unlock(&mm->page_table_lock);
                i++;
                pn = nd;
                prev = vma->vm_start;
@@ -453,12 +459,16 @@ static void validate_mm(struct mm_struct *mm)
        struct vm_area_struct *vma = mm->mmap;
 
        while (vma) {
+               struct anon_vma *anon_vma = vma->anon_vma;
                struct anon_vma_chain *avc;
 
-               vma_lock_anon_vma(vma);
-               list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
-                       anon_vma_interval_tree_verify(avc);
-               vma_unlock_anon_vma(vma);
+               if (anon_vma) {
+                       anon_vma_lock_read(anon_vma);
+                       list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+                               anon_vma_interval_tree_verify(avc);
+                       anon_vma_unlock_read(anon_vma);
+               }
+
                highest_address = vma->vm_end;
                vma = vma->vm_next;
                i++;
@@ -472,7 +482,7 @@ static void validate_mm(struct mm_struct *mm)
                          mm->highest_vm_end, highest_address);
                bug = 1;
        }
-       i = browse_rb(&mm->mm_rb);
+       i = browse_rb(mm);
        if (i != mm->map_count) {
                if (i != -1)
                        pr_emerg("map_count %d rb %d\n", mm->map_count, i);
@@ -2139,32 +2149,27 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
        struct mm_struct *mm = vma->vm_mm;
-       int error;
+       int error = 0;
 
        if (!(vma->vm_flags & VM_GROWSUP))
                return -EFAULT;
 
-       /*
-        * We must make sure the anon_vma is allocated
-        * so that the anon_vma locking is not a noop.
-        */
+       /* Guard against wrapping around to address 0. */
+       if (address < PAGE_ALIGN(address+4))
+               address = PAGE_ALIGN(address+4);
+       else
+               return -ENOMEM;
+
+       /* We must make sure the anon_vma is allocated. */
        if (unlikely(anon_vma_prepare(vma)))
                return -ENOMEM;
-       vma_lock_anon_vma(vma);
 
        /*
         * vma->vm_start/vm_end cannot change under us because the caller
         * is required to hold the mmap_sem in read mode.  We need the
         * anon_vma lock to serialize against concurrent expand_stacks.
-        * Also guard against wrapping around to address 0.
         */
-       if (address < PAGE_ALIGN(address+4))
-               address = PAGE_ALIGN(address+4);
-       else {
-               vma_unlock_anon_vma(vma);
-               return -ENOMEM;
-       }
-       error = 0;
+       anon_vma_lock_write(vma->anon_vma);
 
        /* Somebody else might have raced and expanded it already */
        if (address > vma->vm_end) {
@@ -2182,7 +2187,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                                 * updates, but we only hold a shared mmap_sem
                                 * lock here, so we need to protect against
                                 * concurrent vma expansions.
-                                * vma_lock_anon_vma() doesn't help here, as
+                                * anon_vma_lock_write() doesn't help here, as
                                 * we don't guarantee that all growable vmas
                                 * in a mm share the same root anon vma.
                                 * So, we reuse mm->page_table_lock to guard
@@ -2205,7 +2210,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                        }
                }
        }
-       vma_unlock_anon_vma(vma);
+       anon_vma_unlock_write(vma->anon_vma);
        khugepaged_enter_vma_merge(vma, vma->vm_flags);
        validate_mm(mm);
        return error;
@@ -2221,25 +2226,21 @@ int expand_downwards(struct vm_area_struct *vma,
        struct mm_struct *mm = vma->vm_mm;
        int error;
 
-       /*
-        * We must make sure the anon_vma is allocated
-        * so that the anon_vma locking is not a noop.
-        */
-       if (unlikely(anon_vma_prepare(vma)))
-               return -ENOMEM;
-
        address &= PAGE_MASK;
        error = security_mmap_addr(address);
        if (error)
                return error;
 
-       vma_lock_anon_vma(vma);
+       /* We must make sure the anon_vma is allocated. */
+       if (unlikely(anon_vma_prepare(vma)))
+               return -ENOMEM;
 
        /*
         * vma->vm_start/vm_end cannot change under us because the caller
         * is required to hold the mmap_sem in read mode.  We need the
         * anon_vma lock to serialize against concurrent expand_stacks.
         */
+       anon_vma_lock_write(vma->anon_vma);
 
        /* Somebody else might have raced and expanded it already */
        if (address < vma->vm_start) {
@@ -2257,7 +2258,7 @@ int expand_downwards(struct vm_area_struct *vma,
                                 * updates, but we only hold a shared mmap_sem
                                 * lock here, so we need to protect against
                                 * concurrent vma expansions.
-                                * vma_lock_anon_vma() doesn't help here, as
+                                * anon_vma_lock_write() doesn't help here, as
                                 * we don't guarantee that all growable vmas
                                 * in a mm share the same root anon vma.
                                 * So, we reuse mm->page_table_lock to guard
@@ -2278,7 +2279,7 @@ int expand_downwards(struct vm_area_struct *vma,
                        }
                }
        }
-       vma_unlock_anon_vma(vma);
+       anon_vma_unlock_write(vma->anon_vma);
        khugepaged_enter_vma_merge(vma, vma->vm_flags);
        validate_mm(mm);
        return error;
@@ -2982,9 +2983,17 @@ bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
        if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
                return false;
 
-       if ((flags & (VM_WRITE | VM_SHARED | (VM_STACK_FLAGS &
-                               (VM_GROWSUP | VM_GROWSDOWN)))) == VM_WRITE)
-               return mm->data_vm + npages <= rlimit(RLIMIT_DATA);
+       if (is_data_mapping(flags) &&
+           mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
+               if (ignore_rlimit_data)
+                       pr_warn_once("%s (%d): VmData %lu exceed data ulimit "
+                                    "%lu. Will be forbidden soon.\n",
+                                    current->comm, current->pid,
+                                    (mm->data_vm + npages) << PAGE_SHIFT,
+                                    rlimit(RLIMIT_DATA));
+               else
+                       return false;
+       }
 
        return true;
 }
@@ -2993,11 +3002,11 @@ void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
 {
        mm->total_vm += npages;
 
-       if ((flags & (VM_EXEC | VM_WRITE)) == VM_EXEC)
+       if (is_exec_mapping(flags))
                mm->exec_vm += npages;
-       else if (flags & (VM_STACK_FLAGS & (VM_GROWSUP | VM_GROWSDOWN)))
+       else if (is_stack_mapping(flags))
                mm->stack_vm += npages;
-       else if ((flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
+       else if (is_data_mapping(flags))
                mm->data_vm += npages;
 }
 
index 63358d9..838ca8b 100644 (file)
@@ -5209,6 +5209,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
        spin_lock_init(&pgdat->numabalancing_migrate_lock);
        pgdat->numabalancing_migrate_nr_pages = 0;
        pgdat->numabalancing_migrate_next_window = jiffies;
+#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       spin_lock_init(&pgdat->split_queue_lock);
+       INIT_LIST_HEAD(&pgdat->split_queue);
+       pgdat->split_queue_len = 0;
 #endif
        init_waitqueue_head(&pgdat->kswapd_wait);
        init_waitqueue_head(&pgdat->pfmemalloc_wait);
@@ -6615,7 +6620,7 @@ bool is_pageblock_removable_nolock(struct page *page)
        return !has_unmovable_pages(zone, page, 0, true);
 }
 
-#ifdef CONFIG_CMA
+#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
 
 static unsigned long pfn_max_align_down(unsigned long pfn)
 {
index 8a943b9..998607a 100644 (file)
@@ -305,16 +305,12 @@ static void *pcpu_mem_zalloc(size_t size)
 /**
  * pcpu_mem_free - free memory
  * @ptr: memory to free
- * @size: size of the area
  *
  * Free @ptr.  @ptr should have been allocated using pcpu_mem_zalloc().
  */
-static void pcpu_mem_free(void *ptr, size_t size)
+static void pcpu_mem_free(void *ptr)
 {
-       if (size <= PAGE_SIZE)
-               kfree(ptr);
-       else
-               vfree(ptr);
+       kvfree(ptr);
 }
 
 /**
@@ -463,8 +459,8 @@ out_unlock:
         * pcpu_mem_free() might end up calling vfree() which uses
         * IRQ-unsafe lock and thus can't be called under pcpu_lock.
         */
-       pcpu_mem_free(old, old_size);
-       pcpu_mem_free(new, new_size);
+       pcpu_mem_free(old);
+       pcpu_mem_free(new);
 
        return 0;
 }
@@ -732,7 +728,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
        chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
                                                sizeof(chunk->map[0]));
        if (!chunk->map) {
-               pcpu_mem_free(chunk, pcpu_chunk_struct_size);
+               pcpu_mem_free(chunk);
                return NULL;
        }
 
@@ -753,8 +749,8 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk)
 {
        if (!chunk)
                return;
-       pcpu_mem_free(chunk->map, chunk->map_alloc * sizeof(chunk->map[0]));
-       pcpu_mem_free(chunk, pcpu_chunk_struct_size);
+       pcpu_mem_free(chunk->map);
+       pcpu_mem_free(chunk);
 }
 
 /**
index fa2ceb2..440e2a7 100644 (file)
@@ -701,8 +701,7 @@ static void shmem_evict_inode(struct inode *inode)
                        list_del_init(&info->swaplist);
                        mutex_unlock(&shmem_swaplist_mutex);
                }
-       } else
-               kfree(info->symlink);
+       }
 
        simple_xattrs_free(&info->xattrs);
        WARN_ON(inode->i_blocks);
@@ -1902,7 +1901,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
        if (whence != SEEK_DATA && whence != SEEK_HOLE)
                return generic_file_llseek_size(file, offset, whence,
                                        MAX_LFS_FILESIZE, i_size_read(inode));
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        /* We're holding i_mutex so we can access i_size directly */
 
        if (offset < 0)
@@ -1926,7 +1925,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
 
        if (offset >= 0)
                offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return offset;
 }
 
@@ -2091,7 +2090,7 @@ int shmem_add_seals(struct file *file, unsigned int seals)
        if (seals & ~(unsigned int)F_ALL_SEALS)
                return -EINVAL;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (info->seals & F_SEAL_SEAL) {
                error = -EPERM;
@@ -2114,7 +2113,7 @@ int shmem_add_seals(struct file *file, unsigned int seals)
        error = 0;
 
 unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return error;
 }
 EXPORT_SYMBOL_GPL(shmem_add_seals);
@@ -2164,7 +2163,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
        if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
                return -EOPNOTSUPP;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (mode & FALLOC_FL_PUNCH_HOLE) {
                struct address_space *mapping = file->f_mapping;
@@ -2277,7 +2276,7 @@ undone:
        inode->i_private = NULL;
        spin_unlock(&inode->i_lock);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return error;
 }
 
@@ -2549,13 +2548,12 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
        info = SHMEM_I(inode);
        inode->i_size = len-1;
        if (len <= SHORT_SYMLINK_LEN) {
-               info->symlink = kmemdup(symname, len, GFP_KERNEL);
-               if (!info->symlink) {
+               inode->i_link = kmemdup(symname, len, GFP_KERNEL);
+               if (!inode->i_link) {
                        iput(inode);
                        return -ENOMEM;
                }
                inode->i_op = &shmem_short_symlink_operations;
-               inode->i_link = info->symlink;
        } else {
                inode_nohighmem(inode);
                error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
@@ -3132,6 +3130,7 @@ static struct inode *shmem_alloc_inode(struct super_block *sb)
 static void shmem_destroy_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
+       kfree(inode->i_link);
        kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
 }
 
index c43f654..d2c3736 100644 (file)
@@ -1956,9 +1956,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
                set_blocksize(bdev, old_block_size);
                blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
        } else {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                inode->i_flags &= ~S_SWAPFILE;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        filp_close(swap_file, NULL);
 
@@ -2183,7 +2183,7 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
                p->flags |= SWP_BLKDEV;
        } else if (S_ISREG(inode->i_mode)) {
                p->bdev = inode->i_sb->s_bdev;
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if (IS_SWAPFILE(inode))
                        return -EBUSY;
        } else
@@ -2416,7 +2416,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
        mapping = swap_file->f_mapping;
        inode = mapping->host;
 
-       /* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */
+       /* If S_ISREG(inode->i_mode) will do inode_lock(inode); */
        error = claim_swapfile(p, inode);
        if (unlikely(error))
                goto bad_swap;
@@ -2561,7 +2561,7 @@ bad_swap:
        vfree(cluster_info);
        if (swap_file) {
                if (inode && S_ISREG(inode->i_mode)) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        inode = NULL;
                }
                filp_close(swap_file, NULL);
@@ -2574,7 +2574,7 @@ out:
        if (name)
                putname(name);
        if (inode && S_ISREG(inode->i_mode))
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        return error;
 }
 
index 76e35ad..e3ee0e2 100644 (file)
@@ -9,6 +9,7 @@
 
 #include <linux/kernel.h>
 #include <linux/backing-dev.h>
+#include <linux/dax.h>
 #include <linux/gfp.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
@@ -34,31 +35,39 @@ static void clear_exceptional_entry(struct address_space *mapping,
                return;
 
        spin_lock_irq(&mapping->tree_lock);
-       /*
-        * Regular page slots are stabilized by the page lock even
-        * without the tree itself locked.  These unlocked entries
-        * need verification under the tree lock.
-        */
-       if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot))
-               goto unlock;
-       if (*slot != entry)
-               goto unlock;
-       radix_tree_replace_slot(slot, NULL);
-       mapping->nrshadows--;
-       if (!node)
-               goto unlock;
-       workingset_node_shadows_dec(node);
-       /*
-        * Don't track node without shadow entries.
-        *
-        * Avoid acquiring the list_lru lock if already untracked.
-        * The list_empty() test is safe as node->private_list is
-        * protected by mapping->tree_lock.
-        */
-       if (!workingset_node_shadows(node) &&
-           !list_empty(&node->private_list))
-               list_lru_del(&workingset_shadow_nodes, &node->private_list);
-       __radix_tree_delete_node(&mapping->page_tree, node);
+
+       if (dax_mapping(mapping)) {
+               if (radix_tree_delete_item(&mapping->page_tree, index, entry))
+                       mapping->nrexceptional--;
+       } else {
+               /*
+                * Regular page slots are stabilized by the page lock even
+                * without the tree itself locked.  These unlocked entries
+                * need verification under the tree lock.
+                */
+               if (!__radix_tree_lookup(&mapping->page_tree, index, &node,
+                                       &slot))
+                       goto unlock;
+               if (*slot != entry)
+                       goto unlock;
+               radix_tree_replace_slot(slot, NULL);
+               mapping->nrexceptional--;
+               if (!node)
+                       goto unlock;
+               workingset_node_shadows_dec(node);
+               /*
+                * Don't track node without shadow entries.
+                *
+                * Avoid acquiring the list_lru lock if already untracked.
+                * The list_empty() test is safe as node->private_list is
+                * protected by mapping->tree_lock.
+                */
+               if (!workingset_node_shadows(node) &&
+                   !list_empty(&node->private_list))
+                       list_lru_del(&workingset_shadow_nodes,
+                                       &node->private_list);
+               __radix_tree_delete_node(&mapping->page_tree, node);
+       }
 unlock:
        spin_unlock_irq(&mapping->tree_lock);
 }
@@ -228,7 +237,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
        int             i;
 
        cleancache_invalidate_inode(mapping);
-       if (mapping->nrpages == 0 && mapping->nrshadows == 0)
+       if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
                return;
 
        /* Offsets within partial pages */
@@ -402,7 +411,7 @@ EXPORT_SYMBOL(truncate_inode_pages);
  */
 void truncate_inode_pages_final(struct address_space *mapping)
 {
-       unsigned long nrshadows;
+       unsigned long nrexceptional;
        unsigned long nrpages;
 
        /*
@@ -416,14 +425,14 @@ void truncate_inode_pages_final(struct address_space *mapping)
 
        /*
         * When reclaim installs eviction entries, it increases
-        * nrshadows first, then decreases nrpages.  Make sure we see
+        * nrexceptional first, then decreases nrpages.  Make sure we see
         * this in the right order or we might miss an entry.
         */
        nrpages = mapping->nrpages;
        smp_rmb();
-       nrshadows = mapping->nrshadows;
+       nrexceptional = mapping->nrexceptional;
 
-       if (nrpages || nrshadows) {
+       if (nrpages || nrexceptional) {
                /*
                 * As truncation uses a lockless tree lookup, cycle
                 * the tree lock to make sure any ongoing tree
index c108a65..4fb14ca 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -230,36 +230,11 @@ void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 /* Check if the vma is being used as a stack by this task */
-static int vm_is_stack_for_task(struct task_struct *t,
-                               struct vm_area_struct *vma)
+int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t)
 {
        return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
 }
 
-/*
- * Check if the vma is being used as a stack.
- * If is_group is non-zero, check in the entire thread group or else
- * just check in the current task. Returns the task_struct of the task
- * that the vma is stack for. Must be called under rcu_read_lock().
- */
-struct task_struct *task_of_stack(struct task_struct *task,
-                               struct vm_area_struct *vma, bool in_group)
-{
-       if (vm_is_stack_for_task(task, vma))
-               return task;
-
-       if (in_group) {
-               struct task_struct *t;
-
-               for_each_thread(task, t) {
-                       if (vm_is_stack_for_task(t, vma))
-                               return t;
-               }
-       }
-
-       return NULL;
-}
-
 #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
index 9a6c070..149fdf6 100644 (file)
@@ -248,9 +248,8 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 
        if (tree) {
                spin_lock(&vmpr->sr_lock);
-               vmpr->tree_scanned += scanned;
+               scanned = vmpr->tree_scanned += scanned;
                vmpr->tree_reclaimed += reclaimed;
-               scanned = vmpr->scanned;
                spin_unlock(&vmpr->sr_lock);
 
                if (scanned < vmpressure_win)
index bd620b6..71b1c29 100644 (file)
@@ -46,6 +46,7 @@
 #include <linux/oom.h>
 #include <linux/prefetch.h>
 #include <linux/printk.h>
+#include <linux/dax.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -671,9 +672,15 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
                 * inode reclaim needs to empty out the radix tree or
                 * the nodes are lost.  Don't plant shadows behind its
                 * back.
+                *
+                * We also don't store shadows for DAX mappings because the
+                * only page cache pages found in these are zero pages
+                * covering holes, and because we don't want to mix DAX
+                * exceptional entries and shadow exceptional entries in the
+                * same page_tree.
                 */
                if (reclaimed && page_is_file_cache(page) &&
-                   !mapping_exiting(mapping))
+                   !mapping_exiting(mapping) && !dax_mapping(mapping))
                        shadow = workingset_eviction(mapping, page);
                __delete_from_page_cache(page, shadow, memcg);
                spin_unlock_irqrestore(&mapping->tree_lock, flags);
@@ -1436,7 +1443,7 @@ int isolate_lru_page(struct page *page)
        int ret = -EBUSY;
 
        VM_BUG_ON_PAGE(!page_count(page), page);
-       VM_BUG_ON_PAGE(PageTail(page), page);
+       WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");
 
        if (PageLRU(page)) {
                struct zone *zone = page_zone(page);
index 64bd0aa..084c672 100644 (file)
@@ -1396,10 +1396,15 @@ static void vmstat_update(struct work_struct *w)
                 * Counters were updated so we expect more updates
                 * to occur in the future. Keep on running the
                 * update worker thread.
+                * If we were marked on cpu_stat_off clear the flag
+                * so that vmstat_shepherd doesn't schedule us again.
                 */
-               queue_delayed_work_on(smp_processor_id(), vmstat_wq,
-                       this_cpu_ptr(&vmstat_work),
-                       round_jiffies_relative(sysctl_stat_interval));
+               if (!cpumask_test_and_clear_cpu(smp_processor_id(),
+                                               cpu_stat_off)) {
+                       queue_delayed_work_on(smp_processor_id(), vmstat_wq,
+                               this_cpu_ptr(&vmstat_work),
+                               round_jiffies_relative(sysctl_stat_interval));
+               }
        } else {
                /*
                 * We did not update any counters so the app may be in
@@ -1408,17 +1413,7 @@ static void vmstat_update(struct work_struct *w)
                 * Defer the checking for differentials to the
                 * shepherd thread on a different processor.
                 */
-               int r;
-               /*
-                * Shepherd work thread does not race since it never
-                * changes the bit if its zero but the cpu
-                * online / off line code may race if
-                * worker threads are still allowed during
-                * shutdown / startup.
-                */
-               r = cpumask_test_and_set_cpu(smp_processor_id(),
-                       cpu_stat_off);
-               VM_BUG_ON(r);
+               cpumask_set_cpu(smp_processor_id(), cpu_stat_off);
        }
 }
 
@@ -1427,18 +1422,6 @@ static void vmstat_update(struct work_struct *w)
  * until the diffs stay at zero. The function is used by NOHZ and can only be
  * invoked when tick processing is not active.
  */
-void quiet_vmstat(void)
-{
-       if (system_state != SYSTEM_RUNNING)
-               return;
-
-       do {
-               if (!cpumask_test_and_set_cpu(smp_processor_id(), cpu_stat_off))
-                       cancel_delayed_work(this_cpu_ptr(&vmstat_work));
-
-       } while (refresh_cpu_vm_stats(false));
-}
-
 /*
  * Check if the diffs for a certain cpu indicate that
  * an update is needed.
@@ -1462,6 +1445,30 @@ static bool need_update(int cpu)
        return false;
 }
 
+void quiet_vmstat(void)
+{
+       if (system_state != SYSTEM_RUNNING)
+               return;
+
+       /*
+        * If we are already in the hands of the shepherd then there
+        * is nothing for us to do here.
+        */
+       if (cpumask_test_and_set_cpu(smp_processor_id(), cpu_stat_off))
+               return;
+
+       if (!need_update(smp_processor_id()))
+               return;
+
+       /*
+        * Just refresh counters and do not care about the pending delayed
+        * vmstat_update. It doesn't fire often enough to matter and canceling
+        * it would be too expensive from this path.
+        * vmstat_shepherd will take care of that for us.
+        */
+       refresh_cpu_vm_stats(false);
+}
+
 
 /*
  * Shepherd worker thread that checks the
@@ -1479,18 +1486,25 @@ static void vmstat_shepherd(struct work_struct *w)
 
        get_online_cpus();
        /* Check processors whose vmstat worker threads have been disabled */
-       for_each_cpu(cpu, cpu_stat_off)
-               if (need_update(cpu) &&
-                       cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
-
-                       queue_delayed_work_on(cpu, vmstat_wq,
-                               &per_cpu(vmstat_work, cpu), 0);
+       for_each_cpu(cpu, cpu_stat_off) {
+               struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
 
+               if (need_update(cpu)) {
+                       if (cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
+                               queue_delayed_work_on(cpu, vmstat_wq, dw, 0);
+               } else {
+                       /*
+                        * Cancel the work if quiet_vmstat has put this
+                        * cpu on cpu_stat_off because the work item might
+                        * still be scheduled.
+                        */
+                       cancel_delayed_work(dw);
+               }
+       }
        put_online_cpus();
 
        schedule_delayed_work(&shepherd,
                round_jiffies_relative(sysctl_stat_interval));
-
 }
 
 static void __init start_shepherd_timer(void)
@@ -1498,7 +1512,7 @@ static void __init start_shepherd_timer(void)
        int cpu;
 
        for_each_possible_cpu(cpu)
-               INIT_DELAYED_WORK(per_cpu_ptr(&vmstat_work, cpu),
+               INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
                        vmstat_update);
 
        if (!alloc_cpumask_var(&cpu_stat_off, GFP_KERNEL))
index aa01713..61ead9e 100644 (file)
@@ -351,8 +351,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
                        node->slots[i] = NULL;
                        BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT));
                        node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
-                       BUG_ON(!mapping->nrshadows);
-                       mapping->nrshadows--;
+                       BUG_ON(!mapping->nrexceptional);
+                       mapping->nrexceptional--;
                }
        }
        BUG_ON(node->count);
index bced8c0..7bc2208 100644 (file)
@@ -108,9 +108,7 @@ struct p9_poll_wait {
  * @unsent_req_list: accounting for requests that haven't been sent
  * @req: current request being processed (if any)
  * @tmp_buf: temporary buffer to read in header
- * @rsize: amount to read for current frame
- * @rpos: read position in current frame
- * @rbuf: current read buffer
+ * @rc: temporary fcall for reading current frame
  * @wpos: write position for current frame
  * @wsize: amount of data to write for current frame
  * @wbuf: current write buffer
@@ -131,9 +129,7 @@ struct p9_conn {
        struct list_head unsent_req_list;
        struct p9_req_t *req;
        char tmp_buf[7];
-       int rsize;
-       int rpos;
-       char *rbuf;
+       struct p9_fcall rc;
        int wpos;
        int wsize;
        char *wbuf;
@@ -305,69 +301,77 @@ static void p9_read_work(struct work_struct *work)
        if (m->err < 0)
                return;
 
-       p9_debug(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos);
+       p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset);
 
-       if (!m->rbuf) {
-               m->rbuf = m->tmp_buf;
-               m->rpos = 0;
-               m->rsize = 7; /* start by reading header */
+       if (!m->rc.sdata) {
+               m->rc.sdata = m->tmp_buf;
+               m->rc.offset = 0;
+               m->rc.capacity = 7; /* start by reading header */
        }
 
        clear_bit(Rpending, &m->wsched);
-       p9_debug(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n",
-                m, m->rpos, m->rsize, m->rsize-m->rpos);
-       err = p9_fd_read(m->client, m->rbuf + m->rpos,
-                                               m->rsize - m->rpos);
+       p9_debug(P9_DEBUG_TRANS, "read mux %p pos %zd size: %zd = %zd\n",
+                m, m->rc.offset, m->rc.capacity,
+                m->rc.capacity - m->rc.offset);
+       err = p9_fd_read(m->client, m->rc.sdata + m->rc.offset,
+                        m->rc.capacity - m->rc.offset);
        p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);
-       if (err == -EAGAIN) {
+       if (err == -EAGAIN)
                goto end_clear;
-       }
 
        if (err <= 0)
                goto error;
 
-       m->rpos += err;
+       m->rc.offset += err;
 
-       if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */
-               u16 tag;
+       /* header read in */
+       if ((!m->req) && (m->rc.offset == m->rc.capacity)) {
                p9_debug(P9_DEBUG_TRANS, "got new header\n");
 
-               n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */
-               if (n >= m->client->msize) {
+               err = p9_parse_header(&m->rc, NULL, NULL, NULL, 0);
+               if (err) {
+                       p9_debug(P9_DEBUG_ERROR,
+                                "error parsing header: %d\n", err);
+                       goto error;
+               }
+
+               if (m->rc.size >= m->client->msize) {
                        p9_debug(P9_DEBUG_ERROR,
-                                "requested packet size too big: %d\n", n);
+                                "requested packet size too big: %d\n",
+                                m->rc.size);
                        err = -EIO;
                        goto error;
                }
 
-               tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */
                p9_debug(P9_DEBUG_TRANS,
-                        "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag);
+                        "mux %p pkt: size: %d bytes tag: %d\n",
+                        m, m->rc.size, m->rc.tag);
 
-               m->req = p9_tag_lookup(m->client, tag);
+               m->req = p9_tag_lookup(m->client, m->rc.tag);
                if (!m->req || (m->req->status != REQ_STATUS_SENT)) {
                        p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
-                                tag);
+                                m->rc.tag);
                        err = -EIO;
                        goto error;
                }
 
                if (m->req->rc == NULL) {
-                       m->req->rc = kmalloc(sizeof(struct p9_fcall) +
-                                               m->client->msize, GFP_NOFS);
-                       if (!m->req->rc) {
-                               m->req = NULL;
-                               err = -ENOMEM;
-                               goto error;
-                       }
+                       p9_debug(P9_DEBUG_ERROR,
+                                "No recv fcall for tag %d (req %p), disconnecting!\n",
+                                m->rc.tag, m->req);
+                       m->req = NULL;
+                       err = -EIO;
+                       goto error;
                }
-               m->rbuf = (char *)m->req->rc + sizeof(struct p9_fcall);
-               memcpy(m->rbuf, m->tmp_buf, m->rsize);
-               m->rsize = n;
+               m->rc.sdata = (char *)m->req->rc + sizeof(struct p9_fcall);
+               memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
+               m->rc.capacity = m->rc.size;
        }
 
-       /* not an else because some packets (like clunk) have no payload */
-       if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */
+       /* packet is read in
+        * not an else because some packets (like clunk) have no payload
+        */
+       if ((m->req) && (m->rc.offset == m->rc.capacity)) {
                p9_debug(P9_DEBUG_TRANS, "got new packet\n");
                spin_lock(&m->client->lock);
                if (m->req->status != REQ_STATUS_ERROR)
@@ -375,9 +379,9 @@ static void p9_read_work(struct work_struct *work)
                list_del(&m->req->req_list);
                spin_unlock(&m->client->lock);
                p9_client_cb(m->client, m->req, status);
-               m->rbuf = NULL;
-               m->rpos = 0;
-               m->rsize = 0;
+               m->rc.sdata = NULL;
+               m->rc.offset = 0;
+               m->rc.capacity = 0;
                m->req = NULL;
        }
 
index 199bc76..4acb1d5 100644 (file)
@@ -658,7 +658,7 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args)
        mutex_unlock(&virtio_9p_lock);
 
        if (!found) {
-               pr_err("no channels available\n");
+               pr_err("no channels available for device %s\n", devname);
                return ret;
        }
 
index d040365..8a4cc2f 100644 (file)
@@ -307,6 +307,9 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
 
        /* check that it's our buffer */
        if (lowpan_is_ipv6(*skb_network_header(skb))) {
+               /* Pull off the 1-byte 6lowpan header. */
+               skb_pull(skb, 1);
+
                /* Copy the packet so that the IPv6 header is
                 * properly aligned.
                 */
@@ -317,6 +320,7 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
 
                local_skb->protocol = htons(ETH_P_IPV6);
                local_skb->pkt_type = PACKET_HOST;
+               local_skb->dev = dev;
 
                skb_set_transport_header(local_skb, sizeof(struct ipv6hdr));
 
@@ -335,6 +339,8 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
                if (!local_skb)
                        goto drop;
 
+               local_skb->dev = dev;
+
                ret = iphc_decompress(local_skb, dev, chan);
                if (ret < 0) {
                        kfree_skb(local_skb);
@@ -343,7 +349,6 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
 
                local_skb->protocol = htons(ETH_P_IPV6);
                local_skb->pkt_type = PACKET_HOST;
-               local_skb->dev = dev;
 
                if (give_skb_to_upper(local_skb, dev)
                                != NET_RX_SUCCESS) {
index 41b5f38..c78ee2d 100644 (file)
@@ -688,21 +688,29 @@ static u8 update_white_list(struct hci_request *req)
         * command to remove it from the controller.
         */
        list_for_each_entry(b, &hdev->le_white_list, list) {
-               struct hci_cp_le_del_from_white_list cp;
+               /* If the device is neither in pend_le_conns nor
+                * pend_le_reports then remove it from the whitelist.
+                */
+               if (!hci_pend_le_action_lookup(&hdev->pend_le_conns,
+                                              &b->bdaddr, b->bdaddr_type) &&
+                   !hci_pend_le_action_lookup(&hdev->pend_le_reports,
+                                              &b->bdaddr, b->bdaddr_type)) {
+                       struct hci_cp_le_del_from_white_list cp;
+
+                       cp.bdaddr_type = b->bdaddr_type;
+                       bacpy(&cp.bdaddr, &b->bdaddr);
 
-               if (hci_pend_le_action_lookup(&hdev->pend_le_conns,
-                                             &b->bdaddr, b->bdaddr_type) ||
-                   hci_pend_le_action_lookup(&hdev->pend_le_reports,
-                                             &b->bdaddr, b->bdaddr_type)) {
-                       white_list_entries++;
+                       hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST,
+                                   sizeof(cp), &cp);
                        continue;
                }
 
-               cp.bdaddr_type = b->bdaddr_type;
-               bacpy(&cp.bdaddr, &b->bdaddr);
+               if (hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) {
+                       /* White list can not be used with RPAs */
+                       return 0x00;
+               }
 
-               hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST,
-                           sizeof(cp), &cp);
+               white_list_entries++;
        }
 
        /* Since all no longer valid white list entries have been
index 39a5149..eb4f5f2 100644 (file)
@@ -197,10 +197,20 @@ int l2cap_add_psm(struct l2cap_chan *chan, bdaddr_t *src, __le16 psm)
                chan->sport = psm;
                err = 0;
        } else {
-               u16 p;
+               u16 p, start, end, incr;
+
+               if (chan->src_type == BDADDR_BREDR) {
+                       start = L2CAP_PSM_DYN_START;
+                       end = L2CAP_PSM_AUTO_END;
+                       incr = 2;
+               } else {
+                       start = L2CAP_PSM_LE_DYN_START;
+                       end = L2CAP_PSM_LE_DYN_END;
+                       incr = 1;
+               }
 
                err = -EINVAL;
-               for (p = 0x1001; p < 0x1100; p += 2)
+               for (p = start; p <= end; p += incr)
                        if (!__l2cap_global_chan_by_addr(cpu_to_le16(p), src)) {
                                chan->psm   = cpu_to_le16(p);
                                chan->sport = cpu_to_le16(p);
index 1bb5515..e4cae72 100644 (file)
@@ -58,7 +58,7 @@ static int l2cap_validate_bredr_psm(u16 psm)
                return -EINVAL;
 
        /* Restrict usage of well-known PSMs */
-       if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE))
+       if (psm < L2CAP_PSM_DYN_START && !capable(CAP_NET_BIND_SERVICE))
                return -EACCES;
 
        return 0;
@@ -67,11 +67,11 @@ static int l2cap_validate_bredr_psm(u16 psm)
 static int l2cap_validate_le_psm(u16 psm)
 {
        /* Valid LE_PSM ranges are defined only until 0x00ff */
-       if (psm > 0x00ff)
+       if (psm > L2CAP_PSM_LE_DYN_END)
                return -EINVAL;
 
        /* Restrict fixed, SIG assigned PSM values to CAP_NET_BIND_SERVICE */
-       if (psm <= 0x007f && !capable(CAP_NET_BIND_SERVICE))
+       if (psm < L2CAP_PSM_LE_DYN_START && !capable(CAP_NET_BIND_SERVICE))
                return -EACCES;
 
        return 0;
@@ -125,6 +125,9 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
                        goto done;
        }
 
+       bacpy(&chan->src, &la.l2_bdaddr);
+       chan->src_type = la.l2_bdaddr_type;
+
        if (la.l2_cid)
                err = l2cap_add_scid(chan, __le16_to_cpu(la.l2_cid));
        else
@@ -156,9 +159,6 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
                break;
        }
 
-       bacpy(&chan->src, &la.l2_bdaddr);
-       chan->src_type = la.l2_bdaddr_type;
-
        if (chan->psm && bdaddr_type_is_le(chan->src_type))
                chan->mode = L2CAP_MODE_LE_FLOWCTL;
 
index ffed8a1..4b175df 100644 (file)
@@ -1072,22 +1072,6 @@ static void smp_notify_keys(struct l2cap_conn *conn)
                        hcon->dst_type = smp->remote_irk->addr_type;
                        queue_work(hdev->workqueue, &conn->id_addr_update_work);
                }
-
-               /* When receiving an indentity resolving key for
-                * a remote device that does not use a resolvable
-                * private address, just remove the key so that
-                * it is possible to use the controller white
-                * list for scanning.
-                *
-                * Userspace will have been told to not store
-                * this key at this point. So it is safe to
-                * just remove it.
-                */
-               if (!bacmp(&smp->remote_irk->rpa, BDADDR_ANY)) {
-                       list_del_rcu(&smp->remote_irk->list);
-                       kfree_rcu(smp->remote_irk, rcu);
-                       smp->remote_irk = NULL;
-               }
        }
 
        if (smp->csrk) {
index a1abe49..3addc05 100644 (file)
@@ -121,6 +121,7 @@ static struct notifier_block br_device_notifier = {
        .notifier_call = br_device_event
 };
 
+/* called with RTNL */
 static int br_switchdev_event(struct notifier_block *unused,
                              unsigned long event, void *ptr)
 {
@@ -130,7 +131,6 @@ static int br_switchdev_event(struct notifier_block *unused,
        struct switchdev_notifier_fdb_info *fdb_info;
        int err = NOTIFY_DONE;
 
-       rtnl_lock();
        p = br_port_get_rtnl(dev);
        if (!p)
                goto out;
@@ -155,7 +155,6 @@ static int br_switchdev_event(struct notifier_block *unused,
        }
 
 out:
-       rtnl_unlock();
        return err;
 }
 
index 10d8775..9e43a31 100644 (file)
@@ -152,7 +152,6 @@ static int process_one_ticket(struct ceph_auth_client *ac,
        void *ticket_buf = NULL;
        void *tp, *tpend;
        void **ptp;
-       struct ceph_timespec new_validity;
        struct ceph_crypto_key new_session_key;
        struct ceph_buffer *new_ticket_blob;
        unsigned long new_expires, new_renew_after;
@@ -193,8 +192,8 @@ static int process_one_ticket(struct ceph_auth_client *ac,
        if (ret)
                goto out;
 
-       ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
-       ceph_decode_timespec(&validity, &new_validity);
+       ceph_decode_timespec(&validity, dp);
+       dp += sizeof(struct ceph_timespec);
        new_expires = get_seconds() + validity.tv_sec;
        new_renew_after = new_expires - (validity.tv_sec / 4);
        dout(" expires=%lu renew_after=%lu\n", new_expires,
@@ -233,10 +232,10 @@ static int process_one_ticket(struct ceph_auth_client *ac,
                ceph_buffer_put(th->ticket_blob);
        th->session_key = new_session_key;
        th->ticket_blob = new_ticket_blob;
-       th->validity = new_validity;
        th->secret_id = new_secret_id;
        th->expires = new_expires;
        th->renew_after = new_renew_after;
+       th->have_key = true;
        dout(" got ticket service %d (%s) secret_id %lld len %d\n",
             type, ceph_entity_type_name(type), th->secret_id,
             (int)th->ticket_blob->vec.iov_len);
@@ -384,6 +383,24 @@ bad:
        return -ERANGE;
 }
 
+static bool need_key(struct ceph_x_ticket_handler *th)
+{
+       if (!th->have_key)
+               return true;
+
+       return get_seconds() >= th->renew_after;
+}
+
+static bool have_key(struct ceph_x_ticket_handler *th)
+{
+       if (th->have_key) {
+               if (get_seconds() >= th->expires)
+                       th->have_key = false;
+       }
+
+       return th->have_key;
+}
+
 static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
 {
        int want = ac->want_keys;
@@ -402,20 +419,18 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
                        continue;
 
                th = get_ticket_handler(ac, service);
-
                if (IS_ERR(th)) {
                        *pneed |= service;
                        continue;
                }
 
-               if (get_seconds() >= th->renew_after)
+               if (need_key(th))
                        *pneed |= service;
-               if (get_seconds() >= th->expires)
+               if (!have_key(th))
                        xi->have_keys &= ~service;
        }
 }
 
-
 static int ceph_x_build_request(struct ceph_auth_client *ac,
                                void *buf, void *end)
 {
@@ -667,14 +682,26 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
        ac->private = NULL;
 }
 
-static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
-                                  int peer_type)
+static void invalidate_ticket(struct ceph_auth_client *ac, int peer_type)
 {
        struct ceph_x_ticket_handler *th;
 
        th = get_ticket_handler(ac, peer_type);
        if (!IS_ERR(th))
-               memset(&th->validity, 0, sizeof(th->validity));
+               th->have_key = false;
+}
+
+static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
+                                        int peer_type)
+{
+       /*
+        * We are to invalidate a service ticket in the hopes of
+        * getting a new, hopefully more valid, one.  But, we won't get
+        * it unless our AUTH ticket is good, so invalidate AUTH ticket
+        * as well, just in case.
+        */
+       invalidate_ticket(ac, peer_type);
+       invalidate_ticket(ac, CEPH_ENTITY_TYPE_AUTH);
 }
 
 static int calcu_signature(struct ceph_x_authorizer *au,
index e8b7c69..40b1a3c 100644 (file)
@@ -16,7 +16,7 @@ struct ceph_x_ticket_handler {
        unsigned int service;
 
        struct ceph_crypto_key session_key;
-       struct ceph_timespec validity;
+       bool have_key;
 
        u64 secret_id;
        struct ceph_buffer *ticket_blob;
index 393bfb2..5fcfb98 100644 (file)
@@ -403,6 +403,7 @@ static int is_out(const struct crush_map *map,
  * @local_retries: localized retries
  * @local_fallback_retries: localized fallback retries
  * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
+ * @stable: stable mode starts rep=0 in the recursive call for all replicas
  * @vary_r: pass r to recursive calls
  * @out2: second output vector for leaf items (if @recurse_to_leaf)
  * @parent_r: r value passed from the parent
@@ -419,6 +420,7 @@ static int crush_choose_firstn(const struct crush_map *map,
                               unsigned int local_fallback_retries,
                               int recurse_to_leaf,
                               unsigned int vary_r,
+                              unsigned int stable,
                               int *out2,
                               int parent_r)
 {
@@ -433,13 +435,13 @@ static int crush_choose_firstn(const struct crush_map *map,
        int collide, reject;
        int count = out_size;
 
-       dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
+       dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d stable %d\n",
                recurse_to_leaf ? "_LEAF" : "",
                bucket->id, x, outpos, numrep,
                tries, recurse_tries, local_retries, local_fallback_retries,
-               parent_r);
+               parent_r, stable);
 
-       for (rep = outpos; rep < numrep && count > 0 ; rep++) {
+       for (rep = stable ? 0 : outpos; rep < numrep && count > 0 ; rep++) {
                /* keep trying until we get a non-out, non-colliding item */
                ftotal = 0;
                skip_rep = 0;
@@ -512,13 +514,14 @@ static int crush_choose_firstn(const struct crush_map *map,
                                                if (crush_choose_firstn(map,
                                                         map->buckets[-1-item],
                                                         weight, weight_max,
-                                                        x, outpos+1, 0,
+                                                        x, stable ? 1 : outpos+1, 0,
                                                         out2, outpos, count,
                                                         recurse_tries, 0,
                                                         local_retries,
                                                         local_fallback_retries,
                                                         0,
                                                         vary_r,
+                                                        stable,
                                                         NULL,
                                                         sub_r) <= outpos)
                                                        /* didn't get leaf */
@@ -816,6 +819,7 @@ int crush_do_rule(const struct crush_map *map,
        int choose_local_fallback_retries = map->choose_local_fallback_tries;
 
        int vary_r = map->chooseleaf_vary_r;
+       int stable = map->chooseleaf_stable;
 
        if ((__u32)ruleno >= map->max_rules) {
                dprintk(" bad ruleno %d\n", ruleno);
@@ -835,7 +839,8 @@ int crush_do_rule(const struct crush_map *map,
                case CRUSH_RULE_TAKE:
                        if ((curstep->arg1 >= 0 &&
                             curstep->arg1 < map->max_devices) ||
-                           (-1-curstep->arg1 < map->max_buckets &&
+                           (-1-curstep->arg1 >= 0 &&
+                            -1-curstep->arg1 < map->max_buckets &&
                             map->buckets[-1-curstep->arg1])) {
                                w[0] = curstep->arg1;
                                wsize = 1;
@@ -869,6 +874,11 @@ int crush_do_rule(const struct crush_map *map,
                                vary_r = curstep->arg1;
                        break;
 
+               case CRUSH_RULE_SET_CHOOSELEAF_STABLE:
+                       if (curstep->arg1 >= 0)
+                               stable = curstep->arg1;
+                       break;
+
                case CRUSH_RULE_CHOOSELEAF_FIRSTN:
                case CRUSH_RULE_CHOOSE_FIRSTN:
                        firstn = 1;
@@ -888,6 +898,7 @@ int crush_do_rule(const struct crush_map *map,
                        osize = 0;
 
                        for (i = 0; i < wsize; i++) {
+                               int bno;
                                /*
                                 * see CRUSH_N, CRUSH_N_MINUS macros.
                                 * basically, numrep <= 0 means relative to
@@ -900,6 +911,13 @@ int crush_do_rule(const struct crush_map *map,
                                                continue;
                                }
                                j = 0;
+                               /* make sure bucket id is valid */
+                               bno = -1 - w[i];
+                               if (bno < 0 || bno >= map->max_buckets) {
+                                       /* w[i] is probably CRUSH_ITEM_NONE */
+                                       dprintk("  bad w[i] %d\n", w[i]);
+                                       continue;
+                               }
                                if (firstn) {
                                        int recurse_tries;
                                        if (choose_leaf_tries)
@@ -911,7 +929,7 @@ int crush_do_rule(const struct crush_map *map,
                                                recurse_tries = choose_tries;
                                        osize += crush_choose_firstn(
                                                map,
-                                               map->buckets[-1-w[i]],
+                                               map->buckets[bno],
                                                weight, weight_max,
                                                x, numrep,
                                                curstep->arg2,
@@ -923,6 +941,7 @@ int crush_do_rule(const struct crush_map *map,
                                                choose_local_fallback_retries,
                                                recurse_to_leaf,
                                                vary_r,
+                                               stable,
                                                c+osize,
                                                0);
                                } else {
@@ -930,7 +949,7 @@ int crush_do_rule(const struct crush_map *map,
                                                    numrep : (result_max-osize));
                                        crush_choose_indep(
                                                map,
-                                               map->buckets[-1-w[i]],
+                                               map->buckets[bno],
                                                weight, weight_max,
                                                x, out_size, numrep,
                                                curstep->arg2,
index 9981039..9cfedf5 100644 (file)
@@ -23,9 +23,6 @@
 #include <linux/ceph/pagelist.h>
 #include <linux/export.h>
 
-#define list_entry_next(pos, member)                                   \
-       list_entry(pos->member.next, typeof(*pos), member)
-
 /*
  * Ceph uses the messenger to exchange ceph_msg messages with other
  * hosts in the system.  The messenger provides ordered and reliable
@@ -672,6 +669,8 @@ static void reset_connection(struct ceph_connection *con)
        }
        con->in_seq = 0;
        con->in_seq_acked = 0;
+
+       con->out_skip = 0;
 }
 
 /*
@@ -771,6 +770,8 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt)
 
 static void con_out_kvec_reset(struct ceph_connection *con)
 {
+       BUG_ON(con->out_skip);
+
        con->out_kvec_left = 0;
        con->out_kvec_bytes = 0;
        con->out_kvec_cur = &con->out_kvec[0];
@@ -779,9 +780,9 @@ static void con_out_kvec_reset(struct ceph_connection *con)
 static void con_out_kvec_add(struct ceph_connection *con,
                                size_t size, void *data)
 {
-       int index;
+       int index = con->out_kvec_left;
 
-       index = con->out_kvec_left;
+       BUG_ON(con->out_skip);
        BUG_ON(index >= ARRAY_SIZE(con->out_kvec));
 
        con->out_kvec[index].iov_len = size;
@@ -790,6 +791,27 @@ static void con_out_kvec_add(struct ceph_connection *con,
        con->out_kvec_bytes += size;
 }
 
+/*
+ * Chop off a kvec from the end.  Return residual number of bytes for
+ * that kvec, i.e. how many bytes would have been written if the kvec
+ * hadn't been nuked.
+ */
+static int con_out_kvec_skip(struct ceph_connection *con)
+{
+       int off = con->out_kvec_cur - con->out_kvec;
+       int skip = 0;
+
+       if (con->out_kvec_bytes > 0) {
+               skip = con->out_kvec[off + con->out_kvec_left - 1].iov_len;
+               BUG_ON(con->out_kvec_bytes < skip);
+               BUG_ON(!con->out_kvec_left);
+               con->out_kvec_bytes -= skip;
+               con->out_kvec_left--;
+       }
+
+       return skip;
+}
+
 #ifdef CONFIG_BLOCK
 
 /*
@@ -1042,7 +1064,7 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
        /* Move on to the next page */
 
        BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
-       cursor->page = list_entry_next(cursor->page, lru);
+       cursor->page = list_next_entry(cursor->page, lru);
        cursor->last_piece = cursor->resid <= PAGE_SIZE;
 
        return true;
@@ -1166,7 +1188,7 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
        if (!cursor->resid && cursor->total_resid) {
                WARN_ON(!cursor->last_piece);
                BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
-               cursor->data = list_entry_next(cursor->data, links);
+               cursor->data = list_next_entry(cursor->data, links);
                __ceph_msg_data_cursor_init(cursor);
                new_piece = true;
        }
@@ -1197,7 +1219,6 @@ static void prepare_write_message_footer(struct ceph_connection *con)
        m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
 
        dout("prepare_write_message_footer %p\n", con);
-       con->out_kvec_is_msg = true;
        con->out_kvec[v].iov_base = &m->footer;
        if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
                if (con->ops->sign_message)
@@ -1225,7 +1246,6 @@ static void prepare_write_message(struct ceph_connection *con)
        u32 crc;
 
        con_out_kvec_reset(con);
-       con->out_kvec_is_msg = true;
        con->out_msg_done = false;
 
        /* Sneak an ack in there first?  If we can get it into the same
@@ -1265,18 +1285,19 @@ static void prepare_write_message(struct ceph_connection *con)
 
        /* tag + hdr + front + middle */
        con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
-       con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
+       con_out_kvec_add(con, sizeof(con->out_hdr), &con->out_hdr);
        con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
 
        if (m->middle)
                con_out_kvec_add(con, m->middle->vec.iov_len,
                        m->middle->vec.iov_base);
 
-       /* fill in crc (except data pages), footer */
+       /* fill in hdr crc and finalize hdr */
        crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
        con->out_msg->hdr.crc = cpu_to_le32(crc);
-       con->out_msg->footer.flags = 0;
+       memcpy(&con->out_hdr, &con->out_msg->hdr, sizeof(con->out_hdr));
 
+       /* fill in front and middle crc, footer */
        crc = crc32c(0, m->front.iov_base, m->front.iov_len);
        con->out_msg->footer.front_crc = cpu_to_le32(crc);
        if (m->middle) {
@@ -1288,6 +1309,7 @@ static void prepare_write_message(struct ceph_connection *con)
        dout("%s front_crc %u middle_crc %u\n", __func__,
             le32_to_cpu(con->out_msg->footer.front_crc),
             le32_to_cpu(con->out_msg->footer.middle_crc));
+       con->out_msg->footer.flags = 0;
 
        /* is there a data payload? */
        con->out_msg->footer.data_crc = 0;
@@ -1492,7 +1514,6 @@ static int write_partial_kvec(struct ceph_connection *con)
                }
        }
        con->out_kvec_left = 0;
-       con->out_kvec_is_msg = false;
        ret = 1;
 out:
        dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con,
@@ -1584,6 +1605,7 @@ static int write_partial_skip(struct ceph_connection *con)
 {
        int ret;
 
+       dout("%s %p %d left\n", __func__, con, con->out_skip);
        while (con->out_skip > 0) {
                size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE);
 
@@ -2506,13 +2528,13 @@ more:
 
 more_kvec:
        /* kvec data queued? */
-       if (con->out_skip) {
-               ret = write_partial_skip(con);
+       if (con->out_kvec_left) {
+               ret = write_partial_kvec(con);
                if (ret <= 0)
                        goto out;
        }
-       if (con->out_kvec_left) {
-               ret = write_partial_kvec(con);
+       if (con->out_skip) {
+               ret = write_partial_skip(con);
                if (ret <= 0)
                        goto out;
        }
@@ -2805,13 +2827,17 @@ static bool con_backoff(struct ceph_connection *con)
 
 static void con_fault_finish(struct ceph_connection *con)
 {
+       dout("%s %p\n", __func__, con);
+
        /*
         * in case we faulted due to authentication, invalidate our
         * current tickets so that we can get new ones.
         */
-       if (con->auth_retry && con->ops->invalidate_authorizer) {
-               dout("calling invalidate_authorizer()\n");
-               con->ops->invalidate_authorizer(con);
+       if (con->auth_retry) {
+               dout("auth_retry %d, invalidating\n", con->auth_retry);
+               if (con->ops->invalidate_authorizer)
+                       con->ops->invalidate_authorizer(con);
+               con->auth_retry = 0;
        }
 
        if (con->ops->fault)
@@ -3050,16 +3076,31 @@ void ceph_msg_revoke(struct ceph_msg *msg)
                ceph_msg_put(msg);
        }
        if (con->out_msg == msg) {
-               dout("%s %p msg %p - was sending\n", __func__, con, msg);
-               con->out_msg = NULL;
-               if (con->out_kvec_is_msg) {
-                       con->out_skip = con->out_kvec_bytes;
-                       con->out_kvec_is_msg = false;
+               BUG_ON(con->out_skip);
+               /* footer */
+               if (con->out_msg_done) {
+                       con->out_skip += con_out_kvec_skip(con);
+               } else {
+                       BUG_ON(!msg->data_length);
+                       if (con->peer_features & CEPH_FEATURE_MSG_AUTH)
+                               con->out_skip += sizeof(msg->footer);
+                       else
+                               con->out_skip += sizeof(msg->old_footer);
                }
+               /* data, middle, front */
+               if (msg->data_length)
+                       con->out_skip += msg->cursor.total_resid;
+               if (msg->middle)
+                       con->out_skip += con_out_kvec_skip(con);
+               con->out_skip += con_out_kvec_skip(con);
+
+               dout("%s %p msg %p - was sending, will write %d skip %d\n",
+                    __func__, con, msg, con->out_kvec_bytes, con->out_skip);
                msg->hdr.seq = 0;
-
+               con->out_msg = NULL;
                ceph_msg_put(msg);
        }
+
        mutex_unlock(&con->mutex);
 }
 
@@ -3361,9 +3402,7 @@ static void ceph_msg_free(struct ceph_msg *m)
 static void ceph_msg_release(struct kref *kref)
 {
        struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
-       LIST_HEAD(data);
-       struct list_head *links;
-       struct list_head *next;
+       struct ceph_msg_data *data, *next;
 
        dout("%s %p\n", __func__, m);
        WARN_ON(!list_empty(&m->list_head));
@@ -3376,12 +3415,8 @@ static void ceph_msg_release(struct kref *kref)
                m->middle = NULL;
        }
 
-       list_splice_init(&m->data, &data);
-       list_for_each_safe(links, next, &data) {
-               struct ceph_msg_data *data;
-
-               data = list_entry(links, struct ceph_msg_data, links);
-               list_del_init(links);
+       list_for_each_entry_safe(data, next, &m->data, links) {
+               list_del_init(&data->links);
                ceph_msg_data_destroy(data);
        }
        m->data_length = 0;
index edda016..de85ddd 100644 (file)
@@ -364,10 +364,6 @@ static bool have_debugfs_info(struct ceph_mon_client *monc)
        return monc->client->have_fsid && monc->auth->global_id > 0;
 }
 
-/*
- * The monitor responds with mount ack indicate mount success.  The
- * included client ticket allows the client to talk to MDSs and OSDs.
- */
 static void ceph_monc_handle_map(struct ceph_mon_client *monc,
                                 struct ceph_msg *msg)
 {
index f8f2359..3534e12 100644 (file)
@@ -1770,6 +1770,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
        u32 osdmap_epoch;
        int already_completed;
        u32 bytes;
+       u8 decode_redir;
        unsigned int i;
 
        tid = le64_to_cpu(msg->hdr.tid);
@@ -1841,6 +1842,15 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
                p += 8 + 4; /* skip replay_version */
                p += 8; /* skip user_version */
 
+               if (le16_to_cpu(msg->hdr.version) >= 7)
+                       ceph_decode_8_safe(&p, end, decode_redir, bad_put);
+               else
+                       decode_redir = 1;
+       } else {
+               decode_redir = 0;
+       }
+
+       if (decode_redir) {
                err = ceph_redirect_decode(&p, end, &redir);
                if (err)
                        goto bad_put;
index 7d8f581..243574c 100644 (file)
@@ -342,23 +342,32 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
         c->choose_local_tries = ceph_decode_32(p);
         c->choose_local_fallback_tries =  ceph_decode_32(p);
         c->choose_total_tries = ceph_decode_32(p);
-        dout("crush decode tunable choose_local_tries = %d",
+        dout("crush decode tunable choose_local_tries = %d\n",
              c->choose_local_tries);
-        dout("crush decode tunable choose_local_fallback_tries = %d",
+        dout("crush decode tunable choose_local_fallback_tries = %d\n",
              c->choose_local_fallback_tries);
-        dout("crush decode tunable choose_total_tries = %d",
+        dout("crush decode tunable choose_total_tries = %d\n",
              c->choose_total_tries);
 
        ceph_decode_need(p, end, sizeof(u32), done);
        c->chooseleaf_descend_once = ceph_decode_32(p);
-       dout("crush decode tunable chooseleaf_descend_once = %d",
+       dout("crush decode tunable chooseleaf_descend_once = %d\n",
             c->chooseleaf_descend_once);
 
        ceph_decode_need(p, end, sizeof(u8), done);
        c->chooseleaf_vary_r = ceph_decode_8(p);
-       dout("crush decode tunable chooseleaf_vary_r = %d",
+       dout("crush decode tunable chooseleaf_vary_r = %d\n",
             c->chooseleaf_vary_r);
 
+       /* skip straw_calc_version, allowed_bucket_algs */
+       ceph_decode_need(p, end, sizeof(u8) + sizeof(u32), done);
+       *p += sizeof(u8) + sizeof(u32);
+
+       ceph_decode_need(p, end, sizeof(u8), done);
+       c->chooseleaf_stable = ceph_decode_8(p);
+       dout("crush decode tunable chooseleaf_stable = %d\n",
+            c->chooseleaf_stable);
+
 done:
        dout("crush_decode success\n");
        return c;
index cc9e365..8cba3d8 100644 (file)
@@ -4351,6 +4351,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
 
                diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
                diffs |= p->vlan_tci ^ skb->vlan_tci;
+               diffs |= skb_metadata_dst_cmp(p, skb);
                if (maclen == ETH_HLEN)
                        diffs |= compare_ether_header(skb_mac_header(p),
                                                      skb_mac_header(skb));
@@ -4548,10 +4549,12 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
                break;
 
        case GRO_MERGED_FREE:
-               if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+               if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
+                       skb_dst_drop(skb);
                        kmem_cache_free(skbuff_head_cache, skb);
-               else
+               } else {
                        __kfree_skb(skb);
+               }
                break;
 
        case GRO_HELD:
index 1df98c5..e92b759 100644 (file)
@@ -93,10 +93,17 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
  *  @sk2: Socket belonging to the existing reuseport group.
  *  May return ENOMEM and not add socket to group under memory pressure.
  */
-int reuseport_add_sock(struct sock *sk, const struct sock *sk2)
+int reuseport_add_sock(struct sock *sk, struct sock *sk2)
 {
        struct sock_reuseport *reuse;
 
+       if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
+               int err = reuseport_alloc(sk2);
+
+               if (err)
+                       return err;
+       }
+
        spin_lock_bh(&reuseport_lock);
        reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
                                          lockdep_is_held(&reuseport_lock)),
index c229205..7758247 100644 (file)
@@ -353,6 +353,7 @@ config INET_ESP
        select CRYPTO_CBC
        select CRYPTO_SHA1
        select CRYPTO_DES
+       select CRYPTO_ECHAINIV
        ---help---
          Support for IPsec ESP.
 
index 744e593..d07fc07 100644 (file)
@@ -289,10 +289,8 @@ static void __node_free_rcu(struct rcu_head *head)
 
        if (!n->tn_bits)
                kmem_cache_free(trie_leaf_kmem, n);
-       else if (n->tn_bits <= TNODE_KMALLOC_MAX)
-               kfree(n);
        else
-               vfree(n);
+               kvfree(n);
 }
 
 #define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu)
@@ -1396,9 +1394,10 @@ found:
                struct fib_info *fi = fa->fa_info;
                int nhsel, err;
 
-               if ((index >= (1ul << fa->fa_slen)) &&
-                   ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen != KEYLENGTH)))
-                       continue;
+               if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
+                       if (index >= (1ul << fa->fa_slen))
+                               continue;
+               }
                if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
                        continue;
                if (fi->fib_dead)
index 8bb8e7a..6029157 100644 (file)
@@ -361,13 +361,20 @@ struct sock *inet_diag_find_one_icsk(struct net *net,
                                 req->id.idiag_dport, req->id.idiag_src[0],
                                 req->id.idiag_sport, req->id.idiag_if);
 #if IS_ENABLED(CONFIG_IPV6)
-       else if (req->sdiag_family == AF_INET6)
-               sk = inet6_lookup(net, hashinfo,
-                                 (struct in6_addr *)req->id.idiag_dst,
-                                 req->id.idiag_dport,
-                                 (struct in6_addr *)req->id.idiag_src,
-                                 req->id.idiag_sport,
-                                 req->id.idiag_if);
+       else if (req->sdiag_family == AF_INET6) {
+               if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
+                   ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
+                       sk = inet_lookup(net, hashinfo, req->id.idiag_dst[3],
+                                        req->id.idiag_dport, req->id.idiag_src[3],
+                                        req->id.idiag_sport, req->id.idiag_if);
+               else
+                       sk = inet6_lookup(net, hashinfo,
+                                         (struct in6_addr *)req->id.idiag_dst,
+                                         req->id.idiag_dport,
+                                         (struct in6_addr *)req->id.idiag_src,
+                                         req->id.idiag_sport,
+                                         req->id.idiag_if);
+       }
 #endif
        else
                return ERR_PTR(-EINVAL);
index 3f00810..187c6fc 100644 (file)
@@ -661,6 +661,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
        struct ipq *qp;
 
        IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
+       skb_orphan(skb);
 
        /* Lookup (or create) queue header */
        qp = ip_find(net, ip_hdr(skb), user, vif);
index b1209b6..d77eb0c 100644 (file)
@@ -316,7 +316,10 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
        const struct iphdr *iph = ip_hdr(skb);
        struct rtable *rt;
 
-       if (sysctl_ip_early_demux && !skb_dst(skb) && !skb->sk) {
+       if (sysctl_ip_early_demux &&
+           !skb_dst(skb) &&
+           !skb->sk &&
+           !ip_is_fragment(iph)) {
                const struct net_protocol *ipprot;
                int protocol = iph->protocol;
 
index 67f7c9d..2ed9dd2 100644 (file)
@@ -143,7 +143,11 @@ static char dhcp_client_identifier[253] __initdata;
 
 /* Persistent data: */
 
+#ifdef IPCONFIG_DYNAMIC
 static int ic_proto_used;                      /* Protocol used, if any */
+#else
+#define ic_proto_used 0
+#endif
 static __be32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */
 static u8 ic_domain[64];               /* DNS (not NIS) domain name */
 
index 6fb869f..a04dee5 100644 (file)
@@ -27,8 +27,6 @@ static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
 {
        int err;
 
-       skb_orphan(skb);
-
        local_bh_disable();
        err = ip_defrag(net, skb, user);
        local_bh_enable();
index fd17eec..19746b3 100644 (file)
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
+#include <asm/unaligned.h>
 #include <net/busy_poll.h>
 
 int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
@@ -2638,6 +2639,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
        const struct inet_connection_sock *icsk = inet_csk(sk);
        u32 now = tcp_time_stamp;
        unsigned int start;
+       u64 rate64;
        u32 rate;
 
        memset(info, 0, sizeof(*info));
@@ -2703,15 +2705,17 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
        info->tcpi_total_retrans = tp->total_retrans;
 
        rate = READ_ONCE(sk->sk_pacing_rate);
-       info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL;
+       rate64 = rate != ~0U ? rate : ~0ULL;
+       put_unaligned(rate64, &info->tcpi_pacing_rate);
 
        rate = READ_ONCE(sk->sk_max_pacing_rate);
-       info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL;
+       rate64 = rate != ~0U ? rate : ~0ULL;
+       put_unaligned(rate64, &info->tcpi_max_pacing_rate);
 
        do {
                start = u64_stats_fetch_begin_irq(&tp->syncp);
-               info->tcpi_bytes_acked = tp->bytes_acked;
-               info->tcpi_bytes_received = tp->bytes_received;
+               put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked);
+               put_unaligned(tp->bytes_received, &info->tcpi_bytes_received);
        } while (u64_stats_fetch_retry_irq(&tp->syncp, start));
        info->tcpi_segs_out = tp->segs_out;
        info->tcpi_segs_in = tp->segs_in;
index 0003d40..1c2a734 100644 (file)
@@ -2164,8 +2164,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb;
-       int cnt, oldcnt;
-       int err;
+       int cnt, oldcnt, lost;
        unsigned int mss;
        /* Use SACK to deduce losses of new sequences sent during recovery */
        const u32 loss_high = tcp_is_sack(tp) ?  tp->snd_nxt : tp->high_seq;
@@ -2205,9 +2204,10 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
                                break;
 
                        mss = tcp_skb_mss(skb);
-                       err = tcp_fragment(sk, skb, (packets - oldcnt) * mss,
-                                          mss, GFP_ATOMIC);
-                       if (err < 0)
+                       /* If needed, chop off the prefix to mark as lost. */
+                       lost = (packets - oldcnt) * mss;
+                       if (lost < skb->len &&
+                           tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
                                break;
                        cnt = packets;
                }
@@ -2366,8 +2366,6 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
                        tp->snd_ssthresh = tp->prior_ssthresh;
                        tcp_ecn_withdraw_cwr(tp);
                }
-       } else {
-               tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
        }
        tp->snd_cwnd_stamp = tcp_time_stamp;
        tp->undo_marker = 0;
index 5ced3e4..a4d5237 100644 (file)
@@ -707,7 +707,8 @@ release_sk1:
    outside socket context is ugly, certainly. What can I do?
  */
 
-static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
+static void tcp_v4_send_ack(struct net *net,
+                           struct sk_buff *skb, u32 seq, u32 ack,
                            u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key,
                            int reply_flags, u8 tos)
@@ -722,7 +723,6 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
                        ];
        } rep;
        struct ip_reply_arg arg;
-       struct net *net = dev_net(skb_dst(skb)->dev);
 
        memset(&rep.th, 0, sizeof(struct tcphdr));
        memset(&arg, 0, sizeof(arg));
@@ -784,7 +784,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 
-       tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+       tcp_v4_send_ack(sock_net(sk), skb,
+                       tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent,
@@ -803,8 +804,10 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
-       tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
-                       tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+       u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
+                                            tcp_sk(sk)->snd_nxt;
+
+       tcp_v4_send_ack(sock_net(sk), skb, seq,
                        tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
                        tcp_time_stamp,
                        req->ts_recent,
index dc45b53..be0b218 100644 (file)
@@ -499,6 +499,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
        struct sock *sk, *result;
        struct hlist_nulls_node *node;
        int score, badness, matches = 0, reuseport = 0;
+       bool select_ok = true;
        u32 hash = 0;
 
 begin:
@@ -512,14 +513,18 @@ begin:
                        badness = score;
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
-                               struct sock *sk2;
                                hash = udp_ehashfn(net, daddr, hnum,
                                                   saddr, sport);
-                               sk2 = reuseport_select_sock(sk, hash, skb,
-                                                           sizeof(struct udphdr));
-                               if (sk2) {
-                                       result = sk2;
-                                       goto found;
+                               if (select_ok) {
+                                       struct sock *sk2;
+
+                                       sk2 = reuseport_select_sock(sk, hash, skb,
+                                                       sizeof(struct udphdr));
+                                       if (sk2) {
+                                               result = sk2;
+                                               select_ok = false;
+                                               goto found;
+                                       }
                                }
                                matches = 1;
                        }
@@ -563,6 +568,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
        unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
        struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
        int score, badness, matches = 0, reuseport = 0;
+       bool select_ok = true;
        u32 hash = 0;
 
        rcu_read_lock();
@@ -601,14 +607,18 @@ begin:
                        badness = score;
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
-                               struct sock *sk2;
                                hash = udp_ehashfn(net, daddr, hnum,
                                                   saddr, sport);
-                               sk2 = reuseport_select_sock(sk, hash, skb,
+                               if (select_ok) {
+                                       struct sock *sk2;
+
+                                       sk2 = reuseport_select_sock(sk, hash, skb,
                                                        sizeof(struct udphdr));
-                               if (sk2) {
-                                       result = sk2;
-                                       goto found;
+                                       if (sk2) {
+                                               result = sk2;
+                                               select_ok = false;
+                                               goto found;
+                                       }
                                }
                                matches = 1;
                        }
index bb7dabe..40c8975 100644 (file)
@@ -69,6 +69,7 @@ config INET6_ESP
        select CRYPTO_CBC
        select CRYPTO_SHA1
        select CRYPTO_DES
+       select CRYPTO_ECHAINIV
        ---help---
          Support for IPsec ESP.
 
index 517c55b..4281621 100644 (file)
@@ -162,6 +162,9 @@ ipv4_connected:
        fl6.fl6_dport = inet->inet_dport;
        fl6.fl6_sport = inet->inet_sport;
 
+       if (!fl6.flowi6_oif)
+               fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
+
        if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
                fl6.flowi6_oif = np->mcast_oif;
 
index 23de98f..a163102 100644 (file)
@@ -909,6 +909,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
        struct rt6_info *rt;
 #endif
        int err;
+       int flags = 0;
 
        /* The correct way to handle this would be to do
         * ip6_route_get_saddr, and then ip6_route_output; however,
@@ -940,10 +941,13 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
                        dst_release(*dst);
                        *dst = NULL;
                }
+
+               if (fl6->flowi6_oif)
+                       flags |= RT6_LOOKUP_F_IFACE;
        }
 
        if (!*dst)
-               *dst = ip6_route_output(net, sk, fl6);
+               *dst = ip6_route_output_flags(net, sk, fl6, flags);
 
        err = (*dst)->error;
        if (err)
index 3c8834b..ed44663 100644 (file)
@@ -1183,11 +1183,10 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
        return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 }
 
-struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
-                                   struct flowi6 *fl6)
+struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
+                                        struct flowi6 *fl6, int flags)
 {
        struct dst_entry *dst;
-       int flags = 0;
        bool any_src;
 
        dst = l3mdev_rt6_dst_by_oif(net, fl6);
@@ -1208,7 +1207,7 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
 
        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
 }
-EXPORT_SYMBOL(ip6_route_output);
+EXPORT_SYMBOL_GPL(ip6_route_output_flags);
 
 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 {
index e794ef6..2066d1c 100644 (file)
@@ -201,14 +201,14 @@ static int ipip6_tunnel_create(struct net_device *dev)
        if ((__force u16)t->parms.i_flags & SIT_ISATAP)
                dev->priv_flags |= IFF_ISATAP;
 
+       dev->rtnl_link_ops = &sit_link_ops;
+
        err = register_netdevice(dev);
        if (err < 0)
                goto out;
 
        ipip6_tunnel_clone_6rd(dev, sitn);
 
-       dev->rtnl_link_ops = &sit_link_ops;
-
        dev_hold(dev);
 
        ipip6_tunnel_link(sitn, t);
index 5d2c2af..22e28a4 100644 (file)
@@ -257,6 +257,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
        struct sock *sk, *result;
        struct hlist_nulls_node *node;
        int score, badness, matches = 0, reuseport = 0;
+       bool select_ok = true;
        u32 hash = 0;
 
 begin:
@@ -270,14 +271,18 @@ begin:
                        badness = score;
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
-                               struct sock *sk2;
                                hash = udp6_ehashfn(net, daddr, hnum,
                                                    saddr, sport);
-                               sk2 = reuseport_select_sock(sk, hash, skb,
-                                                           sizeof(struct udphdr));
-                               if (sk2) {
-                                       result = sk2;
-                                       goto found;
+                               if (select_ok) {
+                                       struct sock *sk2;
+
+                                       sk2 = reuseport_select_sock(sk, hash, skb,
+                                                       sizeof(struct udphdr));
+                                       if (sk2) {
+                                               result = sk2;
+                                               select_ok = false;
+                                               goto found;
+                                       }
                                }
                                matches = 1;
                        }
@@ -321,6 +326,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
        unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
        struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
        int score, badness, matches = 0, reuseport = 0;
+       bool select_ok = true;
        u32 hash = 0;
 
        rcu_read_lock();
@@ -358,14 +364,18 @@ begin:
                        badness = score;
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
-                               struct sock *sk2;
                                hash = udp6_ehashfn(net, daddr, hnum,
                                                    saddr, sport);
-                               sk2 = reuseport_select_sock(sk, hash, skb,
+                               if (select_ok) {
+                                       struct sock *sk2;
+
+                                       sk2 = reuseport_select_sock(sk, hash, skb,
                                                        sizeof(struct udphdr));
-                               if (sk2) {
-                                       result = sk2;
-                                       goto found;
+                                       if (sk2) {
+                                               result = sk2;
+                                               select_ok = false;
+                                               goto found;
+                                       }
                                }
                                matches = 1;
                        }
index 3c4caa6..5728e76 100644 (file)
@@ -134,11 +134,10 @@ int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush)
                return -1;
        }
        skb_put(skb, count);
+       pr_debug("%s(), skb->len=%d\n", __func__, skb->len);
 
        spin_unlock_irqrestore(&self->spinlock, flags);
 
-       pr_debug("%s(), skb->len=%d\n", __func__ , skb->len);
-
        if (flush) {
                /* ircomm_tty_do_softint will take care of the rest */
                schedule_work(&self->tqueue);
index ef50a94..fc3598a 100644 (file)
@@ -708,6 +708,9 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
        if (!addr || addr->sa_family != AF_IUCV)
                return -EINVAL;
 
+       if (addr_len < sizeof(struct sockaddr_iucv))
+               return -EINVAL;
+
        lock_sock(sk);
        if (sk->sk_state != IUCV_OPEN) {
                err = -EBADFD;
index f7fc0e0..978d3bc 100644 (file)
@@ -1733,7 +1733,6 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local)
                if (sdata->vif.type != NL80211_IFTYPE_ADHOC)
                        continue;
                sdata->u.ibss.last_scan_completed = jiffies;
-               ieee80211_queue_work(&local->hw, &sdata->work);
        }
        mutex_unlock(&local->iflist_mtx);
 }
index 6bcf0fa..8190bf2 100644 (file)
@@ -248,6 +248,7 @@ static void ieee80211_restart_work(struct work_struct *work)
 
        /* wait for scan work complete */
        flush_workqueue(local->workqueue);
+       flush_work(&local->sched_scan_stopped_work);
 
        WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
             "%s called with hardware scan in progress\n", __func__);
@@ -256,6 +257,11 @@ static void ieee80211_restart_work(struct work_struct *work)
        list_for_each_entry(sdata, &local->interfaces, list)
                flush_delayed_work(&sdata->dec_tailroom_needed_wk);
        ieee80211_scan_cancel(local);
+
+       /* make sure any new ROC will consider local->in_reconfig */
+       flush_delayed_work(&local->roc_work);
+       flush_work(&local->hw_roc_done);
+
        ieee80211_reconfig(local);
        rtnl_unlock();
 }
index fa28500..6f85b6a 100644 (file)
@@ -1370,17 +1370,6 @@ out:
        sdata_unlock(sdata);
 }
 
-void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local)
-{
-       struct ieee80211_sub_if_data *sdata;
-
-       rcu_read_lock();
-       list_for_each_entry_rcu(sdata, &local->interfaces, list)
-               if (ieee80211_vif_is_mesh(&sdata->vif) &&
-                   ieee80211_sdata_running(sdata))
-                       ieee80211_queue_work(&local->hw, &sdata->work);
-       rcu_read_unlock();
-}
 
 void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 {
index a159634..4a8019f 100644 (file)
@@ -362,14 +362,10 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
        return sdata->u.mesh.mesh_pp_id == IEEE80211_PATH_PROTOCOL_HWMP;
 }
 
-void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local);
-
 void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata);
 void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata);
 void ieee80211s_stop(void);
 #else
-static inline void
-ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {}
 static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
 { return false; }
 static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata)
index 1c342e2..bfbb1ac 100644 (file)
@@ -4005,8 +4005,6 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
                if (!ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR))
                        ieee80211_queue_work(&sdata->local->hw,
                                             &sdata->u.mgd.monitor_work);
-               /* and do all the other regular work too */
-               ieee80211_queue_work(&sdata->local->hw, &sdata->work);
        }
 }
 
index 8b2f4ea..55a9c5b 100644 (file)
@@ -252,14 +252,11 @@ static bool ieee80211_recalc_sw_work(struct ieee80211_local *local,
 static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc,
                                         unsigned long start_time)
 {
-       struct ieee80211_local *local = roc->sdata->local;
-
        if (WARN_ON(roc->notified))
                return;
 
        roc->start_time = start_time;
        roc->started = true;
-       roc->hw_begun = true;
 
        if (roc->mgmt_tx_cookie) {
                if (!WARN_ON(!roc->frame)) {
@@ -274,9 +271,6 @@ static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc,
        }
 
        roc->notified = true;
-
-       if (!local->ops->remain_on_channel)
-               ieee80211_recalc_sw_work(local, start_time);
 }
 
 static void ieee80211_hw_roc_start(struct work_struct *work)
@@ -291,6 +285,7 @@ static void ieee80211_hw_roc_start(struct work_struct *work)
                if (!roc->started)
                        break;
 
+               roc->hw_begun = true;
                ieee80211_handle_roc_started(roc, local->hw_roc_start_time);
        }
 
@@ -413,6 +408,10 @@ void ieee80211_start_next_roc(struct ieee80211_local *local)
                return;
        }
 
+       /* defer roc if driver is not started (i.e. during reconfig) */
+       if (local->in_reconfig)
+               return;
+
        roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work,
                               list);
 
@@ -534,8 +533,10 @@ ieee80211_coalesce_hw_started_roc(struct ieee80211_local *local,
         * begin, otherwise they'll both be marked properly by the work
         * struct that runs once the driver notifies us of the beginning
         */
-       if (cur_roc->hw_begun)
+       if (cur_roc->hw_begun) {
+               new_roc->hw_begun = true;
                ieee80211_handle_roc_started(new_roc, now);
+       }
 
        return true;
 }
@@ -658,6 +659,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
                        queued = true;
                        roc->on_channel = tmp->on_channel;
                        ieee80211_handle_roc_started(roc, now);
+                       ieee80211_recalc_sw_work(local, now);
                        break;
                }
 
index a413e52..ae980ce 100644 (file)
@@ -314,6 +314,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
        bool was_scanning = local->scanning;
        struct cfg80211_scan_request *scan_req;
        struct ieee80211_sub_if_data *scan_sdata;
+       struct ieee80211_sub_if_data *sdata;
 
        lockdep_assert_held(&local->mtx);
 
@@ -373,7 +374,16 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
 
        ieee80211_mlme_notify_scan_completed(local);
        ieee80211_ibss_notify_scan_completed(local);
-       ieee80211_mesh_notify_scan_completed(local);
+
+       /* Requeue all the work that might have been ignored while
+        * the scan was in progress; if there was none this will
+        * just be a no-op for the particular interface.
+        */
+       list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+               if (ieee80211_sdata_running(sdata))
+                       ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+       }
+
        if (was_scanning)
                ieee80211_start_next_roc(local);
 }
@@ -1213,6 +1223,14 @@ void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw)
 
        trace_api_sched_scan_stopped(local);
 
+       /*
+        * this shouldn't really happen, so for simplicity
+        * simply ignore it, and let mac80211 reconfigure
+        * the sched scan later on.
+        */
+       if (local->in_reconfig)
+               return;
+
        schedule_work(&local->sched_scan_stopped_work);
 }
 EXPORT_SYMBOL(ieee80211_sched_scan_stopped);
index 4402ad5..a4a4f89 100644 (file)
@@ -1453,7 +1453,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
 
        more_data = ieee80211_sta_ps_more_data(sta, ignored_acs, reason, driver_release_tids);
 
-       if (reason == IEEE80211_FRAME_RELEASE_PSPOLL)
+       if (driver_release_tids && reason == IEEE80211_FRAME_RELEASE_PSPOLL)
                driver_release_tids =
                        BIT(find_highest_prio_tid(driver_release_tids));
 
index 5bad05e..6101deb 100644 (file)
@@ -51,6 +51,11 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
        struct ieee80211_hdr *hdr = (void *)skb->data;
        int ac;
 
+       if (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER) {
+               ieee80211_free_txskb(&local->hw, skb);
+               return;
+       }
+
        /*
         * This skb 'survived' a round-trip through the driver, and
         * hopefully the driver didn't mangle it too badly. However,
index 3943d4b..58f58bd 100644 (file)
@@ -2043,16 +2043,26 @@ int ieee80211_reconfig(struct ieee80211_local *local)
                 */
                if (sched_scan_req->n_scan_plans > 1 ||
                    __ieee80211_request_sched_scan_start(sched_scan_sdata,
-                                                        sched_scan_req))
+                                                        sched_scan_req)) {
+                       RCU_INIT_POINTER(local->sched_scan_sdata, NULL);
+                       RCU_INIT_POINTER(local->sched_scan_req, NULL);
                        sched_scan_stopped = true;
+               }
        mutex_unlock(&local->mtx);
 
        if (sched_scan_stopped)
                cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy);
 
  wake_up:
-       local->in_reconfig = false;
-       barrier();
+       if (local->in_reconfig) {
+               local->in_reconfig = false;
+               barrier();
+
+               /* Restart deferred ROCs */
+               mutex_lock(&local->mtx);
+               ieee80211_start_next_roc(local);
+               mutex_unlock(&local->mtx);
+       }
 
        if (local->monitors == local->open_count && local->monitors > 0)
                ieee80211_add_virtual_monitor(local);
index 43d8c98..f0f688d 100644 (file)
@@ -164,8 +164,6 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
        };
        struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
-       if (e.cidr == 0)
-               return -EINVAL;
        if (adt == IPSET_TEST)
                e.cidr = HOST_MASK;
 
@@ -377,8 +375,6 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
        };
        struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
-       if (e.cidr == 0)
-               return -EINVAL;
        if (adt == IPSET_TEST)
                e.cidr = HOST_MASK;
 
index 3cb3cb8..58882de 100644 (file)
@@ -66,6 +66,21 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks);
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
 EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
 
+static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
+static __read_mostly bool nf_conntrack_locks_all;
+
+void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
+{
+       spin_lock(lock);
+       while (unlikely(nf_conntrack_locks_all)) {
+               spin_unlock(lock);
+               spin_lock(&nf_conntrack_locks_all_lock);
+               spin_unlock(&nf_conntrack_locks_all_lock);
+               spin_lock(lock);
+       }
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_lock);
+
 static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
 {
        h1 %= CONNTRACK_LOCKS;
@@ -82,12 +97,12 @@ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
        h1 %= CONNTRACK_LOCKS;
        h2 %= CONNTRACK_LOCKS;
        if (h1 <= h2) {
-               spin_lock(&nf_conntrack_locks[h1]);
+               nf_conntrack_lock(&nf_conntrack_locks[h1]);
                if (h1 != h2)
                        spin_lock_nested(&nf_conntrack_locks[h2],
                                         SINGLE_DEPTH_NESTING);
        } else {
-               spin_lock(&nf_conntrack_locks[h2]);
+               nf_conntrack_lock(&nf_conntrack_locks[h2]);
                spin_lock_nested(&nf_conntrack_locks[h1],
                                 SINGLE_DEPTH_NESTING);
        }
@@ -102,16 +117,19 @@ static void nf_conntrack_all_lock(void)
 {
        int i;
 
-       for (i = 0; i < CONNTRACK_LOCKS; i++)
-               spin_lock_nested(&nf_conntrack_locks[i], i);
+       spin_lock(&nf_conntrack_locks_all_lock);
+       nf_conntrack_locks_all = true;
+
+       for (i = 0; i < CONNTRACK_LOCKS; i++) {
+               spin_lock(&nf_conntrack_locks[i]);
+               spin_unlock(&nf_conntrack_locks[i]);
+       }
 }
 
 static void nf_conntrack_all_unlock(void)
 {
-       int i;
-
-       for (i = 0; i < CONNTRACK_LOCKS; i++)
-               spin_unlock(&nf_conntrack_locks[i]);
+       nf_conntrack_locks_all = false;
+       spin_unlock(&nf_conntrack_locks_all_lock);
 }
 
 unsigned int nf_conntrack_htable_size __read_mostly;
@@ -757,7 +775,7 @@ restart:
        hash = hash_bucket(_hash, net);
        for (; i < net->ct.htable_size; i++) {
                lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
-               spin_lock(lockp);
+               nf_conntrack_lock(lockp);
                if (read_seqcount_retry(&net->ct.generation, sequence)) {
                        spin_unlock(lockp);
                        goto restart;
@@ -1382,7 +1400,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
        for (; *bucket < net->ct.htable_size; (*bucket)++) {
                lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
                local_bh_disable();
-               spin_lock(lockp);
+               nf_conntrack_lock(lockp);
                if (*bucket < net->ct.htable_size) {
                        hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
                                if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
index bd9d315..3b40ec5 100644 (file)
@@ -425,7 +425,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
        }
        local_bh_disable();
        for (i = 0; i < net->ct.htable_size; i++) {
-               spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+               nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
                if (i < net->ct.htable_size) {
                        hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
                                unhelp(h, me);
index dbb1bb3..355e855 100644 (file)
@@ -840,7 +840,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
        for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
 restart:
                lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
-               spin_lock(lockp);
+               nf_conntrack_lock(lockp);
                if (cb->args[0] >= net->ct.htable_size) {
                        spin_unlock(lockp);
                        goto out;
index b6605e0..5eefe4a 100644 (file)
@@ -224,12 +224,12 @@ static int __init nf_tables_netdev_init(void)
 
        nft_register_chain_type(&nft_filter_chain_netdev);
        ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
-       if (ret < 0)
+       if (ret < 0) {
                nft_unregister_chain_type(&nft_filter_chain_netdev);
-
+               return ret;
+       }
        register_netdevice_notifier(&nf_tables_netdev_notifier);
-
-       return ret;
+       return 0;
 }
 
 static void __exit nf_tables_netdev_exit(void)
index 5d010f2..94837d2 100644 (file)
@@ -307,12 +307,12 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
 
        local_bh_disable();
        for (i = 0; i < net->ct.htable_size; i++) {
-               spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+               nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
                if (i < net->ct.htable_size) {
                        hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
                                untimeout(h, timeout);
                }
-               spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+               nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
        }
        local_bh_enable();
 }
index 383c171..b78c28b 100644 (file)
@@ -46,16 +46,14 @@ static void nft_byteorder_eval(const struct nft_expr *expr,
                switch (priv->op) {
                case NFT_BYTEORDER_NTOH:
                        for (i = 0; i < priv->len / 8; i++) {
-                               src64 = get_unaligned_be64(&src[i]);
-                               src64 = be64_to_cpu((__force __be64)src64);
+                               src64 = get_unaligned((u64 *)&src[i]);
                                put_unaligned_be64(src64, &dst[i]);
                        }
                        break;
                case NFT_BYTEORDER_HTON:
                        for (i = 0; i < priv->len / 8; i++) {
                                src64 = get_unaligned_be64(&src[i]);
-                               src64 = (__force u64)cpu_to_be64(src64);
-                               put_unaligned_be64(src64, &dst[i]);
+                               put_unaligned(src64, (u64 *)&dst[i]);
                        }
                        break;
                }
index a0eb216..d4a4619 100644 (file)
@@ -127,6 +127,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
                               NF_CT_LABELS_MAX_SIZE - size);
                return;
        }
+#endif
        case NFT_CT_BYTES: /* fallthrough */
        case NFT_CT_PKTS: {
                const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
@@ -138,7 +139,6 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
                memcpy(dest, &count, sizeof(count));
                return;
        }
-#endif
        default:
                break;
        }
index b7c43de..e118397 100644 (file)
@@ -228,7 +228,7 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        u8 nexthdr;
-       __be16 frag_off;
+       __be16 frag_off, oldlen, newlen;
        int tcphoff;
        int ret;
 
@@ -244,7 +244,12 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
                return NF_DROP;
        if (ret > 0) {
                ipv6h = ipv6_hdr(skb);
-               ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
+               oldlen = ipv6h->payload_len;
+               newlen = htons(ntohs(oldlen) + ret);
+               if (skb->ip_summed == CHECKSUM_COMPLETE)
+                       skb->csum = csum_add(csum_sub(skb->csum, oldlen),
+                                            newlen);
+               ipv6h->payload_len = newlen;
        }
        return XT_CONTINUE;
 }
index 81dc1bb..f1ffb34 100644 (file)
@@ -2831,7 +2831,8 @@ static int netlink_dump(struct sock *sk)
         * reasonable static buffer based on the expected largest dump of a
         * single netdev. The outcome is MSG_TRUNC error.
         */
-       skb_reserve(skb, skb_tailroom(skb) - alloc_size);
+       if (!netlink_rx_is_mmaped(sk))
+               skb_reserve(skb, skb_tailroom(skb) - alloc_size);
        netlink_skb_set_owner_r(skb, sk);
 
        len = cb->dump(skb, cb);
index f222885..9481d55 100644 (file)
@@ -122,44 +122,34 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
 static void rds_ib_add_one(struct ib_device *device)
 {
        struct rds_ib_device *rds_ibdev;
-       struct ib_device_attr *dev_attr;
 
        /* Only handle IB (no iWARP) devices */
        if (device->node_type != RDMA_NODE_IB_CA)
                return;
 
-       dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
-       if (!dev_attr)
-               return;
-
-       if (ib_query_device(device, dev_attr)) {
-               rdsdebug("Query device failed for %s\n", device->name);
-               goto free_attr;
-       }
-
        rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL,
                                 ibdev_to_node(device));
        if (!rds_ibdev)
-               goto free_attr;
+               return;
 
        spin_lock_init(&rds_ibdev->spinlock);
        atomic_set(&rds_ibdev->refcount, 1);
        INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
 
-       rds_ibdev->max_wrs = dev_attr->max_qp_wr;
-       rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE);
+       rds_ibdev->max_wrs = device->attrs.max_qp_wr;
+       rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE);
 
-       rds_ibdev->fmr_max_remaps = dev_attr->max_map_per_fmr?: 32;
-       rds_ibdev->max_1m_fmrs = dev_attr->max_mr ?
-               min_t(unsigned int, (dev_attr->max_mr / 2),
+       rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
+       rds_ibdev->max_1m_fmrs = device->attrs.max_mr ?
+               min_t(unsigned int, (device->attrs.max_mr / 2),
                      rds_ib_fmr_1m_pool_size) : rds_ib_fmr_1m_pool_size;
 
-       rds_ibdev->max_8k_fmrs = dev_attr->max_mr ?
-               min_t(unsigned int, ((dev_attr->max_mr / 2) * RDS_MR_8K_SCALE),
+       rds_ibdev->max_8k_fmrs = device->attrs.max_mr ?
+               min_t(unsigned int, ((device->attrs.max_mr / 2) * RDS_MR_8K_SCALE),
                      rds_ib_fmr_8k_pool_size) : rds_ib_fmr_8k_pool_size;
 
-       rds_ibdev->max_initiator_depth = dev_attr->max_qp_init_rd_atom;
-       rds_ibdev->max_responder_resources = dev_attr->max_qp_rd_atom;
+       rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom;
+       rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom;
 
        rds_ibdev->dev = device;
        rds_ibdev->pd = ib_alloc_pd(device);
@@ -183,7 +173,7 @@ static void rds_ib_add_one(struct ib_device *device)
        }
 
        rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_fmrs = %d, max_8k_fmrs = %d\n",
-                dev_attr->max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
+                device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
                 rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_fmrs,
                 rds_ibdev->max_8k_fmrs);
 
@@ -202,8 +192,6 @@ static void rds_ib_add_one(struct ib_device *device)
 
 put_dev:
        rds_ib_dev_put(rds_ibdev);
-free_attr:
-       kfree(dev_attr);
 }
 
 /*
index 576f182..f4a9fff 100644 (file)
@@ -60,30 +60,20 @@ LIST_HEAD(iw_nodev_conns);
 static void rds_iw_add_one(struct ib_device *device)
 {
        struct rds_iw_device *rds_iwdev;
-       struct ib_device_attr *dev_attr;
 
        /* Only handle iwarp devices */
        if (device->node_type != RDMA_NODE_RNIC)
                return;
 
-       dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
-       if (!dev_attr)
-               return;
-
-       if (ib_query_device(device, dev_attr)) {
-               rdsdebug("Query device failed for %s\n", device->name);
-               goto free_attr;
-       }
-
        rds_iwdev = kmalloc(sizeof *rds_iwdev, GFP_KERNEL);
        if (!rds_iwdev)
-               goto free_attr;
+               return;
 
        spin_lock_init(&rds_iwdev->spinlock);
 
-       rds_iwdev->dma_local_lkey = !!(dev_attr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
-       rds_iwdev->max_wrs = dev_attr->max_qp_wr;
-       rds_iwdev->max_sge = min(dev_attr->max_sge, RDS_IW_MAX_SGE);
+       rds_iwdev->dma_local_lkey = !!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
+       rds_iwdev->max_wrs = device->attrs.max_qp_wr;
+       rds_iwdev->max_sge = min(device->attrs.max_sge, RDS_IW_MAX_SGE);
 
        rds_iwdev->dev = device;
        rds_iwdev->pd = ib_alloc_pd(device);
@@ -111,8 +101,7 @@ static void rds_iw_add_one(struct ib_device *device)
        list_add_tail(&rds_iwdev->list, &rds_iw_devices);
 
        ib_set_client_data(device, &rds_iw_client, rds_iwdev);
-
-       goto free_attr;
+       return;
 
 err_mr:
        if (rds_iwdev->mr)
@@ -121,8 +110,6 @@ err_pd:
        ib_dealloc_pd(rds_iwdev->pd);
 free_dev:
        kfree(rds_iwdev);
-free_attr:
-       kfree(dev_attr);
 }
 
 static void rds_iw_remove_one(struct ib_device *device, void *client_data)
index f53bf3b..cf5b69a 100644 (file)
@@ -1095,17 +1095,6 @@ static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait)
        return res;
 }
 
-static bool rfkill_readable(struct rfkill_data *data)
-{
-       bool r;
-
-       mutex_lock(&data->mtx);
-       r = !list_empty(&data->events);
-       mutex_unlock(&data->mtx);
-
-       return r;
-}
-
 static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
                               size_t count, loff_t *pos)
 {
@@ -1122,8 +1111,11 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
                        goto out;
                }
                mutex_unlock(&data->mtx);
+               /* since we re-check and it just compares pointers,
+                * using !list_empty() without locking isn't a problem
+                */
                ret = wait_event_interruptible(data->read_wait,
-                                              rfkill_readable(data));
+                                              !list_empty(&data->events));
                mutex_lock(&data->mtx);
 
                if (ret)
index f26bdea..a1cd778 100644 (file)
@@ -403,6 +403,8 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
                if (len <= cl->deficit) {
                        cl->deficit -= len;
                        skb = qdisc_dequeue_peeked(cl->qdisc);
+                       if (unlikely(skb == NULL))
+                               goto out;
                        if (cl->qdisc->q.qlen == 0)
                                list_del(&cl->alist);
 
index bf61dfb..49d2cc7 100644 (file)
@@ -935,15 +935,22 @@ static struct sctp_association *__sctp_lookup_association(
                                        struct sctp_transport **pt)
 {
        struct sctp_transport *t;
+       struct sctp_association *asoc = NULL;
 
+       rcu_read_lock();
        t = sctp_addrs_lookup_transport(net, local, peer);
-       if (!t || t->dead)
-               return NULL;
+       if (!t || !sctp_transport_hold(t))
+               goto out;
 
-       sctp_association_hold(t->asoc);
+       asoc = t->asoc;
+       sctp_association_hold(asoc);
        *pt = t;
 
-       return t->asoc;
+       sctp_transport_put(t);
+
+out:
+       rcu_read_unlock();
+       return asoc;
 }
 
 /* Look up an association. protected by RCU read lock */
@@ -955,9 +962,7 @@ struct sctp_association *sctp_lookup_association(struct net *net,
 {
        struct sctp_association *asoc;
 
-       rcu_read_lock();
        asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
-       rcu_read_unlock();
 
        return asoc;
 }
index 684c5b3..ded7d93 100644 (file)
@@ -165,8 +165,6 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa
        list_for_each_entry_rcu(transport, &assoc->peer.transport_addr_list,
                        transports) {
                addr = &transport->ipaddr;
-               if (transport->dead)
-                       continue;
 
                af = sctp_get_af_specific(addr->sa.sa_family);
                if (af->cmp_addr(addr, primary)) {
@@ -380,6 +378,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
        }
 
        transport = (struct sctp_transport *)v;
+       if (!sctp_transport_hold(transport))
+               return 0;
        assoc = transport->asoc;
        epb = &assoc->base;
        sk = epb->sk;
@@ -412,6 +412,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
                sk->sk_rcvbuf);
        seq_printf(seq, "\n");
 
+       sctp_transport_put(transport);
+
        return 0;
 }
 
@@ -489,12 +491,12 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
        }
 
        tsp = (struct sctp_transport *)v;
+       if (!sctp_transport_hold(tsp))
+               return 0;
        assoc = tsp->asoc;
 
        list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list,
                                transports) {
-               if (tsp->dead)
-                       continue;
                /*
                 * The remote address (ADDR)
                 */
@@ -544,6 +546,8 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
                seq_printf(seq, "\n");
        }
 
+       sctp_transport_put(tsp);
+
        return 0;
 }
 
index 2e21384..b5327bb 100644 (file)
@@ -259,12 +259,6 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
                goto out_unlock;
        }
 
-       /* Is this transport really dead and just waiting around for
-        * the timer to let go of the reference?
-        */
-       if (transport->dead)
-               goto out_unlock;
-
        /* Run through the state machine.  */
        error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
                           SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_T3_RTX),
@@ -380,12 +374,6 @@ void sctp_generate_heartbeat_event(unsigned long data)
                goto out_unlock;
        }
 
-       /* Is this structure just waiting around for us to actually
-        * get destroyed?
-        */
-       if (transport->dead)
-               goto out_unlock;
-
        error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
                           SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT),
                           asoc->state, asoc->ep, asoc,
index 9bb80ec..5ca2ebf 100644 (file)
@@ -6636,6 +6636,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
 
                        if (cmsgs->srinfo->sinfo_flags &
                            ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+                             SCTP_SACK_IMMEDIATELY |
                              SCTP_ABORT | SCTP_EOF))
                                return -EINVAL;
                        break;
@@ -6659,6 +6660,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
 
                        if (cmsgs->sinfo->snd_flags &
                            ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+                             SCTP_SACK_IMMEDIATELY |
                              SCTP_ABORT | SCTP_EOF))
                                return -EINVAL;
                        break;
index aab9e3f..a431c14 100644 (file)
@@ -132,8 +132,6 @@ fail:
  */
 void sctp_transport_free(struct sctp_transport *transport)
 {
-       transport->dead = 1;
-
        /* Try to delete the heartbeat timer.  */
        if (del_timer(&transport->hb_timer))
                sctp_transport_put(transport);
@@ -169,7 +167,7 @@ static void sctp_transport_destroy_rcu(struct rcu_head *head)
  */
 static void sctp_transport_destroy(struct sctp_transport *transport)
 {
-       if (unlikely(!transport->dead)) {
+       if (unlikely(atomic_read(&transport->refcnt))) {
                WARN(1, "Attempt to destroy undead transport %p!\n", transport);
                return;
        }
@@ -296,9 +294,9 @@ void sctp_transport_route(struct sctp_transport *transport,
 }
 
 /* Hold a reference to a transport.  */
-void sctp_transport_hold(struct sctp_transport *transport)
+int sctp_transport_hold(struct sctp_transport *transport)
 {
-       atomic_inc(&transport->refcnt);
+       return atomic_add_unless(&transport->refcnt, 1, 0);
 }
 
 /* Release a reference to a transport and clean up
index 5e4f815..2b32fd6 100644 (file)
@@ -771,7 +771,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
        if (count == 0)
                return 0;
 
-       mutex_lock(&inode->i_mutex); /* protect against multiple concurrent
+       inode_lock(inode); /* protect against multiple concurrent
                              * readers on this file */
  again:
        spin_lock(&queue_lock);
@@ -784,7 +784,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
        }
        if (rp->q.list.next == &cd->queue) {
                spin_unlock(&queue_lock);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                WARN_ON_ONCE(rp->offset);
                return 0;
        }
@@ -838,7 +838,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
        }
        if (err == -EAGAIN)
                goto again;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err ? err :  count;
 }
 
@@ -909,9 +909,9 @@ static ssize_t cache_write(struct file *filp, const char __user *buf,
        if (!cd->cache_parse)
                goto out;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = cache_downcall(mapping, buf, count, cd);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 out:
        return ret;
 }
index 14f45bf..31789ef 100644 (file)
@@ -172,7 +172,7 @@ rpc_close_pipes(struct inode *inode)
        int need_release;
        LIST_HEAD(free_list);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        spin_lock(&pipe->lock);
        need_release = pipe->nreaders != 0 || pipe->nwriters != 0;
        pipe->nreaders = 0;
@@ -188,7 +188,7 @@ rpc_close_pipes(struct inode *inode)
        cancel_delayed_work_sync(&pipe->queue_timeout);
        rpc_inode_setowner(inode, NULL);
        RPC_I(inode)->pipe = NULL;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 }
 
 static struct inode *
@@ -221,7 +221,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp)
        int first_open;
        int res = -ENXIO;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        pipe = RPC_I(inode)->pipe;
        if (pipe == NULL)
                goto out;
@@ -237,7 +237,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp)
                pipe->nwriters++;
        res = 0;
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return res;
 }
 
@@ -248,7 +248,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
        struct rpc_pipe_msg *msg;
        int last_close;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        pipe = RPC_I(inode)->pipe;
        if (pipe == NULL)
                goto out;
@@ -278,7 +278,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
        if (last_close && pipe->ops->release_pipe)
                pipe->ops->release_pipe(inode);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return 0;
 }
 
@@ -290,7 +290,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
        struct rpc_pipe_msg *msg;
        int res = 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        pipe = RPC_I(inode)->pipe;
        if (pipe == NULL) {
                res = -EPIPE;
@@ -322,7 +322,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
                pipe->ops->destroy_msg(msg);
        }
 out_unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return res;
 }
 
@@ -332,11 +332,11 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of
        struct inode *inode = file_inode(filp);
        int res;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        res = -EPIPE;
        if (RPC_I(inode)->pipe != NULL)
                res = RPC_I(inode)->pipe->ops->downcall(filp, buf, len);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return res;
 }
 
@@ -349,12 +349,12 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
 
        poll_wait(filp, &rpci->waitq, wait);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        if (rpci->pipe == NULL)
                mask |= POLLERR | POLLHUP;
        else if (filp->private_data || !list_empty(&rpci->pipe->pipe))
                mask |= POLLIN | POLLRDNORM;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return mask;
 }
 
@@ -367,10 +367,10 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
        switch (cmd) {
        case FIONREAD:
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                pipe = RPC_I(inode)->pipe;
                if (pipe == NULL) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        return -EPIPE;
                }
                spin_lock(&pipe->lock);
@@ -381,7 +381,7 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                        len += msg->len - msg->copied;
                }
                spin_unlock(&pipe->lock);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                return put_user(len, (int __user *)arg);
        default:
                return -EINVAL;
@@ -617,9 +617,9 @@ int rpc_rmdir(struct dentry *dentry)
 
        parent = dget_parent(dentry);
        dir = d_inode(parent);
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
        error = __rpc_rmdir(dir, dentry);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        dput(parent);
        return error;
 }
@@ -701,9 +701,9 @@ static void rpc_depopulate(struct dentry *parent,
 {
        struct inode *dir = d_inode(parent);
 
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(dir, I_MUTEX_CHILD);
        __rpc_depopulate(parent, files, start, eof);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
 }
 
 static int rpc_populate(struct dentry *parent,
@@ -715,7 +715,7 @@ static int rpc_populate(struct dentry *parent,
        struct dentry *dentry;
        int i, err;
 
-       mutex_lock(&dir->i_mutex);
+       inode_lock(dir);
        for (i = start; i < eof; i++) {
                dentry = __rpc_lookup_create_exclusive(parent, files[i].name);
                err = PTR_ERR(dentry);
@@ -739,11 +739,11 @@ static int rpc_populate(struct dentry *parent,
                if (err != 0)
                        goto out_bad;
        }
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        return 0;
 out_bad:
        __rpc_depopulate(parent, files, start, eof);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        printk(KERN_WARNING "%s: %s failed to populate directory %pd\n",
                        __FILE__, __func__, parent);
        return err;
@@ -757,7 +757,7 @@ static struct dentry *rpc_mkdir_populate(struct dentry *parent,
        struct inode *dir = d_inode(parent);
        int error;
 
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
        dentry = __rpc_lookup_create_exclusive(parent, name);
        if (IS_ERR(dentry))
                goto out;
@@ -770,7 +770,7 @@ static struct dentry *rpc_mkdir_populate(struct dentry *parent,
                        goto err_rmdir;
        }
 out:
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        return dentry;
 err_rmdir:
        __rpc_rmdir(dir, dentry);
@@ -788,11 +788,11 @@ static int rpc_rmdir_depopulate(struct dentry *dentry,
 
        parent = dget_parent(dentry);
        dir = d_inode(parent);
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
        if (depopulate != NULL)
                depopulate(dentry);
        error = __rpc_rmdir(dir, dentry);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        dput(parent);
        return error;
 }
@@ -828,7 +828,7 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
        if (pipe->ops->downcall == NULL)
                umode &= ~S_IWUGO;
 
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
        dentry = __rpc_lookup_create_exclusive(parent, name);
        if (IS_ERR(dentry))
                goto out;
@@ -837,7 +837,7 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
        if (err)
                goto out_err;
 out:
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        return dentry;
 out_err:
        dentry = ERR_PTR(err);
@@ -865,9 +865,9 @@ rpc_unlink(struct dentry *dentry)
 
        parent = dget_parent(dentry);
        dir = d_inode(parent);
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
        error = __rpc_rmpipe(dir, dentry);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        dput(parent);
        return error;
 }
index 2e98f4a..37edea6 100644 (file)
@@ -1425,3 +1425,4 @@ void xprt_put(struct rpc_xprt *xprt)
        if (atomic_dec_and_test(&xprt->count))
                xprt_destroy(xprt);
 }
+EXPORT_SYMBOL_GPL(xprt_put);
index 33f99d3..dc9f3b5 100644 (file)
@@ -2,7 +2,7 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
 
 rpcrdma-y := transport.o rpc_rdma.o verbs.o \
        fmr_ops.o frwr_ops.o physical_ops.o \
-       svc_rdma.o svc_rdma_transport.o \
+       svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
        svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
        module.o
 rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o
index c683684..e165673 100644 (file)
@@ -190,12 +190,11 @@ static int
 frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
             struct rpcrdma_create_data_internal *cdata)
 {
-       struct ib_device_attr *devattr = &ia->ri_devattr;
        int depth, delta;
 
        ia->ri_max_frmr_depth =
                        min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
-                             devattr->max_fast_reg_page_list_len);
+                             ia->ri_device->attrs.max_fast_reg_page_list_len);
        dprintk("RPC:       %s: device's max FR page list len = %u\n",
                __func__, ia->ri_max_frmr_depth);
 
@@ -222,8 +221,8 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
        }
 
        ep->rep_attr.cap.max_send_wr *= depth;
-       if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
-               cdata->max_requests = devattr->max_qp_wr / depth;
+       if (ep->rep_attr.cap.max_send_wr > ia->ri_device->attrs.max_qp_wr) {
+               cdata->max_requests = ia->ri_device->attrs.max_qp_wr / depth;
                if (!cdata->max_requests)
                        return -EINVAL;
                ep->rep_attr.cap.max_send_wr = cdata->max_requests *
index 1b7051b..c846ca9 100644 (file)
@@ -55,6 +55,7 @@ unsigned int svcrdma_ord = RPCRDMA_ORD;
 static unsigned int min_ord = 1;
 static unsigned int max_ord = 4096;
 unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
+unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS;
 static unsigned int min_max_requests = 4;
 static unsigned int max_max_requests = 16384;
 unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
@@ -71,10 +72,6 @@ atomic_t rdma_stat_rq_prod;
 atomic_t rdma_stat_sq_poll;
 atomic_t rdma_stat_sq_prod;
 
-/* Temporary NFS request map and context caches */
-struct kmem_cache *svc_rdma_map_cachep;
-struct kmem_cache *svc_rdma_ctxt_cachep;
-
 struct workqueue_struct *svc_rdma_wq;
 
 /*
@@ -243,17 +240,16 @@ void svc_rdma_cleanup(void)
        svc_unreg_xprt_class(&svc_rdma_bc_class);
 #endif
        svc_unreg_xprt_class(&svc_rdma_class);
-       kmem_cache_destroy(svc_rdma_map_cachep);
-       kmem_cache_destroy(svc_rdma_ctxt_cachep);
 }
 
 int svc_rdma_init(void)
 {
        dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
        dprintk("\tsvcrdma_ord      : %d\n", svcrdma_ord);
-       dprintk("\tmax_requests     : %d\n", svcrdma_max_requests);
-       dprintk("\tsq_depth         : %d\n",
+       dprintk("\tmax_requests     : %u\n", svcrdma_max_requests);
+       dprintk("\tsq_depth         : %u\n",
                svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
+       dprintk("\tmax_bc_requests  : %u\n", svcrdma_max_bc_requests);
        dprintk("\tmax_inline       : %d\n", svcrdma_max_req_size);
 
        svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
@@ -264,39 +260,10 @@ int svc_rdma_init(void)
                svcrdma_table_header =
                        register_sysctl_table(svcrdma_root_table);
 
-       /* Create the temporary map cache */
-       svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache",
-                                               sizeof(struct svc_rdma_req_map),
-                                               0,
-                                               SLAB_HWCACHE_ALIGN,
-                                               NULL);
-       if (!svc_rdma_map_cachep) {
-               printk(KERN_INFO "Could not allocate map cache.\n");
-               goto err0;
-       }
-
-       /* Create the temporary context cache */
-       svc_rdma_ctxt_cachep =
-               kmem_cache_create("svc_rdma_ctxt_cache",
-                                 sizeof(struct svc_rdma_op_ctxt),
-                                 0,
-                                 SLAB_HWCACHE_ALIGN,
-                                 NULL);
-       if (!svc_rdma_ctxt_cachep) {
-               printk(KERN_INFO "Could not allocate WR ctxt cache.\n");
-               goto err1;
-       }
-
        /* Register RDMA with the SVC transport switch */
        svc_reg_xprt_class(&svc_rdma_class);
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
        svc_reg_xprt_class(&svc_rdma_bc_class);
 #endif
        return 0;
- err1:
-       kmem_cache_destroy(svc_rdma_map_cachep);
- err0:
-       unregister_sysctl_table(svcrdma_table_header);
-       destroy_workqueue(svc_rdma_wq);
-       return -ENOMEM;
 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
new file mode 100644 (file)
index 0000000..65a7c23
--- /dev/null
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2015 Oracle.  All rights reserved.
+ *
+ * Support for backward direction RPCs on RPC/RDMA (server-side).
+ */
+
+#include <linux/sunrpc/svc_rdma.h>
+#include "xprt_rdma.h"
+
+#define RPCDBG_FACILITY        RPCDBG_SVCXPRT
+
+#undef SVCRDMA_BACKCHANNEL_DEBUG
+
+/* Handle a reply to a backchannel call (server side).
+ *
+ * @xprt:   rpc_xprt on which the pending request is looked up by XID
+ * @rmsgp:  RPC/RDMA header of the received message; supplies the XID
+ *          and the credit value
+ * @rcvbuf: received message; head[0] is copied into the matching
+ *          request's receive buffer
+ *
+ * Returns 0 if a matching request was found and completed. Returns
+ * -EAGAIN if head[0] is shorter than 24 bytes, if no pending request
+ * matches the XID, or if the request's receive head is too small to
+ * hold head[0].
+ */
+int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp,
+                            struct xdr_buf *rcvbuf)
+{
+       struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+       struct kvec *dst, *src = &rcvbuf->head[0];
+       struct rpc_rqst *req;
+       unsigned long cwnd;
+       u32 credits;
+       size_t len;
+       __be32 xid;
+       __be32 *p;
+       int ret;
+
+       p = (__be32 *)src->iov_base;
+       len = src->iov_len;
+       xid = rmsgp->rm_xid;
+
+#ifdef SVCRDMA_BACKCHANNEL_DEBUG
+       pr_info("%s: xid=%08x, length=%zu\n",
+               __func__, be32_to_cpu(xid), len);
+       pr_info("%s: RPC/RDMA: %*ph\n",
+               __func__, (int)RPCRDMA_HDRLEN_MIN, rmsgp);
+       pr_info("%s:      RPC: %*ph\n",
+               __func__, (int)len, p);
+#endif
+
+       /* Every failure below reports -EAGAIN. */
+       ret = -EAGAIN;
+       if (src->iov_len < 24)
+               goto out_shortreply;
+
+       /* The lookup and the completion both run under transport_lock. */
+       spin_lock_bh(&xprt->transport_lock);
+       req = xprt_lookup_rqst(xprt, xid);
+       if (!req)
+               goto out_notfound;
+
+       /* dst points into rq_private_buf, so the length check below sees
+        * the value just copied over from rq_rcv_buf.
+        */
+       dst = &req->rq_private_buf.head[0];
+       memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
+       if (dst->iov_len < len)
+               goto out_unlock;
+       memcpy(dst->iov_base, p, len);
+
+       /* Clamp the advertised credits to 1..rb_bc_max_requests. */
+       credits = be32_to_cpu(rmsgp->rm_credit);
+       if (credits == 0)
+               credits = 1;    /* don't deadlock */
+       else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
+               credits = r_xprt->rx_buf.rb_bc_max_requests;
+
+       /* Install the new congestion window; xprt_release_rqst_cong() is
+        * called only when the window grew.
+        */
+       cwnd = xprt->cwnd;
+       xprt->cwnd = credits << RPC_CWNDSHIFT;
+       if (xprt->cwnd > cwnd)
+               xprt_release_rqst_cong(req->rq_task);
+
+       ret = 0;
+       xprt_complete_rqst(req->rq_task, rcvbuf->len);
+       /* NOTE(review): presumably marks the buffer as consumed for the
+        * caller -- confirm against svc_rdma_recvfrom().
+        */
+       rcvbuf->len = 0;
+
+out_unlock:
+       spin_unlock_bh(&xprt->transport_lock);
+out:
+       return ret;
+
+out_shortreply:
+       dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n",
+               xprt, src->iov_len);
+       goto out;
+
+out_notfound:
+       dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n",
+               xprt, be32_to_cpu(xid));
+
+       goto out_unlock;
+}
+
+/* Send a backwards direction RPC call.
+ *
+ * Caller holds the connection's mutex and has already marshaled
+ * the RPC/RDMA request.
+ *
+ * This is similar to svc_rdma_reply, but takes an rpc_rqst
+ * instead, does not support chunks, and avoids blocking memory
+ * allocation.
+ *
+ * XXX: There is still an opportunity to block in svc_rdma_send()
+ * if there are no SQ entries to post the Send. This may occur if
+ * the adapter has a small maximum SQ depth.
+ *
+ * The page holding rqst->rq_buffer is DMA-mapped and posted as one
+ * signaled IB_WR_SEND with a single SGE of sndbuf->len bytes. A
+ * receive buffer is posted first to handle the reply.
+ *
+ * Returns 0 on success, the non-zero value returned by
+ * svc_rdma_map_xdr(), -ENOTCONN if the receive buffer cannot be
+ * posted (XPT_CLOSE is also set on the transport), or -EIO if DMA
+ * mapping or svc_rdma_send() fails.
+ */
+static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
+                             struct rpc_rqst *rqst)
+{
+       struct xdr_buf *sndbuf = &rqst->rq_snd_buf;
+       struct svc_rdma_op_ctxt *ctxt;
+       struct svc_rdma_req_map *vec;
+       struct ib_send_wr send_wr;
+       int ret;
+
+       vec = svc_rdma_get_req_map(rdma);
+       ret = svc_rdma_map_xdr(rdma, sndbuf, vec);
+       if (ret)
+               goto out_err;
+
+       /* Post a recv buffer to handle the reply for this request. */
+       ret = svc_rdma_post_recv(rdma, GFP_NOIO);
+       if (ret) {
+               pr_err("svcrdma: Failed to post bc receive buffer, err=%d.\n",
+                      ret);
+               pr_err("svcrdma: closing transport %p.\n", rdma);
+               set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+               ret = -ENOTCONN;
+               goto out_err;
+       }
+
+       /* From here on the send buffer's page is tracked by ctxt. */
+       ctxt = svc_rdma_get_context(rdma);
+       ctxt->pages[0] = virt_to_page(rqst->rq_buffer);
+       ctxt->count = 1;
+
+       ctxt->wr_op = IB_WR_SEND;
+       ctxt->direction = DMA_TO_DEVICE;
+       ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
+       ctxt->sge[0].length = sndbuf->len;
+       ctxt->sge[0].addr =
+           ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0,
+                           sndbuf->len, DMA_TO_DEVICE);
+       if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) {
+               ret = -EIO;
+               goto out_unmap;
+       }
+       atomic_inc(&rdma->sc_dma_used);
+
+       memset(&send_wr, 0, sizeof(send_wr));
+       send_wr.wr_id = (unsigned long)ctxt;
+       send_wr.sg_list = ctxt->sge;
+       send_wr.num_sge = 1;
+       send_wr.opcode = IB_WR_SEND;
+       send_wr.send_flags = IB_SEND_SIGNALED;
+
+       /* Any svc_rdma_send() failure is reported to the caller as -EIO. */
+       ret = svc_rdma_send(rdma, &send_wr);
+       if (ret) {
+               ret = -EIO;
+               goto out_unmap;
+       }
+
+       /* Also reached on success: the request map is always released. */
+out_err:
+       svc_rdma_put_req_map(rdma, vec);
+       dprintk("svcrdma: %s returns %d\n", __func__, ret);
+       return ret;
+
+       /* NOTE(review): the DMA-mapping-error path arrives here before
+        * sc_dma_used has been incremented and with sge[0].addr holding
+        * the failed mapping -- confirm svc_rdma_unmap_dma() copes.
+        */
+out_unmap:
+       svc_rdma_unmap_dma(ctxt);
+       svc_rdma_put_context(ctxt, 1);
+       goto out_err;
+}
+
+/* Server-side transport endpoint wants a whole page for its send
+ * buffer. The client RPC code constructs the RPC header in this
+ * buffer before it invokes ->send_request.
+ *
+ * Exactly one page is allocated regardless of @size; a request
+ * larger than PAGE_SIZE only triggers a one-time warning and the
+ * allocation still proceeds.
+ *
+ * Returns NULL if there was a temporary allocation failure.
+ */
+static void *
+xprt_rdma_bc_allocate(struct rpc_task *task, size_t size)
+{
+       struct rpc_rqst *rqst = task->tk_rqstp;
+       struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
+       struct svcxprt_rdma *rdma;
+       struct page *page;
+
+       /* NOTE(review): rdma is assigned but not used below. */
+       rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
+
+       /* Prevent an infinite loop: try to make this case work */
+       if (size > PAGE_SIZE)
+               WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n",
+                         size);
+
+       page = alloc_page(RPCRDMA_DEF_GFP);
+       if (!page)
+               return NULL;
+
+       /* The caller receives the page's kernel virtual address. */
+       return page_address(page);
+}
+
+/* ->buf_free method for the backchannel transport; deliberately
+ * does nothing with @buffer.
+ *
+ * NOTE(review): svc_rdma_bc_sendto() attaches the send page to a ctxt
+ * only part-way through; confirm what releases the page when that
+ * function fails before reaching that point.
+ */
+static void
+xprt_rdma_bc_free(void *buffer)
+{
+       /* No-op: ctxt and page have already been freed. */
+}
+
+/* Fill in the RPC/RDMA header at the start of the request's send
+ * buffer and hand the request to svc_rdma_bc_sendto().
+ *
+ * The header is an rdma_msg whose three rm_chunks words are all
+ * xdr_zero; it carries the request's XID and advertises
+ * rb_bc_max_requests credits.
+ *
+ * Returns 0 on success. On any send failure, calls
+ * xprt_disconnect_done() and returns -ENOTCONN.
+ */
+static int
+rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
+{
+       struct rpc_xprt *xprt = rqst->rq_xprt;
+       struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+       struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)rqst->rq_buffer;
+       int rc;
+
+       /* Space in the send buffer for an RPC/RDMA header is reserved
+        * via xprt->tsh_size.
+        */
+       headerp->rm_xid = rqst->rq_xid;
+       headerp->rm_vers = rpcrdma_version;
+       headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
+       headerp->rm_type = rdma_msg;
+       headerp->rm_body.rm_chunks[0] = xdr_zero;
+       headerp->rm_body.rm_chunks[1] = xdr_zero;
+       headerp->rm_body.rm_chunks[2] = xdr_zero;
+
+#ifdef SVCRDMA_BACKCHANNEL_DEBUG
+       pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
+#endif
+
+       rc = svc_rdma_bc_sendto(rdma, rqst);
+       if (rc)
+               goto drop_connection;
+       return rc;
+
+       /* The specific error from svc_rdma_bc_sendto() is not passed on. */
+drop_connection:
+       dprintk("svcrdma: failed to send bc call\n");
+       xprt_disconnect_done(xprt);
+       return -ENOTCONN;
+}
+
+/* Send an RPC call on the passive end of a transport
+ * connection.
+ *
+ * Sending is serialized by the server transport's xpt_mutex. If the
+ * mutex is contended, the task is queued on xpt_bc_pending and the
+ * lock is tried once more; -EAGAIN is returned if it is still held.
+ *
+ * Returns 0 on success, -EAGAIN as described above, -ENOTCONN if the
+ * server transport is marked XPT_DEAD, or the negative value returned
+ * by rpcrdma_bc_send_request().
+ */
+static int
+xprt_rdma_bc_send_request(struct rpc_task *task)
+{
+       struct rpc_rqst *rqst = task->tk_rqstp;
+       struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
+       struct svcxprt_rdma *rdma;
+       int ret;
+
+       dprintk("svcrdma: sending bc call with xid: %08x\n",
+               be32_to_cpu(rqst->rq_xid));
+
+       if (!mutex_trylock(&sxprt->xpt_mutex)) {
+               /* Queue the task first, then retry the lock once. */
+               rpc_sleep_on(&sxprt->xpt_bc_pending, task, NULL);
+               if (!mutex_trylock(&sxprt->xpt_mutex))
+                       return -EAGAIN;
+               /* Second attempt succeeded: wake the task just queued. */
+               rpc_wake_up_queued_task(&sxprt->xpt_bc_pending, task);
+       }
+
+       /* ret stays -ENOTCONN when the transport is already dead. */
+       ret = -ENOTCONN;
+       rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
+       if (!test_bit(XPT_DEAD, &sxprt->xpt_flags))
+               ret = rpcrdma_bc_send_request(rdma, rqst);
+
+       mutex_unlock(&sxprt->xpt_mutex);
+
+       /* Only negative values are reported; anything else becomes 0. */
+       if (ret < 0)
+               return ret;
+       return 0;
+}
+
+/* ->close method for the backchannel transport: only emits a debug
+ * message.
+ */
+static void
+xprt_rdma_bc_close(struct rpc_xprt *xprt)
+{
+       dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
+}
+
+/* ->destroy method for the backchannel transport: frees the rpc_xprt
+ * and drops a module reference (xprt_setup_rdma_bc() takes one with
+ * try_module_get()).
+ */
+static void
+xprt_rdma_bc_put(struct rpc_xprt *xprt)
+{
+       dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
+
+       xprt_free(xprt);
+       module_put(THIS_MODULE);
+}
+
+/* rpc_xprt method table for the server-side RDMA backchannel.
+ * Installed as xprt->ops by xprt_setup_rdma_bc(). Buffer management,
+ * send, close and destroy use the xprt_rdma_bc_* functions in this
+ * file; the remaining entries point at xprt_* helpers defined
+ * elsewhere.
+ */
+static struct rpc_xprt_ops xprt_rdma_bc_procs = {
+       .reserve_xprt           = xprt_reserve_xprt_cong,
+       .release_xprt           = xprt_release_xprt_cong,
+       .alloc_slot             = xprt_alloc_slot,
+       .release_request        = xprt_release_rqst_cong,
+       .buf_alloc              = xprt_rdma_bc_allocate,
+       .buf_free               = xprt_rdma_bc_free,
+       .send_request           = xprt_rdma_bc_send_request,
+       .set_retrans_timeout    = xprt_set_retrans_timeout_def,
+       .close                  = xprt_rdma_bc_close,
+       .destroy                = xprt_rdma_bc_put,
+       .print_stats            = xprt_rdma_print_stats
+};
+
+/* Timeout parameters installed as xprt->timeout for the backchannel
+ * transport: the initial and maximum values are both 60 * HZ.
+ */
+static const struct rpc_timeout xprt_rdma_bc_timeout = {
+       .to_initval = 60 * HZ,
+       .to_maxval = 60 * HZ,
+};
+
+/* It shouldn't matter if the number of backchannel session slots
+ * doesn't match the number of RPC/RDMA credits. That just means
+ * one or the other will have extra slots that aren't used.
+ *
+ * ->setup method of the xprt_rdma_bc transport class. Allocates an
+ * rpc_xprt (passing RPCRDMA_MAX_BC_REQUESTS for both trailing
+ * xprt_alloc() arguments), marks it bound and connected, and
+ * cross-links it with the server transport given in args->bc_xprt.
+ *
+ * Returns the new rpc_xprt, or ERR_PTR(-EBADF) if the destination
+ * address does not fit in xprt->addr, ERR_PTR(-ENOMEM) if allocation
+ * fails, or ERR_PTR(-EINVAL) if a module reference cannot be taken.
+ */
+static struct rpc_xprt *
+xprt_setup_rdma_bc(struct xprt_create *args)
+{
+       struct rpc_xprt *xprt;
+       struct rpcrdma_xprt *new_xprt;
+
+       /* sizeof() does not evaluate xprt, which is still unset here. */
+       if (args->addrlen > sizeof(xprt->addr)) {
+               dprintk("RPC:       %s: address too large\n", __func__);
+               return ERR_PTR(-EBADF);
+       }
+
+       xprt = xprt_alloc(args->net, sizeof(*new_xprt),
+                         RPCRDMA_MAX_BC_REQUESTS,
+                         RPCRDMA_MAX_BC_REQUESTS);
+       if (!xprt) {
+               dprintk("RPC:       %s: couldn't allocate rpc_xprt\n",
+                       __func__);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       xprt->timeout = &xprt_rdma_bc_timeout;
+       xprt_set_bound(xprt);
+       xprt_set_connected(xprt);
+       xprt->bind_timeout = RPCRDMA_BIND_TO;
+       xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
+       xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
+
+       xprt->prot = XPRT_TRANSPORT_BC_RDMA;
+       /* RPC/RDMA header size expressed in 32-bit words. */
+       xprt->tsh_size = RPCRDMA_HDRLEN_MIN / sizeof(__be32);
+       xprt->ops = &xprt_rdma_bc_procs;
+
+       memcpy(&xprt->addr, args->dstaddr, args->addrlen);
+       xprt->addrlen = args->addrlen;
+       xprt_rdma_format_addresses(xprt, (struct sockaddr *)&xprt->addr);
+       xprt->resvport = 0;
+
+       xprt->max_payload = xprt_rdma_max_inline_read;
+
+       new_xprt = rpcx_to_rdmax(xprt);
+       new_xprt->rx_buf.rb_bc_max_requests = xprt->max_reqs;
+
+       /* Take a reference before linking the rpc_xprt and the server
+        * transport to each other; it is dropped again in out_fail.
+        */
+       xprt_get(xprt);
+       args->bc_xprt->xpt_bc_xprt = xprt;
+       xprt->bc_xprt = args->bc_xprt;
+
+       if (!try_module_get(THIS_MODULE))
+               goto out_fail;
+
+       /* Final put for backchannel xprt is in __svc_rdma_free */
+       xprt_get(xprt);
+       return xprt;
+
+       /* Undo the link and the first xprt_get(), then free the xprt. */
+out_fail:
+       xprt_rdma_free_addresses(xprt);
+       args->bc_xprt->xpt_bc_xprt = NULL;
+       xprt_put(xprt);
+       xprt_free(xprt);
+       return ERR_PTR(-EINVAL);
+}
+
+/* Transport class descriptor for the server-side RDMA backchannel,
+ * identified by XPRT_TRANSPORT_BC_RDMA; new transports of this class
+ * are created by xprt_setup_rdma_bc().
+ */
+struct xprt_class xprt_rdma_bc = {
+       .list                   = LIST_HEAD_INIT(xprt_rdma_bc.list),
+       .name                   = "rdma backchannel",
+       .owner                  = THIS_MODULE,
+       .ident                  = XPRT_TRANSPORT_BC_RDMA,
+       .setup                  = xprt_setup_rdma_bc,
+};
index ff4f01e..c8b8a8b 100644 (file)
@@ -144,6 +144,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
 
                head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
                head->arg.page_len += len;
+
                head->arg.len += len;
                if (!pg_off)
                        head->count++;
@@ -160,8 +161,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
                        goto err;
                atomic_inc(&xprt->sc_dma_used);
 
-               /* The lkey here is either a local dma lkey or a dma_mr lkey */
-               ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
+               ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey;
                ctxt->sge[pno].length = len;
                ctxt->count++;
 
@@ -567,6 +567,38 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
        return ret;
 }
 
+/* By convention, backchannel calls arrive via rdma_msg type
+ * messages, and never populate the chunk lists. This makes
+ * the RPC/RDMA header small and fixed in size, so it is
+ * straightforward to check the RPC header's direction field.
+ */
+static bool
+svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, struct rpcrdma_msg *rmsgp)
+{
+       __be32 *p = (__be32 *)rmsgp;
+
+       if (!xprt->xpt_bc_xprt)
+               return false;
+
+       if (rmsgp->rm_type != rdma_msg)
+               return false;
+       if (rmsgp->rm_body.rm_chunks[0] != xdr_zero)
+               return false;
+       if (rmsgp->rm_body.rm_chunks[1] != xdr_zero)
+               return false;
+       if (rmsgp->rm_body.rm_chunks[2] != xdr_zero)
+               return false;
+
+       /* sanity */
+       if (p[7] != rmsgp->rm_xid)
+               return false;
+       /* call direction */
+       if (p[8] == cpu_to_be32(RPC_CALL))
+               return false;
+
+       return true;
+}
+
 /*
  * Set up the rqstp thread context to point to the RQ buffer. If
  * necessary, pull additional data from the client with an RDMA_READ
@@ -632,6 +664,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
                goto close_out;
        }
 
+       if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) {
+               ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp,
+                                              &rqstp->rq_arg);
+               svc_rdma_put_context(ctxt, 0);
+               if (ret)
+                       goto repost;
+               return ret;
+       }
+
        /* Read read-list data. */
        ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
        if (ret > 0) {
@@ -668,4 +709,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
        set_bit(XPT_CLOSE, &xprt->xpt_flags);
 defer:
        return 0;
+
+repost:
+       ret = svc_rdma_post_recv(rdma_xprt, GFP_KERNEL);
+       if (ret) {
+               pr_err("svcrdma: could not post a receive buffer, err=%d.\n",
+                      ret);
+               pr_err("svcrdma: closing transport %p.\n", rdma_xprt);
+               set_bit(XPT_CLOSE, &rdma_xprt->sc_xprt.xpt_flags);
+               ret = -ENOTCONN;
+       }
+       return ret;
 }
index 969a1ab..df57f3c 100644 (file)
@@ -50,9 +50,9 @@
 
 #define RPCDBG_FACILITY        RPCDBG_SVCXPRT
 
-static int map_xdr(struct svcxprt_rdma *xprt,
-                  struct xdr_buf *xdr,
-                  struct svc_rdma_req_map *vec)
+int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
+                    struct xdr_buf *xdr,
+                    struct svc_rdma_req_map *vec)
 {
        int sge_no;
        u32 sge_bytes;
@@ -62,7 +62,7 @@ static int map_xdr(struct svcxprt_rdma *xprt,
 
        if (xdr->len !=
            (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) {
-               pr_err("svcrdma: map_xdr: XDR buffer length error\n");
+               pr_err("svcrdma: %s: XDR buffer length error\n", __func__);
                return -EIO;
        }
 
@@ -97,9 +97,9 @@ static int map_xdr(struct svcxprt_rdma *xprt,
                sge_no++;
        }
 
-       dprintk("svcrdma: map_xdr: sge_no %d page_no %d "
+       dprintk("svcrdma: %s: sge_no %d page_no %d "
                "page_base %u page_len %u head_len %zu tail_len %zu\n",
-               sge_no, page_no, xdr->page_base, xdr->page_len,
+               __func__, sge_no, page_no, xdr->page_base, xdr->page_len,
                xdr->head[0].iov_len, xdr->tail[0].iov_len);
 
        vec->count = sge_no;
@@ -265,7 +265,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
                                         sge[sge_no].addr))
                        goto err;
                atomic_inc(&xprt->sc_dma_used);
-               sge[sge_no].lkey = xprt->sc_dma_lkey;
+               sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
                ctxt->count++;
                sge_off = 0;
                sge_no++;
@@ -465,7 +465,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
        int ret;
 
        /* Post a recv buffer to handle another request. */
-       ret = svc_rdma_post_recv(rdma);
+       ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
        if (ret) {
                printk(KERN_INFO
                       "svcrdma: could not post a receive buffer, err=%d."
@@ -480,7 +480,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
        ctxt->count = 1;
 
        /* Prepare the SGE for the RPCRDMA Header */
-       ctxt->sge[0].lkey = rdma->sc_dma_lkey;
+       ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
        ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
        ctxt->sge[0].addr =
            ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
@@ -504,7 +504,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
                                         ctxt->sge[sge_no].addr))
                        goto err;
                atomic_inc(&rdma->sc_dma_used);
-               ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
+               ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
                ctxt->sge[sge_no].length = sge_bytes;
        }
        if (byte_count != 0) {
@@ -591,14 +591,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
        /* Build an req vec for the XDR */
        ctxt = svc_rdma_get_context(rdma);
        ctxt->direction = DMA_TO_DEVICE;
-       vec = svc_rdma_get_req_map();
-       ret = map_xdr(rdma, &rqstp->rq_res, vec);
+       vec = svc_rdma_get_req_map(rdma);
+       ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec);
        if (ret)
                goto err0;
        inline_bytes = rqstp->rq_res.len;
 
        /* Create the RDMA response header */
-       res_page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+       ret = -ENOMEM;
+       res_page = alloc_page(GFP_KERNEL);
+       if (!res_page)
+               goto err0;
        rdma_resp = page_address(res_page);
        reply_ary = svc_rdma_get_reply_array(rdma_argp);
        if (reply_ary)
@@ -630,14 +633,14 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 
        ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
                         inline_bytes);
-       svc_rdma_put_req_map(vec);
+       svc_rdma_put_req_map(rdma, vec);
        dprintk("svcrdma: send_reply returns %d\n", ret);
        return ret;
 
  err1:
        put_page(res_page);
  err0:
-       svc_rdma_put_req_map(vec);
+       svc_rdma_put_req_map(rdma, vec);
        svc_rdma_put_context(ctxt, 0);
        return ret;
 }
index b348b4a..5763825 100644 (file)
@@ -153,18 +153,76 @@ static void svc_rdma_bc_free(struct svc_xprt *xprt)
 }
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
-struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
+static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt,
+                                          gfp_t flags)
 {
        struct svc_rdma_op_ctxt *ctxt;
 
-       ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep,
-                               GFP_KERNEL | __GFP_NOFAIL);
-       ctxt->xprt = xprt;
-       INIT_LIST_HEAD(&ctxt->dto_q);
+       ctxt = kmalloc(sizeof(*ctxt), flags);
+       if (ctxt) {
+               ctxt->xprt = xprt;
+               INIT_LIST_HEAD(&ctxt->free);
+               INIT_LIST_HEAD(&ctxt->dto_q);
+       }
+       return ctxt;
+}
+
+static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
+{
+       unsigned int i;
+
+       /* Each RPC/RDMA credit can consume a number of send
+        * and receive WQEs. One ctxt is allocated for each.
+        */
+       i = xprt->sc_sq_depth + xprt->sc_rq_depth;
+
+       while (i--) {
+               struct svc_rdma_op_ctxt *ctxt;
+
+               ctxt = alloc_ctxt(xprt, GFP_KERNEL);
+               if (!ctxt) {
+                       dprintk("svcrdma: No memory for RDMA ctxt\n");
+                       return false;
+               }
+               list_add(&ctxt->free, &xprt->sc_ctxts);
+       }
+       return true;
+}
+
+struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
+{
+       struct svc_rdma_op_ctxt *ctxt = NULL;
+
+       spin_lock_bh(&xprt->sc_ctxt_lock);
+       xprt->sc_ctxt_used++;
+       if (list_empty(&xprt->sc_ctxts))
+               goto out_empty;
+
+       ctxt = list_first_entry(&xprt->sc_ctxts,
+                               struct svc_rdma_op_ctxt, free);
+       list_del_init(&ctxt->free);
+       spin_unlock_bh(&xprt->sc_ctxt_lock);
+
+out:
        ctxt->count = 0;
        ctxt->frmr = NULL;
-       atomic_inc(&xprt->sc_ctxt_used);
        return ctxt;
+
+out_empty:
+       /* Either pre-allocation missed the mark, or send
+        * queue accounting is broken.
+        */
+       spin_unlock_bh(&xprt->sc_ctxt_lock);
+
+       ctxt = alloc_ctxt(xprt, GFP_NOIO);
+       if (ctxt)
+               goto out;
+
+       spin_lock_bh(&xprt->sc_ctxt_lock);
+       xprt->sc_ctxt_used--;
+       spin_unlock_bh(&xprt->sc_ctxt_lock);
+       WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n");
+       return NULL;
 }
 
 void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
@@ -174,11 +232,11 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
        for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
                /*
                 * Unmap the DMA addr in the SGE if the lkey matches
-                * the sc_dma_lkey, otherwise, ignore it since it is
+                * the local_dma_lkey, otherwise, ignore it since it is
                 * an FRMR lkey and will be unmapped later when the
                 * last WR that uses it completes.
                 */
-               if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
+               if (ctxt->sge[i].lkey == xprt->sc_pd->local_dma_lkey) {
                        atomic_dec(&xprt->sc_dma_used);
                        ib_dma_unmap_page(xprt->sc_cm_id->device,
                                            ctxt->sge[i].addr,
@@ -190,35 +248,108 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
 
 void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
 {
-       struct svcxprt_rdma *xprt;
+       struct svcxprt_rdma *xprt = ctxt->xprt;
        int i;
 
-       xprt = ctxt->xprt;
        if (free_pages)
                for (i = 0; i < ctxt->count; i++)
                        put_page(ctxt->pages[i]);
 
-       kmem_cache_free(svc_rdma_ctxt_cachep, ctxt);
-       atomic_dec(&xprt->sc_ctxt_used);
+       spin_lock_bh(&xprt->sc_ctxt_lock);
+       xprt->sc_ctxt_used--;
+       list_add(&ctxt->free, &xprt->sc_ctxts);
+       spin_unlock_bh(&xprt->sc_ctxt_lock);
 }
 
-/*
- * Temporary NFS req mappings are shared across all transport
- * instances. These are short lived and should be bounded by the number
- * of concurrent server threads * depth of the SQ.
- */
-struct svc_rdma_req_map *svc_rdma_get_req_map(void)
+static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
+{
+       while (!list_empty(&xprt->sc_ctxts)) {
+               struct svc_rdma_op_ctxt *ctxt;
+
+               ctxt = list_first_entry(&xprt->sc_ctxts,
+                                       struct svc_rdma_op_ctxt, free);
+               list_del(&ctxt->free);
+               kfree(ctxt);
+       }
+}
+
+static struct svc_rdma_req_map *alloc_req_map(gfp_t flags)
 {
        struct svc_rdma_req_map *map;
-       map = kmem_cache_alloc(svc_rdma_map_cachep,
-                              GFP_KERNEL | __GFP_NOFAIL);
+
+       map = kmalloc(sizeof(*map), flags);
+       if (map)
+               INIT_LIST_HEAD(&map->free);
+       return map;
+}
+
+static bool svc_rdma_prealloc_maps(struct svcxprt_rdma *xprt)
+{
+       unsigned int i;
+
+       /* One for each receive buffer on this connection. */
+       i = xprt->sc_max_requests;
+
+       while (i--) {
+               struct svc_rdma_req_map *map;
+
+               map = alloc_req_map(GFP_KERNEL);
+               if (!map) {
+                       dprintk("svcrdma: No memory for request map\n");
+                       return false;
+               }
+               list_add(&map->free, &xprt->sc_maps);
+       }
+       return true;
+}
+
+struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *xprt)
+{
+       struct svc_rdma_req_map *map = NULL;
+
+       spin_lock(&xprt->sc_map_lock);
+       if (list_empty(&xprt->sc_maps))
+               goto out_empty;
+
+       map = list_first_entry(&xprt->sc_maps,
+                              struct svc_rdma_req_map, free);
+       list_del_init(&map->free);
+       spin_unlock(&xprt->sc_map_lock);
+
+out:
        map->count = 0;
        return map;
+
+out_empty:
+       spin_unlock(&xprt->sc_map_lock);
+
+       /* Pre-allocation amount was incorrect */
+       map = alloc_req_map(GFP_NOIO);
+       if (map)
+               goto out;
+
+       WARN_ONCE(1, "svcrdma: empty request map list?\n");
+       return NULL;
+}
+
+void svc_rdma_put_req_map(struct svcxprt_rdma *xprt,
+                         struct svc_rdma_req_map *map)
+{
+       spin_lock(&xprt->sc_map_lock);
+       list_add(&map->free, &xprt->sc_maps);
+       spin_unlock(&xprt->sc_map_lock);
 }
 
-void svc_rdma_put_req_map(struct svc_rdma_req_map *map)
+static void svc_rdma_destroy_maps(struct svcxprt_rdma *xprt)
 {
-       kmem_cache_free(svc_rdma_map_cachep, map);
+       while (!list_empty(&xprt->sc_maps)) {
+               struct svc_rdma_req_map *map;
+
+               map = list_first_entry(&xprt->sc_maps,
+                                      struct svc_rdma_req_map, free);
+               list_del(&map->free);
+               kfree(map);
+       }
 }
 
 /* ib_cq event handler */
@@ -386,46 +517,44 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
 static void process_context(struct svcxprt_rdma *xprt,
                            struct svc_rdma_op_ctxt *ctxt)
 {
+       struct svc_rdma_op_ctxt *read_hdr;
+       int free_pages = 0;
+
        svc_rdma_unmap_dma(ctxt);
 
        switch (ctxt->wr_op) {
        case IB_WR_SEND:
-               if (ctxt->frmr)
-                       pr_err("svcrdma: SEND: ctxt->frmr != NULL\n");
-               svc_rdma_put_context(ctxt, 1);
+               free_pages = 1;
                break;
 
        case IB_WR_RDMA_WRITE:
-               if (ctxt->frmr)
-                       pr_err("svcrdma: WRITE: ctxt->frmr != NULL\n");
-               svc_rdma_put_context(ctxt, 0);
                break;
 
        case IB_WR_RDMA_READ:
        case IB_WR_RDMA_READ_WITH_INV:
                svc_rdma_put_frmr(xprt, ctxt->frmr);
-               if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
-                       struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
-                       if (read_hdr) {
-                               spin_lock_bh(&xprt->sc_rq_dto_lock);
-                               set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
-                               list_add_tail(&read_hdr->dto_q,
-                                             &xprt->sc_read_complete_q);
-                               spin_unlock_bh(&xprt->sc_rq_dto_lock);
-                       } else {
-                               pr_err("svcrdma: ctxt->read_hdr == NULL\n");
-                       }
-                       svc_xprt_enqueue(&xprt->sc_xprt);
-               }
+
+               if (!test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags))
+                       break;
+
+               read_hdr = ctxt->read_hdr;
                svc_rdma_put_context(ctxt, 0);
-               break;
+
+               spin_lock_bh(&xprt->sc_rq_dto_lock);
+               set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+               list_add_tail(&read_hdr->dto_q,
+                             &xprt->sc_read_complete_q);
+               spin_unlock_bh(&xprt->sc_rq_dto_lock);
+               svc_xprt_enqueue(&xprt->sc_xprt);
+               return;
 
        default:
-               printk(KERN_ERR "svcrdma: unexpected completion type, "
-                      "opcode=%d\n",
-                      ctxt->wr_op);
+               dprintk("svcrdma: unexpected completion opcode=%d\n",
+                       ctxt->wr_op);
                break;
        }
+
+       svc_rdma_put_context(ctxt, free_pages);
 }
 
 /*
@@ -523,19 +652,15 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
        INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
        INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
        INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
+       INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
+       INIT_LIST_HEAD(&cma_xprt->sc_maps);
        init_waitqueue_head(&cma_xprt->sc_send_wait);
 
        spin_lock_init(&cma_xprt->sc_lock);
        spin_lock_init(&cma_xprt->sc_rq_dto_lock);
        spin_lock_init(&cma_xprt->sc_frmr_q_lock);
-
-       cma_xprt->sc_ord = svcrdma_ord;
-
-       cma_xprt->sc_max_req_size = svcrdma_max_req_size;
-       cma_xprt->sc_max_requests = svcrdma_max_requests;
-       cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT;
-       atomic_set(&cma_xprt->sc_sq_count, 0);
-       atomic_set(&cma_xprt->sc_ctxt_used, 0);
+       spin_lock_init(&cma_xprt->sc_ctxt_lock);
+       spin_lock_init(&cma_xprt->sc_map_lock);
 
        if (listener)
                set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
@@ -543,7 +668,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
        return cma_xprt;
 }
 
-int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
+int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
 {
        struct ib_recv_wr recv_wr, *bad_recv_wr;
        struct svc_rdma_op_ctxt *ctxt;
@@ -561,7 +686,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
                        pr_err("svcrdma: Too many sges (%d)\n", sge_no);
                        goto err_put_ctxt;
                }
-               page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+               page = alloc_page(flags);
+               if (!page)
+                       goto err_put_ctxt;
                ctxt->pages[sge_no] = page;
                pa = ib_dma_map_page(xprt->sc_cm_id->device,
                                     page, 0, PAGE_SIZE,
@@ -571,7 +698,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
                atomic_inc(&xprt->sc_dma_used);
                ctxt->sge[sge_no].addr = pa;
                ctxt->sge[sge_no].length = PAGE_SIZE;
-               ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
+               ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
                ctxt->count = sge_no + 1;
                buflen += PAGE_SIZE;
        }
@@ -886,11 +1013,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        struct rdma_conn_param conn_param;
        struct ib_cq_init_attr cq_attr = {};
        struct ib_qp_init_attr qp_attr;
-       struct ib_device_attr devattr;
-       int uninitialized_var(dma_mr_acc);
-       int need_dma_mr = 0;
-       int ret;
-       int i;
+       struct ib_device *dev;
+       unsigned int i;
+       int ret = 0;
 
        listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
        clear_bit(XPT_CONN, &xprt->xpt_flags);
@@ -910,37 +1035,42 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n",
                newxprt, newxprt->sc_cm_id);
 
-       ret = ib_query_device(newxprt->sc_cm_id->device, &devattr);
-       if (ret) {
-               dprintk("svcrdma: could not query device attributes on "
-                       "device %p, rc=%d\n", newxprt->sc_cm_id->device, ret);
-               goto errout;
-       }
+       dev = newxprt->sc_cm_id->device;
 
        /* Qualify the transport resource defaults with the
         * capabilities of this particular device */
-       newxprt->sc_max_sge = min((size_t)devattr.max_sge,
+       newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge,
                                  (size_t)RPCSVC_MAXPAGES);
-       newxprt->sc_max_sge_rd = min_t(size_t, devattr.max_sge_rd,
+       newxprt->sc_max_sge_rd = min_t(size_t, dev->attrs.max_sge_rd,
                                       RPCSVC_MAXPAGES);
-       newxprt->sc_max_requests = min((size_t)devattr.max_qp_wr,
-                                  (size_t)svcrdma_max_requests);
-       newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests;
+       newxprt->sc_max_req_size = svcrdma_max_req_size;
+       newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
+                                        svcrdma_max_requests);
+       newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
+                                           svcrdma_max_bc_requests);
+       newxprt->sc_rq_depth = newxprt->sc_max_requests +
+                              newxprt->sc_max_bc_requests;
+       newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth;
+
+       if (!svc_rdma_prealloc_ctxts(newxprt))
+               goto errout;
+       if (!svc_rdma_prealloc_maps(newxprt))
+               goto errout;
 
        /*
         * Limit ORD based on client limit, local device limit, and
         * configured svcrdma limit.
         */
-       newxprt->sc_ord = min_t(size_t, devattr.max_qp_rd_atom, newxprt->sc_ord);
+       newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord);
        newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);
 
-       newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device);
+       newxprt->sc_pd = ib_alloc_pd(dev);
        if (IS_ERR(newxprt->sc_pd)) {
                dprintk("svcrdma: error creating PD for connect request\n");
                goto errout;
        }
        cq_attr.cqe = newxprt->sc_sq_depth;
-       newxprt->sc_sq_cq = ib_create_cq(newxprt->sc_cm_id->device,
+       newxprt->sc_sq_cq = ib_create_cq(dev,
                                         sq_comp_handler,
                                         cq_event_handler,
                                         newxprt,
@@ -949,8 +1079,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
                dprintk("svcrdma: error creating SQ CQ for connect request\n");
                goto errout;
        }
-       cq_attr.cqe = newxprt->sc_max_requests;
-       newxprt->sc_rq_cq = ib_create_cq(newxprt->sc_cm_id->device,
+       cq_attr.cqe = newxprt->sc_rq_depth;
+       newxprt->sc_rq_cq = ib_create_cq(dev,
                                         rq_comp_handler,
                                         cq_event_handler,
                                         newxprt,
@@ -964,7 +1094,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        qp_attr.event_handler = qp_event_handler;
        qp_attr.qp_context = &newxprt->sc_xprt;
        qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
-       qp_attr.cap.max_recv_wr = newxprt->sc_max_requests;
+       qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
        qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
        qp_attr.cap.max_recv_sge = newxprt->sc_max_sge;
        qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -978,7 +1108,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
                "    cap.max_send_sge = %d\n"
                "    cap.max_recv_sge = %d\n",
                newxprt->sc_cm_id, newxprt->sc_pd,
-               newxprt->sc_cm_id->device, newxprt->sc_pd->device,
+               dev, newxprt->sc_pd->device,
                qp_attr.cap.max_send_wr,
                qp_attr.cap.max_recv_wr,
                qp_attr.cap.max_send_sge,
@@ -1014,9 +1144,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
         *      of an RDMA_READ. IB does not.
         */
        newxprt->sc_reader = rdma_read_chunk_lcl;
-       if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+       if (dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
                newxprt->sc_frmr_pg_list_len =
-                       devattr.max_fast_reg_page_list_len;
+                       dev->attrs.max_fast_reg_page_list_len;
                newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
                newxprt->sc_reader = rdma_read_chunk_frmr;
        }
@@ -1024,44 +1154,16 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        /*
         * Determine if a DMA MR is required and if so, what privs are required
         */
-       if (!rdma_protocol_iwarp(newxprt->sc_cm_id->device,
-                                newxprt->sc_cm_id->port_num) &&
-           !rdma_ib_or_roce(newxprt->sc_cm_id->device,
-                            newxprt->sc_cm_id->port_num))
+       if (!rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num) &&
+           !rdma_ib_or_roce(dev, newxprt->sc_cm_id->port_num))
                goto errout;
 
-       if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) ||
-           !(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
-               need_dma_mr = 1;
-               dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
-               if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
-                                       newxprt->sc_cm_id->port_num) &&
-                   !(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG))
-                       dma_mr_acc |= IB_ACCESS_REMOTE_WRITE;
-       }
-
-       if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
-                               newxprt->sc_cm_id->port_num))
+       if (rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num))
                newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
 
-       /* Create the DMA MR if needed, otherwise, use the DMA LKEY */
-       if (need_dma_mr) {
-               /* Register all of physical memory */
-               newxprt->sc_phys_mr =
-                       ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc);
-               if (IS_ERR(newxprt->sc_phys_mr)) {
-                       dprintk("svcrdma: Failed to create DMA MR ret=%d\n",
-                               ret);
-                       goto errout;
-               }
-               newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey;
-       } else
-               newxprt->sc_dma_lkey =
-                       newxprt->sc_cm_id->device->local_dma_lkey;
-
        /* Post receive buffers */
-       for (i = 0; i < newxprt->sc_max_requests; i++) {
-               ret = svc_rdma_post_recv(newxprt);
+       for (i = 0; i < newxprt->sc_rq_depth; i++) {
+               ret = svc_rdma_post_recv(newxprt, GFP_KERNEL);
                if (ret) {
                        dprintk("svcrdma: failure posting receive buffers\n");
                        goto errout;
@@ -1160,12 +1262,14 @@ static void __svc_rdma_free(struct work_struct *work)
 {
        struct svcxprt_rdma *rdma =
                container_of(work, struct svcxprt_rdma, sc_work);
-       dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
+       struct svc_xprt *xprt = &rdma->sc_xprt;
+
+       dprintk("svcrdma: %s(%p)\n", __func__, rdma);
 
        /* We should only be called from kref_put */
-       if (atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0)
+       if (atomic_read(&xprt->xpt_ref.refcount) != 0)
                pr_err("svcrdma: sc_xprt still in use? (%d)\n",
-                      atomic_read(&rdma->sc_xprt.xpt_ref.refcount));
+                      atomic_read(&xprt->xpt_ref.refcount));
 
        /*
         * Destroy queued, but not processed read completions. Note
@@ -1193,15 +1297,22 @@ static void __svc_rdma_free(struct work_struct *work)
        }
 
        /* Warn if we leaked a resource or under-referenced */
-       if (atomic_read(&rdma->sc_ctxt_used) != 0)
+       if (rdma->sc_ctxt_used != 0)
                pr_err("svcrdma: ctxt still in use? (%d)\n",
-                      atomic_read(&rdma->sc_ctxt_used));
+                      rdma->sc_ctxt_used);
        if (atomic_read(&rdma->sc_dma_used) != 0)
                pr_err("svcrdma: dma still in use? (%d)\n",
                       atomic_read(&rdma->sc_dma_used));
 
-       /* De-allocate fastreg mr */
+       /* Final put of backchannel client transport */
+       if (xprt->xpt_bc_xprt) {
+               xprt_put(xprt->xpt_bc_xprt);
+               xprt->xpt_bc_xprt = NULL;
+       }
+
        rdma_dealloc_frmr_q(rdma);
+       svc_rdma_destroy_ctxts(rdma);
+       svc_rdma_destroy_maps(rdma);
 
        /* Destroy the QP if present (not a listener) */
        if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
@@ -1213,9 +1324,6 @@ static void __svc_rdma_free(struct work_struct *work)
        if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq))
                ib_destroy_cq(rdma->sc_rq_cq);
 
-       if (rdma->sc_phys_mr && !IS_ERR(rdma->sc_phys_mr))
-               ib_dereg_mr(rdma->sc_phys_mr);
-
        if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
                ib_dealloc_pd(rdma->sc_pd);
 
@@ -1321,7 +1429,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
        int length;
        int ret;
 
-       p = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+       p = alloc_page(GFP_KERNEL);
+       if (!p)
+               return;
        va = page_address(p);
 
        /* XDR encode error */
@@ -1341,7 +1451,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
                return;
        }
        atomic_inc(&xprt->sc_dma_used);
-       ctxt->sge[0].lkey = xprt->sc_dma_lkey;
+       ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey;
        ctxt->sge[0].length = length;
 
        /* Prepare SEND WR */
index 740bddc..b1b009f 100644 (file)
@@ -63,7 +63,7 @@
  */
 
 static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
-static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
+unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_inline_write_padding;
 static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
@@ -143,12 +143,7 @@ static struct ctl_table sunrpc_table[] = {
 
 #endif
 
-#define RPCRDMA_BIND_TO                (60U * HZ)
-#define RPCRDMA_INIT_REEST_TO  (5U * HZ)
-#define RPCRDMA_MAX_REEST_TO   (30U * HZ)
-#define RPCRDMA_IDLE_DISC_TO   (5U * 60 * HZ)
-
-static struct rpc_xprt_ops xprt_rdma_procs;    /* forward reference */
+static struct rpc_xprt_ops xprt_rdma_procs;    /*forward reference */
 
 static void
 xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
@@ -174,7 +169,7 @@ xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap)
        xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6;
 }
 
-static void
+void
 xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap)
 {
        char buf[128];
@@ -203,7 +198,7 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap)
        xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
 }
 
-static void
+void
 xprt_rdma_free_addresses(struct rpc_xprt *xprt)
 {
        unsigned int i;
@@ -499,7 +494,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
        if (req == NULL)
                return NULL;
 
-       flags = GFP_NOIO | __GFP_NOWARN;
+       flags = RPCRDMA_DEF_GFP;
        if (RPC_IS_SWAPPER(task))
                flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
 
@@ -642,7 +637,7 @@ drop_connection:
        return -ENOTCONN;       /* implies disconnect */
 }
 
-static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
+void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
 {
        struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
        long idle_time = 0;
@@ -743,6 +738,11 @@ void xprt_rdma_cleanup(void)
 
        rpcrdma_destroy_wq();
        frwr_destroy_recovery_wq();
+
+       rc = xprt_unregister_transport(&xprt_rdma_bc);
+       if (rc)
+               dprintk("RPC:       %s: xprt_unregister(bc) returned %i\n",
+                       __func__, rc);
 }
 
 int xprt_rdma_init(void)
@@ -766,6 +766,14 @@ int xprt_rdma_init(void)
                return rc;
        }
 
+       rc = xprt_register_transport(&xprt_rdma_bc);
+       if (rc) {
+               xprt_unregister_transport(&xprt_rdma);
+               rpcrdma_destroy_wq();
+               frwr_destroy_recovery_wq();
+               return rc;
+       }
+
        dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
 
        dprintk("Defaults:\n");
index 732c71c..878f1bf 100644 (file)
@@ -462,7 +462,6 @@ int
 rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 {
        struct rpcrdma_ia *ia = &xprt->rx_ia;
-       struct ib_device_attr *devattr = &ia->ri_devattr;
        int rc;
 
        ia->ri_dma_mr = NULL;
@@ -482,16 +481,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
                goto out2;
        }
 
-       rc = ib_query_device(ia->ri_device, devattr);
-       if (rc) {
-               dprintk("RPC:       %s: ib_query_device failed %d\n",
-                       __func__, rc);
-               goto out3;
-       }
-
        if (memreg == RPCRDMA_FRMR) {
-               if (!(devattr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) ||
-                   (devattr->max_fast_reg_page_list_len == 0)) {
+               if (!(ia->ri_device->attrs.device_cap_flags &
+                               IB_DEVICE_MEM_MGT_EXTENSIONS) ||
+                   (ia->ri_device->attrs.max_fast_reg_page_list_len == 0)) {
                        dprintk("RPC:       %s: FRMR registration "
                                "not supported by HCA\n", __func__);
                        memreg = RPCRDMA_MTHCAFMR;
@@ -566,24 +559,23 @@ int
 rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
                                struct rpcrdma_create_data_internal *cdata)
 {
-       struct ib_device_attr *devattr = &ia->ri_devattr;
        struct ib_cq *sendcq, *recvcq;
        struct ib_cq_init_attr cq_attr = {};
        unsigned int max_qp_wr;
        int rc, err;
 
-       if (devattr->max_sge < RPCRDMA_MAX_IOVS) {
+       if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) {
                dprintk("RPC:       %s: insufficient sge's available\n",
                        __func__);
                return -ENOMEM;
        }
 
-       if (devattr->max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
+       if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
                dprintk("RPC:       %s: insufficient wqe's available\n",
                        __func__);
                return -ENOMEM;
        }
-       max_qp_wr = devattr->max_qp_wr - RPCRDMA_BACKWARD_WRS;
+       max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS;
 
        /* check provider's send/recv wr limits */
        if (cdata->max_requests > max_qp_wr)
@@ -668,11 +660,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 
        /* Client offers RDMA Read but does not initiate */
        ep->rep_remote_cma.initiator_depth = 0;
-       if (devattr->max_qp_rd_atom > 32)       /* arbitrary but <= 255 */
+       if (ia->ri_device->attrs.max_qp_rd_atom > 32)   /* arbitrary but <= 255 */
                ep->rep_remote_cma.responder_resources = 32;
        else
                ep->rep_remote_cma.responder_resources =
-                                               devattr->max_qp_rd_atom;
+                                               ia->ri_device->attrs.max_qp_rd_atom;
 
        ep->rep_remote_cma.retry_count = 7;
        ep->rep_remote_cma.flow_control = 0;
index 728101d..38fe11b 100644 (file)
 #define RDMA_RESOLVE_TIMEOUT   (5000)  /* 5 seconds */
 #define RDMA_CONNECT_RETRY_MAX (2)     /* retries if no listener backlog */
 
+#define RPCRDMA_BIND_TO                (60U * HZ)
+#define RPCRDMA_INIT_REEST_TO  (5U * HZ)
+#define RPCRDMA_MAX_REEST_TO   (30U * HZ)
+#define RPCRDMA_IDLE_DISC_TO   (5U * 60 * HZ)
+
 /*
  * Interface Adapter -- one per transport instance
  */
@@ -68,7 +73,6 @@ struct rpcrdma_ia {
        struct completion       ri_done;
        int                     ri_async_rc;
        unsigned int            ri_max_frmr_depth;
-       struct ib_device_attr   ri_devattr;
        struct ib_qp_attr       ri_qp_attr;
        struct ib_qp_init_attr  ri_qp_init_attr;
 };
@@ -142,6 +146,8 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
        return (struct rpcrdma_msg *)rb->rg_base;
 }
 
+#define RPCRDMA_DEF_GFP                (GFP_NOIO | __GFP_NOWARN)
+
 /*
  * struct rpcrdma_rep -- this structure encapsulates state required to recv
  * and complete a reply, asynchronously. It needs several pieces of
@@ -309,6 +315,8 @@ struct rpcrdma_buffer {
        u32                     rb_bc_srv_max_requests;
        spinlock_t              rb_reqslock;    /* protect rb_allreqs */
        struct list_head        rb_allreqs;
+
+       u32                     rb_bc_max_requests;
 };
 #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
 
@@ -516,6 +524,10 @@ int rpcrdma_marshal_req(struct rpc_rqst *);
 
 /* RPC/RDMA module init - xprtrdma/transport.c
  */
+extern unsigned int xprt_rdma_max_inline_read;
+void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
+void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
+void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
 int xprt_rdma_init(void);
 void xprt_rdma_cleanup(void);
 
@@ -531,11 +543,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
 void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
-/* Temporary NFS request map cache. Created in svc_rdma.c  */
-extern struct kmem_cache *svc_rdma_map_cachep;
-/* WR context cache. Created in svc_rdma.c  */
-extern struct kmem_cache *svc_rdma_ctxt_cachep;
-/* Workqueue created in svc_rdma.c */
-extern struct workqueue_struct *svc_rdma_wq;
+extern struct xprt_class xprt_rdma_bc;
 
 #endif                         /* _LINUX_SUNRPC_XPRT_RDMA_H */
index ebc661d..47f7da5 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/list.h>
 #include <linux/workqueue.h>
 #include <linux/if_vlan.h>
+#include <linux/rtnetlink.h>
 #include <net/ip_fib.h>
 #include <net/switchdev.h>
 
@@ -567,7 +568,6 @@ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
 }
 EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
 
-static DEFINE_MUTEX(switchdev_mutex);
 static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
 
 /**
@@ -582,9 +582,9 @@ int register_switchdev_notifier(struct notifier_block *nb)
 {
        int err;
 
-       mutex_lock(&switchdev_mutex);
+       rtnl_lock();
        err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
-       mutex_unlock(&switchdev_mutex);
+       rtnl_unlock();
        return err;
 }
 EXPORT_SYMBOL_GPL(register_switchdev_notifier);
@@ -600,9 +600,9 @@ int unregister_switchdev_notifier(struct notifier_block *nb)
 {
        int err;
 
-       mutex_lock(&switchdev_mutex);
+       rtnl_lock();
        err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
-       mutex_unlock(&switchdev_mutex);
+       rtnl_unlock();
        return err;
 }
 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
@@ -616,16 +616,17 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
  *     Call all network notifier blocks. This should be called by driver
  *     when it needs to propagate hardware event.
  *     Return values are the same as for raw_notifier_call_chain().
+ *     rtnl_lock must be held.
  */
 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
                             struct switchdev_notifier_info *info)
 {
        int err;
 
+       ASSERT_RTNL();
+
        info->dev = dev;
-       mutex_lock(&switchdev_mutex);
        err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
-       mutex_unlock(&switchdev_mutex);
        return err;
 }
 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
index 350cca3..69ee2ee 100644 (file)
@@ -289,15 +289,14 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid,
                                struct sockaddr_tipc *addr, void *usr_data,
                                void *buf, size_t len)
 {
-       struct tipc_subscriber *subscriber = usr_data;
+       struct tipc_subscriber *subscrb = usr_data;
        struct tipc_subscription *sub = NULL;
        struct tipc_net *tn = net_generic(net, tipc_net_id);
 
-       tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscriber, &sub);
-       if (sub)
-               tipc_nametbl_subscribe(sub);
-       else
-               tipc_conn_terminate(tn->topsrv, subscriber->conid);
+       if (tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscrb, &sub))
+               return tipc_conn_terminate(tn->topsrv, subscrb->conid);
+
+       tipc_nametbl_subscribe(sub);
 }
 
 /* Handle one request to establish a new subscriber */
index c5bf5ef..49d5093 100644 (file)
@@ -2339,6 +2339,7 @@ again:
 
                        if (signal_pending(current)) {
                                err = sock_intr_errno(timeo);
+                               scm_destroy(&scm);
                                goto out;
                        }
 
index 3b0ce1c..547ceec 100644 (file)
@@ -231,20 +231,22 @@ static const struct ieee80211_regdomain world_regdom = {
                /* IEEE 802.11b/g, channels 1..11 */
                REG_RULE(2412-10, 2462+10, 40, 6, 20, 0),
                /* IEEE 802.11b/g, channels 12..13. */
-               REG_RULE(2467-10, 2472+10, 40, 6, 20,
-                       NL80211_RRF_NO_IR),
+               REG_RULE(2467-10, 2472+10, 20, 6, 20,
+                       NL80211_RRF_NO_IR | NL80211_RRF_AUTO_BW),
                /* IEEE 802.11 channel 14 - Only JP enables
                 * this and for 802.11b only */
                REG_RULE(2484-10, 2484+10, 20, 6, 20,
                        NL80211_RRF_NO_IR |
                        NL80211_RRF_NO_OFDM),
                /* IEEE 802.11a, channel 36..48 */
-               REG_RULE(5180-10, 5240+10, 160, 6, 20,
-                        NL80211_RRF_NO_IR),
+               REG_RULE(5180-10, 5240+10, 80, 6, 20,
+                        NL80211_RRF_NO_IR |
+                        NL80211_RRF_AUTO_BW),
 
                /* IEEE 802.11a, channel 52..64 - DFS required */
-               REG_RULE(5260-10, 5320+10, 160, 6, 20,
+               REG_RULE(5260-10, 5320+10, 80, 6, 20,
                        NL80211_RRF_NO_IR |
+                       NL80211_RRF_AUTO_BW |
                        NL80211_RRF_DFS),
 
                /* IEEE 802.11a, channel 100..144 - DFS required */
@@ -2745,7 +2747,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
        const struct ieee80211_power_rule *power_rule = NULL;
        char bw[32], cac_time[32];
 
-       pr_info("  (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)\n");
+       pr_debug("  (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)\n");
 
        for (i = 0; i < rd->n_reg_rules; i++) {
                reg_rule = &rd->reg_rules[i];
@@ -2772,7 +2774,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
                 * in certain regions
                 */
                if (power_rule->max_antenna_gain)
-                       pr_info("  (%d KHz - %d KHz @ %s), (%d mBi, %d mBm), (%s)\n",
+                       pr_debug("  (%d KHz - %d KHz @ %s), (%d mBi, %d mBm), (%s)\n",
                                freq_range->start_freq_khz,
                                freq_range->end_freq_khz,
                                bw,
@@ -2780,7 +2782,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
                                power_rule->max_eirp,
                                cac_time);
                else
-                       pr_info("  (%d KHz - %d KHz @ %s), (N/A, %d mBm), (%s)\n",
+                       pr_debug("  (%d KHz - %d KHz @ %s), (N/A, %d mBm), (%s)\n",
                                freq_range->start_freq_khz,
                                freq_range->end_freq_khz,
                                bw,
@@ -2813,35 +2815,35 @@ static void print_regdomain(const struct ieee80211_regdomain *rd)
                        struct cfg80211_registered_device *rdev;
                        rdev = cfg80211_rdev_by_wiphy_idx(lr->wiphy_idx);
                        if (rdev) {
-                               pr_info("Current regulatory domain updated by AP to: %c%c\n",
+                               pr_debug("Current regulatory domain updated by AP to: %c%c\n",
                                        rdev->country_ie_alpha2[0],
                                        rdev->country_ie_alpha2[1]);
                        } else
-                               pr_info("Current regulatory domain intersected:\n");
+                               pr_debug("Current regulatory domain intersected:\n");
                } else
-                       pr_info("Current regulatory domain intersected:\n");
+                       pr_debug("Current regulatory domain intersected:\n");
        } else if (is_world_regdom(rd->alpha2)) {
-               pr_info("World regulatory domain updated:\n");
+               pr_debug("World regulatory domain updated:\n");
        } else {
                if (is_unknown_alpha2(rd->alpha2))
-                       pr_info("Regulatory domain changed to driver built-in settings (unknown country)\n");
+                       pr_debug("Regulatory domain changed to driver built-in settings (unknown country)\n");
                else {
                        if (reg_request_cell_base(lr))
-                               pr_info("Regulatory domain changed to country: %c%c by Cell Station\n",
+                               pr_debug("Regulatory domain changed to country: %c%c by Cell Station\n",
                                        rd->alpha2[0], rd->alpha2[1]);
                        else
-                               pr_info("Regulatory domain changed to country: %c%c\n",
+                               pr_debug("Regulatory domain changed to country: %c%c\n",
                                        rd->alpha2[0], rd->alpha2[1]);
                }
        }
 
-       pr_info(" DFS Master region: %s", reg_dfs_region_str(rd->dfs_region));
+       pr_debug(" DFS Master region: %s", reg_dfs_region_str(rd->dfs_region));
        print_rd_rules(rd);
 }
 
 static void print_regdomain_info(const struct ieee80211_regdomain *rd)
 {
-       pr_info("Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]);
+       pr_debug("Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]);
        print_rd_rules(rd);
 }
 
@@ -2862,7 +2864,8 @@ static int reg_set_rd_user(const struct ieee80211_regdomain *rd,
                return -EALREADY;
 
        if (!is_valid_rd(rd)) {
-               pr_err("Invalid regulatory domain detected:\n");
+               pr_err("Invalid regulatory domain detected: %c%c\n",
+                      rd->alpha2[0], rd->alpha2[1]);
                print_regdomain_info(rd);
                return -EINVAL;
        }
@@ -2898,7 +2901,8 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd,
                return -EALREADY;
 
        if (!is_valid_rd(rd)) {
-               pr_err("Invalid regulatory domain detected:\n");
+               pr_err("Invalid regulatory domain detected: %c%c\n",
+                      rd->alpha2[0], rd->alpha2[1]);
                print_regdomain_info(rd);
                return -EINVAL;
        }
@@ -2956,7 +2960,8 @@ static int reg_set_rd_country_ie(const struct ieee80211_regdomain *rd,
         */
 
        if (!is_valid_rd(rd)) {
-               pr_err("Invalid regulatory domain detected:\n");
+               pr_err("Invalid regulatory domain detected: %c%c\n",
+                      rd->alpha2[0], rd->alpha2[1]);
                print_regdomain_info(rd);
                return -EINVAL;
        }
index e080746..48958d3 100644 (file)
@@ -594,7 +594,8 @@ static int ignore_undef_symbol(struct elf_info *info, const char *symname)
                if (strncmp(symname, "_restgpr0_", sizeof("_restgpr0_") - 1) == 0 ||
                    strncmp(symname, "_savegpr0_", sizeof("_savegpr0_") - 1) == 0 ||
                    strncmp(symname, "_restvr_", sizeof("_restvr_") - 1) == 0 ||
-                   strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0)
+                   strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0 ||
+                   strcmp(symname, ".TOC.") == 0)
                        return 1;
        /* Do not ignore this symbol */
        return 0;
diff --git a/scripts/prune-kernel b/scripts/prune-kernel
new file mode 100755 (executable)
index 0000000..ab5034e
--- /dev/null
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# With CONFIG_LOCALVERSION_AUTO every build gets a new version string, so
+# /boot and /lib/modules/ eventually fill up with stale kernels.
+# Usage: prune-kernel <version>... -- purges each version not owned by rpm or running:
+
+for f in "$@"
+do
+        if rpm -qf "/lib/modules/$f" >/dev/null; then
+                echo "keeping $f (installed from rpm)"
+        elif [ "$(uname -r)" = "$f" ]; then
+                echo "keeping $f (running kernel) "
+        else
+                echo "removing $f"
+                rm -f "/boot/initramfs-$f.img" "/boot/System.map-$f"
+                rm -f "/boot/vmlinuz-$f"   "/boot/config-$f"
+                rm -rf "/lib/modules/$f"
+                new-kernel-pkg --remove "$f"
+        fi
+done
index 16622ae..28414b0 100644 (file)
@@ -99,7 +99,7 @@ struct dentry *securityfs_create_file(const char *name, umode_t mode,
 
        dir = d_inode(parent);
 
-       mutex_lock(&dir->i_mutex);
+       inode_lock(dir);
        dentry = lookup_one_len(name, parent, strlen(name));
        if (IS_ERR(dentry))
                goto out;
@@ -129,14 +129,14 @@ struct dentry *securityfs_create_file(const char *name, umode_t mode,
        }
        d_instantiate(dentry, inode);
        dget(dentry);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        return dentry;
 
 out1:
        dput(dentry);
        dentry = ERR_PTR(error);
 out:
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        simple_release_fs(&mount, &mount_count);
        return dentry;
 }
@@ -195,7 +195,7 @@ void securityfs_remove(struct dentry *dentry)
        if (!parent || d_really_is_negative(parent))
                return;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        if (simple_positive(dentry)) {
                if (d_is_dir(dentry))
                        simple_rmdir(d_inode(parent), dentry);
@@ -203,7 +203,7 @@ void securityfs_remove(struct dentry *dentry)
                        simple_unlink(d_inode(parent), dentry);
                dput(dentry);
        }
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        simple_release_fs(&mount, &mount_count);
 }
 EXPORT_SYMBOL_GPL(securityfs_remove);
index c21f09b..9d96551 100644 (file)
@@ -121,7 +121,7 @@ static void ima_check_last_writer(struct integrity_iint_cache *iint,
        if (!(mode & FMODE_WRITE))
                return;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        if (atomic_read(&inode->i_writecount) == 1) {
                if ((iint->version != inode->i_version) ||
                    (iint->flags & IMA_NEW_FILE)) {
@@ -130,7 +130,7 @@ static void ima_check_last_writer(struct integrity_iint_cache *iint,
                                ima_update_xattr(iint, file);
                }
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 }
 
 /**
@@ -186,7 +186,7 @@ static int process_measurement(struct file *file, int mask, int function,
        if (action & IMA_FILE_APPRAISE)
                function = FILE_CHECK;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (action) {
                iint = integrity_inode_get(inode);
@@ -250,7 +250,7 @@ out_free:
        if (pathbuf)
                __putname(pathbuf);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if ((rc && must_appraise) && (ima_appraise & IMA_APPRAISE_ENFORCE))
                return -EACCES;
        return 0;
index 07a8731..09ef276 100644 (file)
@@ -430,7 +430,8 @@ static int __key_instantiate_and_link(struct key *key,
 
                        /* and link it into the destination keyring */
                        if (keyring) {
-                               set_bit(KEY_FLAG_KEEP, &key->flags);
+                               if (test_bit(KEY_FLAG_KEEP, &keyring->flags))
+                                       set_bit(KEY_FLAG_KEEP, &key->flags);
 
                                __key_link(key, _edit);
                        }
index 732c1c7..1b1fd27 100644 (file)
@@ -380,9 +380,9 @@ static int sel_open_policy(struct inode *inode, struct file *filp)
                goto err;
 
        if (i_size_read(inode) != security_policydb_len()) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                i_size_write(inode, security_policydb_len());
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
        rc = security_read_policy(&plm->data, &plm->len);
index e3e9491..a2a1e24 100644 (file)
@@ -97,11 +97,11 @@ config SND_PCM_TIMER
        bool "PCM timer interface" if EXPERT
        default y
        help
-         If you disable this option, pcm timer will be inavailable, so
-         those stubs used pcm timer (e.g. dmix, dsnoop & co) may work
+         If you disable this option, pcm timer will be unavailable, so
+         those stubs that use pcm timer (e.g. dmix, dsnoop & co) may work
          incorrectly.
 
-         For some embedded device, we may disable it to reduce memory
+         For some embedded devices, we may disable it to reduce memory
          footprint, about 20KB on x86_64 platform.
 
 config SND_SEQUENCER_OSS
index 18b8dc4..7fac3ca 100644 (file)
 #include <sound/compress_offload.h>
 #include <sound/compress_driver.h>
 
+/* struct snd_compr_codec_caps overflows the ioctl bit size for some
+ * architectures, so we need to disable the relevant ioctls.
+ */
+#if _IOC_SIZEBITS < 14
+#define COMPR_CODEC_CAPS_OVERFLOW
+#endif
+
 /* TODO:
  * - add substream support for multiple devices in case of
  *     SND_DYNAMIC_MINORS is not used
@@ -440,6 +447,7 @@ out:
        return retval;
 }
 
+#ifndef COMPR_CODEC_CAPS_OVERFLOW
 static int
 snd_compr_get_codec_caps(struct snd_compr_stream *stream, unsigned long arg)
 {
@@ -463,6 +471,7 @@ out:
        kfree(caps);
        return retval;
 }
+#endif /* !COMPR_CODEC_CAPS_OVERFLOW */
 
 /* revisit this with snd_pcm_preallocate_xxx */
 static int snd_compr_allocate_buffer(struct snd_compr_stream *stream,
@@ -801,9 +810,11 @@ static long snd_compr_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
        case _IOC_NR(SNDRV_COMPRESS_GET_CAPS):
                retval = snd_compr_get_caps(stream, arg);
                break;
+#ifndef COMPR_CODEC_CAPS_OVERFLOW
        case _IOC_NR(SNDRV_COMPRESS_GET_CODEC_CAPS):
                retval = snd_compr_get_codec_caps(stream, arg);
                break;
+#endif
        case _IOC_NR(SNDRV_COMPRESS_SET_PARAMS):
                retval = snd_compr_set_params(stream, arg);
                break;
index 196a6fe..a85d455 100644 (file)
@@ -1405,6 +1405,8 @@ static int snd_ctl_tlv_ioctl(struct snd_ctl_file *file,
                return -EFAULT;
        if (tlv.length < sizeof(unsigned int) * 2)
                return -EINVAL;
+       if (!tlv.numid)
+               return -EINVAL;
        down_read(&card->controls_rwsem);
        kctl = snd_ctl_find_numid(card, tlv.numid);
        if (kctl == NULL) {
index f845ecf..656d9a9 100644 (file)
@@ -90,7 +90,7 @@ static int snd_hrtimer_start(struct snd_timer *t)
        struct snd_hrtimer *stime = t->private_data;
 
        atomic_set(&stime->running, 0);
-       hrtimer_cancel(&stime->hrt);
+       hrtimer_try_to_cancel(&stime->hrt);
        hrtimer_start(&stime->hrt, ns_to_ktime(t->sticks * resolution),
                      HRTIMER_MODE_REL);
        atomic_set(&stime->running, 1);
@@ -101,6 +101,7 @@ static int snd_hrtimer_stop(struct snd_timer *t)
 {
        struct snd_hrtimer *stime = t->private_data;
        atomic_set(&stime->running, 0);
+       hrtimer_try_to_cancel(&stime->hrt);
        return 0;
 }
 
index 0e73d03..ebc9fdf 100644 (file)
@@ -835,7 +835,8 @@ static int choose_rate(struct snd_pcm_substream *substream,
        return snd_pcm_hw_param_near(substream, params, SNDRV_PCM_HW_PARAM_RATE, best_rate, NULL);
 }
 
-static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream)
+static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream,
+                                    bool trylock)
 {
        struct snd_pcm_runtime *runtime = substream->runtime;
        struct snd_pcm_hw_params *params, *sparams;
@@ -849,7 +850,10 @@ static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream)
        struct snd_mask sformat_mask;
        struct snd_mask mask;
 
-       if (mutex_lock_interruptible(&runtime->oss.params_lock))
+       if (trylock) {
+               if (!(mutex_trylock(&runtime->oss.params_lock)))
+                       return -EAGAIN;
+       } else if (mutex_lock_interruptible(&runtime->oss.params_lock))
                return -EINTR;
        sw_params = kzalloc(sizeof(*sw_params), GFP_KERNEL);
        params = kmalloc(sizeof(*params), GFP_KERNEL);
@@ -1092,7 +1096,7 @@ static int snd_pcm_oss_get_active_substream(struct snd_pcm_oss_file *pcm_oss_fil
                if (asubstream == NULL)
                        asubstream = substream;
                if (substream->runtime->oss.params) {
-                       err = snd_pcm_oss_change_params(substream);
+                       err = snd_pcm_oss_change_params(substream, false);
                        if (err < 0)
                                return err;
                }
@@ -1132,7 +1136,7 @@ static int snd_pcm_oss_make_ready(struct snd_pcm_substream *substream)
                return 0;
        runtime = substream->runtime;
        if (runtime->oss.params) {
-               err = snd_pcm_oss_change_params(substream);
+               err = snd_pcm_oss_change_params(substream, false);
                if (err < 0)
                        return err;
        }
@@ -2163,7 +2167,7 @@ static int snd_pcm_oss_get_space(struct snd_pcm_oss_file *pcm_oss_file, int stre
        runtime = substream->runtime;
 
        if (runtime->oss.params &&
-           (err = snd_pcm_oss_change_params(substream)) < 0)
+           (err = snd_pcm_oss_change_params(substream, false)) < 0)
                return err;
 
        info.fragsize = runtime->oss.period_bytes;
@@ -2804,7 +2808,12 @@ static int snd_pcm_oss_mmap(struct file *file, struct vm_area_struct *area)
                return -EIO;
        
        if (runtime->oss.params) {
-               if ((err = snd_pcm_oss_change_params(substream)) < 0)
+               /* use mutex_trylock() for params_lock for avoiding a deadlock
+                * between mmap_sem and params_lock taken by
+                * copy_from/to_user() in snd_pcm_oss_write/read()
+                */
+               err = snd_pcm_oss_change_params(substream, true);
+               if (err < 0)
                        return err;
        }
 #ifdef CONFIG_SND_PCM_OSS_PLUGINS
index b48b434..9630e9f 100644 (file)
@@ -255,10 +255,15 @@ static int snd_pcm_ioctl_hw_params_compat(struct snd_pcm_substream *substream,
        if (! (runtime = substream->runtime))
                return -ENOTTY;
 
-       /* only fifo_size is different, so just copy all */
-       data = memdup_user(data32, sizeof(*data32));
-       if (IS_ERR(data))
-               return PTR_ERR(data);
+       data = kmalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       /* only fifo_size (RO from userspace) is different, so just copy all */
+       if (copy_from_user(data, data32, sizeof(*data32))) {
+               err = -EFAULT;
+               goto error;
+       }
 
        if (refine)
                err = snd_pcm_hw_refine(substream, data);
index a775984..795437b 100644 (file)
@@ -942,31 +942,36 @@ static long snd_rawmidi_kernel_read1(struct snd_rawmidi_substream *substream,
        unsigned long flags;
        long result = 0, count1;
        struct snd_rawmidi_runtime *runtime = substream->runtime;
+       unsigned long appl_ptr;
 
+       spin_lock_irqsave(&runtime->lock, flags);
        while (count > 0 && runtime->avail) {
                count1 = runtime->buffer_size - runtime->appl_ptr;
                if (count1 > count)
                        count1 = count;
-               spin_lock_irqsave(&runtime->lock, flags);
                if (count1 > (int)runtime->avail)
                        count1 = runtime->avail;
+
+               /* update runtime->appl_ptr before unlocking for userbuf */
+               appl_ptr = runtime->appl_ptr;
+               runtime->appl_ptr += count1;
+               runtime->appl_ptr %= runtime->buffer_size;
+               runtime->avail -= count1;
+
                if (kernelbuf)
-                       memcpy(kernelbuf + result, runtime->buffer + runtime->appl_ptr, count1);
+                       memcpy(kernelbuf + result, runtime->buffer + appl_ptr, count1);
                if (userbuf) {
                        spin_unlock_irqrestore(&runtime->lock, flags);
                        if (copy_to_user(userbuf + result,
-                                        runtime->buffer + runtime->appl_ptr, count1)) {
+                                        runtime->buffer + appl_ptr, count1)) {
                                return result > 0 ? result : -EFAULT;
                        }
                        spin_lock_irqsave(&runtime->lock, flags);
                }
-               runtime->appl_ptr += count1;
-               runtime->appl_ptr %= runtime->buffer_size;
-               runtime->avail -= count1;
-               spin_unlock_irqrestore(&runtime->lock, flags);
                result += count1;
                count -= count1;
        }
+       spin_unlock_irqrestore(&runtime->lock, flags);
        return result;
 }
 
@@ -1055,23 +1060,16 @@ int snd_rawmidi_transmit_empty(struct snd_rawmidi_substream *substream)
 EXPORT_SYMBOL(snd_rawmidi_transmit_empty);
 
 /**
- * snd_rawmidi_transmit_peek - copy data from the internal buffer
+ * __snd_rawmidi_transmit_peek - copy data from the internal buffer
  * @substream: the rawmidi substream
  * @buffer: the buffer pointer
  * @count: data size to transfer
  *
- * Copies data from the internal output buffer to the given buffer.
- *
- * Call this in the interrupt handler when the midi output is ready,
- * and call snd_rawmidi_transmit_ack() after the transmission is
- * finished.
- *
- * Return: The size of copied data, or a negative error code on failure.
+ * This is a variant of snd_rawmidi_transmit_peek() without spinlock.
  */
-int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
+int __snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
                              unsigned char *buffer, int count)
 {
-       unsigned long flags;
        int result, count1;
        struct snd_rawmidi_runtime *runtime = substream->runtime;
 
@@ -1081,7 +1079,6 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
                return -EINVAL;
        }
        result = 0;
-       spin_lock_irqsave(&runtime->lock, flags);
        if (runtime->avail >= runtime->buffer_size) {
                /* warning: lowlevel layer MUST trigger down the hardware */
                goto __skip;
@@ -1106,25 +1103,47 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
                }
        }
       __skip:
+       return result;
+}
+EXPORT_SYMBOL(__snd_rawmidi_transmit_peek);
+
+/**
+ * snd_rawmidi_transmit_peek - copy data from the internal buffer
+ * @substream: the rawmidi substream
+ * @buffer: the buffer pointer
+ * @count: data size to transfer
+ *
+ * Copies data from the internal output buffer to the given buffer.
+ *
+ * Call this in the interrupt handler when the midi output is ready,
+ * and call snd_rawmidi_transmit_ack() after the transmission is
+ * finished.
+ *
+ * Return: The size of copied data, or a negative error code on failure.
+ */
+int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
+                             unsigned char *buffer, int count)
+{
+       struct snd_rawmidi_runtime *runtime = substream->runtime;
+       int result;
+       unsigned long flags;
+
+       spin_lock_irqsave(&runtime->lock, flags);
+       result = __snd_rawmidi_transmit_peek(substream, buffer, count);
        spin_unlock_irqrestore(&runtime->lock, flags);
        return result;
 }
 EXPORT_SYMBOL(snd_rawmidi_transmit_peek);
 
 /**
- * snd_rawmidi_transmit_ack - acknowledge the transmission
+ * __snd_rawmidi_transmit_ack - acknowledge the transmission
  * @substream: the rawmidi substream
  * @count: the transferred count
  *
- * Advances the hardware pointer for the internal output buffer with
- * the given size and updates the condition.
- * Call after the transmission is finished.
- *
- * Return: The advanced size if successful, or a negative error code on failure.
+ * This is a variant of snd_rawmidi_transmit_ack() without spinlock.
  */
-int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count)
+int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count)
 {
-       unsigned long flags;
        struct snd_rawmidi_runtime *runtime = substream->runtime;
 
        if (runtime->buffer == NULL) {
@@ -1132,7 +1151,6 @@ int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count)
                          "snd_rawmidi_transmit_ack: output is not active!!!\n");
                return -EINVAL;
        }
-       spin_lock_irqsave(&runtime->lock, flags);
        snd_BUG_ON(runtime->avail + count > runtime->buffer_size);
        runtime->hw_ptr += count;
        runtime->hw_ptr %= runtime->buffer_size;
@@ -1142,9 +1160,32 @@ int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count)
                if (runtime->drain || snd_rawmidi_ready(substream))
                        wake_up(&runtime->sleep);
        }
-       spin_unlock_irqrestore(&runtime->lock, flags);
        return count;
 }
+EXPORT_SYMBOL(__snd_rawmidi_transmit_ack);
+
+/**
+ * snd_rawmidi_transmit_ack - acknowledge the transmission
+ * @substream: the rawmidi substream
+ * @count: the transferred count
+ *
+ * Advances the hardware pointer for the internal output buffer with
+ * the given size and updates the condition.
+ * Call after the transmission is finished.
+ *
+ * Return: The advanced size if successful, or a negative error code on failure.
+ */
+int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count)
+{
+       struct snd_rawmidi_runtime *runtime = substream->runtime;
+       int result;
+       unsigned long flags;
+
+       spin_lock_irqsave(&runtime->lock, flags);
+       result = __snd_rawmidi_transmit_ack(substream, count);
+       spin_unlock_irqrestore(&runtime->lock, flags);
+       return result;
+}
 EXPORT_SYMBOL(snd_rawmidi_transmit_ack);
 
 /**
@@ -1160,12 +1201,22 @@ EXPORT_SYMBOL(snd_rawmidi_transmit_ack);
 int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream,
                         unsigned char *buffer, int count)
 {
+       struct snd_rawmidi_runtime *runtime = substream->runtime;
+       int result;
+       unsigned long flags;
+
+       spin_lock_irqsave(&runtime->lock, flags);
        if (!substream->opened)
-               return -EBADFD;
-       count = snd_rawmidi_transmit_peek(substream, buffer, count);
-       if (count < 0)
-               return count;
-       return snd_rawmidi_transmit_ack(substream, count);
+               result = -EBADFD;
+       else {
+               count = __snd_rawmidi_transmit_peek(substream, buffer, count);
+               if (count <= 0)
+                       result = count;
+               else
+                       result = __snd_rawmidi_transmit_ack(substream, count);
+       }
+       spin_unlock_irqrestore(&runtime->lock, flags);
+       return result;
 }
 EXPORT_SYMBOL(snd_rawmidi_transmit);
 
@@ -1177,8 +1228,9 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream,
        unsigned long flags;
        long count1, result;
        struct snd_rawmidi_runtime *runtime = substream->runtime;
+       unsigned long appl_ptr;
 
-       if (snd_BUG_ON(!kernelbuf && !userbuf))
+       if (!kernelbuf && !userbuf)
                return -EINVAL;
        if (snd_BUG_ON(!runtime->buffer))
                return -EINVAL;
@@ -1197,12 +1249,19 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream,
                        count1 = count;
                if (count1 > (long)runtime->avail)
                        count1 = runtime->avail;
+
+               /* update runtime->appl_ptr before unlocking for userbuf */
+               appl_ptr = runtime->appl_ptr;
+               runtime->appl_ptr += count1;
+               runtime->appl_ptr %= runtime->buffer_size;
+               runtime->avail -= count1;
+
                if (kernelbuf)
-                       memcpy(runtime->buffer + runtime->appl_ptr,
+                       memcpy(runtime->buffer + appl_ptr,
                               kernelbuf + result, count1);
                else if (userbuf) {
                        spin_unlock_irqrestore(&runtime->lock, flags);
-                       if (copy_from_user(runtime->buffer + runtime->appl_ptr,
+                       if (copy_from_user(runtime->buffer + appl_ptr,
                                           userbuf + result, count1)) {
                                spin_lock_irqsave(&runtime->lock, flags);
                                result = result > 0 ? result : -EFAULT;
@@ -1210,9 +1269,6 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream,
                        }
                        spin_lock_irqsave(&runtime->lock, flags);
                }
-               runtime->appl_ptr += count1;
-               runtime->appl_ptr %= runtime->buffer_size;
-               runtime->avail -= count1;
                result += count1;
                count -= count1;
        }
index b1221b2..6779e82 100644 (file)
@@ -202,7 +202,7 @@ snd_seq_oss_open(struct file *file, int level)
 
        dp->index = i;
        if (i >= SNDRV_SEQ_OSS_MAX_CLIENTS) {
-               pr_err("ALSA: seq_oss: too many applications\n");
+               pr_debug("ALSA: seq_oss: too many applications\n");
                rc = -ENOMEM;
                goto _error;
        }
index 0f3b381..b16dbef 100644 (file)
@@ -308,7 +308,7 @@ snd_seq_oss_synth_cleanup(struct seq_oss_devinfo *dp)
        struct seq_oss_synth *rec;
        struct seq_oss_synthinfo *info;
 
-       if (snd_BUG_ON(dp->max_synthdev >= SNDRV_SEQ_OSS_MAX_SYNTH_DEVS))
+       if (snd_BUG_ON(dp->max_synthdev > SNDRV_SEQ_OSS_MAX_SYNTH_DEVS))
                return;
        for (i = 0; i < dp->max_synthdev; i++) {
                info = &dp->synths[i];
index 13cfa81..58e79e0 100644 (file)
@@ -678,6 +678,9 @@ static int deliver_to_subscribers(struct snd_seq_client *client,
        else
                down_read(&grp->list_mutex);
        list_for_each_entry(subs, &grp->list_head, src_list) {
+               /* both ports ready? */
+               if (atomic_read(&subs->ref_count) != 2)
+                       continue;
                event->dest = subs->info.dest;
                if (subs->info.flags & SNDRV_SEQ_PORT_SUBS_TIMESTAMP)
                        /* convert time according to flag with subscription */
index 81f7c10..6517590 100644 (file)
@@ -49,11 +49,12 @@ static int snd_seq_call_port_info_ioctl(struct snd_seq_client *client, unsigned
        struct snd_seq_port_info *data;
        mm_segment_t fs;
 
-       data = memdup_user(data32, sizeof(*data32));
-       if (IS_ERR(data))
-               return PTR_ERR(data);
+       data = kmalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
 
-       if (get_user(data->flags, &data32->flags) ||
+       if (copy_from_user(data, data32, sizeof(*data32)) ||
+           get_user(data->flags, &data32->flags) ||
            get_user(data->time_queue, &data32->time_queue))
                goto error;
        data->kernel = NULL;
index 55170a2..921fb2b 100644 (file)
@@ -173,10 +173,6 @@ struct snd_seq_client_port *snd_seq_create_port(struct snd_seq_client *client,
 }
 
 /* */
-enum group_type {
-       SRC_LIST, DEST_LIST
-};
-
 static int subscribe_port(struct snd_seq_client *client,
                          struct snd_seq_client_port *port,
                          struct snd_seq_port_subs_info *grp,
@@ -203,6 +199,20 @@ static struct snd_seq_client_port *get_client_port(struct snd_seq_addr *addr,
        return NULL;
 }
 
+static void delete_and_unsubscribe_port(struct snd_seq_client *client,
+                                       struct snd_seq_client_port *port,
+                                       struct snd_seq_subscribers *subs,
+                                       bool is_src, bool ack);
+
+static inline struct snd_seq_subscribers *
+get_subscriber(struct list_head *p, bool is_src)
+{
+       if (is_src)
+               return list_entry(p, struct snd_seq_subscribers, src_list);
+       else
+               return list_entry(p, struct snd_seq_subscribers, dest_list);
+}
+
 /*
  * remove all subscribers on the list
  * this is called from port_delete, for each src and dest list.
@@ -210,7 +220,7 @@ static struct snd_seq_client_port *get_client_port(struct snd_seq_addr *addr,
 static void clear_subscriber_list(struct snd_seq_client *client,
                                  struct snd_seq_client_port *port,
                                  struct snd_seq_port_subs_info *grp,
-                                 int grptype)
+                                 int is_src)
 {
        struct list_head *p, *n;
 
@@ -219,15 +229,13 @@ static void clear_subscriber_list(struct snd_seq_client *client,
                struct snd_seq_client *c;
                struct snd_seq_client_port *aport;
 
-               if (grptype == SRC_LIST) {
-                       subs = list_entry(p, struct snd_seq_subscribers, src_list);
+               subs = get_subscriber(p, is_src);
+               if (is_src)
                        aport = get_client_port(&subs->info.dest, &c);
-               } else {
-                       subs = list_entry(p, struct snd_seq_subscribers, dest_list);
+               else
                        aport = get_client_port(&subs->info.sender, &c);
-               }
-               list_del(p);
-               unsubscribe_port(client, port, grp, &subs->info, 0);
+               delete_and_unsubscribe_port(client, port, subs, is_src, false);
+
                if (!aport) {
                        /* looks like the connected port is being deleted.
                         * we decrease the counter, and when both ports are deleted
@@ -235,21 +243,14 @@ static void clear_subscriber_list(struct snd_seq_client *client,
                         */
                        if (atomic_dec_and_test(&subs->ref_count))
                                kfree(subs);
-               } else {
-                       /* ok we got the connected port */
-                       struct snd_seq_port_subs_info *agrp;
-                       agrp = (grptype == SRC_LIST) ? &aport->c_dest : &aport->c_src;
-                       down_write(&agrp->list_mutex);
-                       if (grptype == SRC_LIST)
-                               list_del(&subs->dest_list);
-                       else
-                               list_del(&subs->src_list);
-                       up_write(&agrp->list_mutex);
-                       unsubscribe_port(c, aport, agrp, &subs->info, 1);
-                       kfree(subs);
-                       snd_seq_port_unlock(aport);
-                       snd_seq_client_unlock(c);
+                       continue;
                }
+
+               /* ok we got the connected port */
+               delete_and_unsubscribe_port(c, aport, subs, !is_src, true);
+               kfree(subs);
+               snd_seq_port_unlock(aport);
+               snd_seq_client_unlock(c);
        }
 }
 
@@ -262,8 +263,8 @@ static int port_delete(struct snd_seq_client *client,
        snd_use_lock_sync(&port->use_lock); 
 
        /* clear subscribers info */
-       clear_subscriber_list(client, port, &port->c_src, SRC_LIST);
-       clear_subscriber_list(client, port, &port->c_dest, DEST_LIST);
+       clear_subscriber_list(client, port, &port->c_src, true);
+       clear_subscriber_list(client, port, &port->c_dest, false);
 
        if (port->private_free)
                port->private_free(port->private_data);
@@ -479,85 +480,120 @@ static int match_subs_info(struct snd_seq_port_subscribe *r,
        return 0;
 }
 
-
-/* connect two ports */
-int snd_seq_port_connect(struct snd_seq_client *connector,
-                        struct snd_seq_client *src_client,
-                        struct snd_seq_client_port *src_port,
-                        struct snd_seq_client *dest_client,
-                        struct snd_seq_client_port *dest_port,
-                        struct snd_seq_port_subscribe *info)
+static int check_and_subscribe_port(struct snd_seq_client *client,
+                                   struct snd_seq_client_port *port,
+                                   struct snd_seq_subscribers *subs,
+                                   bool is_src, bool exclusive, bool ack)
 {
-       struct snd_seq_port_subs_info *src = &src_port->c_src;
-       struct snd_seq_port_subs_info *dest = &dest_port->c_dest;
-       struct snd_seq_subscribers *subs, *s;
-       int err, src_called = 0;
-       unsigned long flags;
-       int exclusive;
+       struct snd_seq_port_subs_info *grp;
+       struct list_head *p;
+       struct snd_seq_subscribers *s;
+       int err;
 
-       subs = kzalloc(sizeof(*subs), GFP_KERNEL);
-       if (! subs)
-               return -ENOMEM;
-
-       subs->info = *info;
-       atomic_set(&subs->ref_count, 2);
-
-       down_write(&src->list_mutex);
-       down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING);
-
-       exclusive = info->flags & SNDRV_SEQ_PORT_SUBS_EXCLUSIVE ? 1 : 0;
+       grp = is_src ? &port->c_src : &port->c_dest;
        err = -EBUSY;
+       down_write(&grp->list_mutex);
        if (exclusive) {
-               if (! list_empty(&src->list_head) || ! list_empty(&dest->list_head))
+               if (!list_empty(&grp->list_head))
                        goto __error;
        } else {
-               if (src->exclusive || dest->exclusive)
+               if (grp->exclusive)
                        goto __error;
                /* check whether already exists */
-               list_for_each_entry(s, &src->list_head, src_list) {
-                       if (match_subs_info(info, &s->info))
-                               goto __error;
-               }
-               list_for_each_entry(s, &dest->list_head, dest_list) {
-                       if (match_subs_info(info, &s->info))
+               list_for_each(p, &grp->list_head) {
+                       s = get_subscriber(p, is_src);
+                       if (match_subs_info(&subs->info, &s->info))
                                goto __error;
                }
        }
 
-       if ((err = subscribe_port(src_client, src_port, src, info,
-                                 connector->number != src_client->number)) < 0)
-               goto __error;
-       src_called = 1;
-
-       if ((err = subscribe_port(dest_client, dest_port, dest, info,
-                                 connector->number != dest_client->number)) < 0)
+       err = subscribe_port(client, port, grp, &subs->info, ack);
+       if (err < 0) {
+               grp->exclusive = 0;
                goto __error;
+       }
 
        /* add to list */
-       write_lock_irqsave(&src->list_lock, flags);
-       // write_lock(&dest->list_lock); // no other lock yet
-       list_add_tail(&subs->src_list, &src->list_head);
-       list_add_tail(&subs->dest_list, &dest->list_head);
-       // write_unlock(&dest->list_lock); // no other lock yet
-       write_unlock_irqrestore(&src->list_lock, flags);
+       write_lock_irq(&grp->list_lock);
+       if (is_src)
+               list_add_tail(&subs->src_list, &grp->list_head);
+       else
+               list_add_tail(&subs->dest_list, &grp->list_head);
+       grp->exclusive = exclusive;
+       atomic_inc(&subs->ref_count);
+       write_unlock_irq(&grp->list_lock);
+       err = 0;
+
+ __error:
+       up_write(&grp->list_mutex);
+       return err;
+}
 
-       src->exclusive = dest->exclusive = exclusive;
+static void delete_and_unsubscribe_port(struct snd_seq_client *client,
+                                       struct snd_seq_client_port *port,
+                                       struct snd_seq_subscribers *subs,
+                                       bool is_src, bool ack)
+{
+       struct snd_seq_port_subs_info *grp;
+
+       grp = is_src ? &port->c_src : &port->c_dest;
+       down_write(&grp->list_mutex);
+       write_lock_irq(&grp->list_lock);
+       if (is_src)
+               list_del(&subs->src_list);
+       else
+               list_del(&subs->dest_list);
+       grp->exclusive = 0;
+       write_unlock_irq(&grp->list_lock);
+       up_write(&grp->list_mutex);
+
+       unsubscribe_port(client, port, grp, &subs->info, ack);
+}
+
+/* connect two ports */
+int snd_seq_port_connect(struct snd_seq_client *connector,
+                        struct snd_seq_client *src_client,
+                        struct snd_seq_client_port *src_port,
+                        struct snd_seq_client *dest_client,
+                        struct snd_seq_client_port *dest_port,
+                        struct snd_seq_port_subscribe *info)
+{
+       struct snd_seq_subscribers *subs;
+       bool exclusive;
+       int err;
+
+       subs = kzalloc(sizeof(*subs), GFP_KERNEL);
+       if (!subs)
+               return -ENOMEM;
+
+       subs->info = *info;
+       atomic_set(&subs->ref_count, 0);
+       INIT_LIST_HEAD(&subs->src_list);
+       INIT_LIST_HEAD(&subs->dest_list);
+
+       exclusive = !!(info->flags & SNDRV_SEQ_PORT_SUBS_EXCLUSIVE);
+
+       err = check_and_subscribe_port(src_client, src_port, subs, true,
+                                      exclusive,
+                                      connector->number != src_client->number);
+       if (err < 0)
+               goto error;
+       err = check_and_subscribe_port(dest_client, dest_port, subs, false,
+                                      exclusive,
+                                      connector->number != dest_client->number);
+       if (err < 0)
+               goto error_dest;
 
-       up_write(&dest->list_mutex);
-       up_write(&src->list_mutex);
        return 0;
 
__error:
-       if (src_called)
-               unsubscribe_port(src_client, src_port, src, info,
-                                connector->number != src_client->number);
error_dest:
+       delete_and_unsubscribe_port(src_client, src_port, subs, true,
+                                   connector->number != src_client->number);
+ error:
        kfree(subs);
-       up_write(&dest->list_mutex);
-       up_write(&src->list_mutex);
        return err;
 }
 
-
 /* remove the connection */
 int snd_seq_port_disconnect(struct snd_seq_client *connector,
                            struct snd_seq_client *src_client,
@@ -567,37 +603,28 @@ int snd_seq_port_disconnect(struct snd_seq_client *connector,
                            struct snd_seq_port_subscribe *info)
 {
        struct snd_seq_port_subs_info *src = &src_port->c_src;
-       struct snd_seq_port_subs_info *dest = &dest_port->c_dest;
        struct snd_seq_subscribers *subs;
        int err = -ENOENT;
-       unsigned long flags;
 
        down_write(&src->list_mutex);
-       down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING);
-
        /* look for the connection */
        list_for_each_entry(subs, &src->list_head, src_list) {
                if (match_subs_info(info, &subs->info)) {
-                       write_lock_irqsave(&src->list_lock, flags);
-                       // write_lock(&dest->list_lock);  // no lock yet
-                       list_del(&subs->src_list);
-                       list_del(&subs->dest_list);
-                       // write_unlock(&dest->list_lock);
-                       write_unlock_irqrestore(&src->list_lock, flags);
-                       src->exclusive = dest->exclusive = 0;
-                       unsubscribe_port(src_client, src_port, src, info,
-                                        connector->number != src_client->number);
-                       unsubscribe_port(dest_client, dest_port, dest, info,
-                                        connector->number != dest_client->number);
-                       kfree(subs);
+                       atomic_dec(&subs->ref_count); /* mark as not ready */
                        err = 0;
                        break;
                }
        }
-
-       up_write(&dest->list_mutex);
        up_write(&src->list_mutex);
-       return err;
+       if (err < 0)
+               return err;
+
+       delete_and_unsubscribe_port(src_client, src_port, subs, true,
+                                   connector->number != src_client->number);
+       delete_and_unsubscribe_port(dest_client, dest_port, subs, false,
+                                   connector->number != dest_client->number);
+       kfree(subs);
+       return 0;
 }
 
 
index 82b220c..2931049 100644 (file)
@@ -90,6 +90,9 @@ void snd_seq_timer_delete(struct snd_seq_timer **tmr)
 
 void snd_seq_timer_defaults(struct snd_seq_timer * tmr)
 {
+       unsigned long flags;
+
+       spin_lock_irqsave(&tmr->lock, flags);
        /* setup defaults */
        tmr->ppq = 96;          /* 96 PPQ */
        tmr->tempo = 500000;    /* 120 BPM */
@@ -105,21 +108,25 @@ void snd_seq_timer_defaults(struct snd_seq_timer * tmr)
        tmr->preferred_resolution = seq_default_timer_resolution;
 
        tmr->skew = tmr->skew_base = SKEW_BASE;
+       spin_unlock_irqrestore(&tmr->lock, flags);
 }
 
-void snd_seq_timer_reset(struct snd_seq_timer * tmr)
+static void seq_timer_reset(struct snd_seq_timer *tmr)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&tmr->lock, flags);
-
        /* reset time & songposition */
        tmr->cur_time.tv_sec = 0;
        tmr->cur_time.tv_nsec = 0;
 
        tmr->tick.cur_tick = 0;
        tmr->tick.fraction = 0;
+}
+
+void snd_seq_timer_reset(struct snd_seq_timer *tmr)
+{
+       unsigned long flags;
 
+       spin_lock_irqsave(&tmr->lock, flags);
+       seq_timer_reset(tmr);
        spin_unlock_irqrestore(&tmr->lock, flags);
 }
 
@@ -138,8 +145,11 @@ static void snd_seq_timer_interrupt(struct snd_timer_instance *timeri,
        tmr = q->timer;
        if (tmr == NULL)
                return;
-       if (!tmr->running)
+       spin_lock_irqsave(&tmr->lock, flags);
+       if (!tmr->running) {
+               spin_unlock_irqrestore(&tmr->lock, flags);
                return;
+       }
 
        resolution *= ticks;
        if (tmr->skew != tmr->skew_base) {
@@ -148,8 +158,6 @@ static void snd_seq_timer_interrupt(struct snd_timer_instance *timeri,
                        (((resolution & 0xffff) * tmr->skew) >> 16);
        }
 
-       spin_lock_irqsave(&tmr->lock, flags);
-
        /* update timer */
        snd_seq_inc_time_nsec(&tmr->cur_time, resolution);
 
@@ -296,26 +304,30 @@ int snd_seq_timer_open(struct snd_seq_queue *q)
        t->callback = snd_seq_timer_interrupt;
        t->callback_data = q;
        t->flags |= SNDRV_TIMER_IFLG_AUTO;
+       spin_lock_irq(&tmr->lock);
        tmr->timeri = t;
+       spin_unlock_irq(&tmr->lock);
        return 0;
 }
 
 int snd_seq_timer_close(struct snd_seq_queue *q)
 {
        struct snd_seq_timer *tmr;
+       struct snd_timer_instance *t;
        
        tmr = q->timer;
        if (snd_BUG_ON(!tmr))
                return -EINVAL;
-       if (tmr->timeri) {
-               snd_timer_stop(tmr->timeri);
-               snd_timer_close(tmr->timeri);
-               tmr->timeri = NULL;
-       }
+       spin_lock_irq(&tmr->lock);
+       t = tmr->timeri;
+       tmr->timeri = NULL;
+       spin_unlock_irq(&tmr->lock);
+       if (t)
+               snd_timer_close(t);
        return 0;
 }
 
-int snd_seq_timer_stop(struct snd_seq_timer * tmr)
+static int seq_timer_stop(struct snd_seq_timer *tmr)
 {
        if (! tmr->timeri)
                return -EINVAL;
@@ -326,6 +338,17 @@ int snd_seq_timer_stop(struct snd_seq_timer * tmr)
        return 0;
 }
 
+int snd_seq_timer_stop(struct snd_seq_timer *tmr)
+{
+       unsigned long flags;
+       int err;
+
+       spin_lock_irqsave(&tmr->lock, flags);
+       err = seq_timer_stop(tmr);
+       spin_unlock_irqrestore(&tmr->lock, flags);
+       return err;
+}
+
 static int initialize_timer(struct snd_seq_timer *tmr)
 {
        struct snd_timer *t;
@@ -358,13 +381,13 @@ static int initialize_timer(struct snd_seq_timer *tmr)
        return 0;
 }
 
-int snd_seq_timer_start(struct snd_seq_timer * tmr)
+static int seq_timer_start(struct snd_seq_timer *tmr)
 {
        if (! tmr->timeri)
                return -EINVAL;
        if (tmr->running)
-               snd_seq_timer_stop(tmr);
-       snd_seq_timer_reset(tmr);
+               seq_timer_stop(tmr);
+       seq_timer_reset(tmr);
        if (initialize_timer(tmr) < 0)
                return -EINVAL;
        snd_timer_start(tmr->timeri, tmr->ticks);
@@ -373,14 +396,25 @@ int snd_seq_timer_start(struct snd_seq_timer * tmr)
        return 0;
 }
 
-int snd_seq_timer_continue(struct snd_seq_timer * tmr)
+int snd_seq_timer_start(struct snd_seq_timer *tmr)
+{
+       unsigned long flags;
+       int err;
+
+       spin_lock_irqsave(&tmr->lock, flags);
+       err = seq_timer_start(tmr);
+       spin_unlock_irqrestore(&tmr->lock, flags);
+       return err;
+}
+
+static int seq_timer_continue(struct snd_seq_timer *tmr)
 {
        if (! tmr->timeri)
                return -EINVAL;
        if (tmr->running)
                return -EBUSY;
        if (! tmr->initialized) {
-               snd_seq_timer_reset(tmr);
+               seq_timer_reset(tmr);
                if (initialize_timer(tmr) < 0)
                        return -EINVAL;
        }
@@ -390,11 +424,24 @@ int snd_seq_timer_continue(struct snd_seq_timer * tmr)
        return 0;
 }
 
+int snd_seq_timer_continue(struct snd_seq_timer *tmr)
+{
+       unsigned long flags;
+       int err;
+
+       spin_lock_irqsave(&tmr->lock, flags);
+       err = seq_timer_continue(tmr);
+       spin_unlock_irqrestore(&tmr->lock, flags);
+       return err;
+}
+
 /* return current 'real' time. use timeofday() to get better granularity. */
 snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr)
 {
        snd_seq_real_time_t cur_time;
+       unsigned long flags;
 
+       spin_lock_irqsave(&tmr->lock, flags);
        cur_time = tmr->cur_time;
        if (tmr->running) { 
                struct timeval tm;
@@ -410,7 +457,7 @@ snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr)
                }
                snd_seq_sanity_real_time(&cur_time);
        }
-                
+       spin_unlock_irqrestore(&tmr->lock, flags);
        return cur_time;        
 }
 
index 3da2d48..c82ed3e 100644 (file)
@@ -155,21 +155,26 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream,
        struct snd_virmidi *vmidi = substream->runtime->private_data;
        int count, res;
        unsigned char buf[32], *pbuf;
+       unsigned long flags;
 
        if (up) {
                vmidi->trigger = 1;
                if (vmidi->seq_mode == SNDRV_VIRMIDI_SEQ_DISPATCH &&
                    !(vmidi->rdev->flags & SNDRV_VIRMIDI_SUBSCRIBE)) {
-                       snd_rawmidi_transmit_ack(substream, substream->runtime->buffer_size - substream->runtime->avail);
-                       return;         /* ignored */
+                       while (snd_rawmidi_transmit(substream, buf,
+                                                   sizeof(buf)) > 0) {
+                               /* ignored */
+                       }
+                       return;
                }
                if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) {
                        if (snd_seq_kernel_client_dispatch(vmidi->client, &vmidi->event, in_atomic(), 0) < 0)
                                return;
                        vmidi->event.type = SNDRV_SEQ_EVENT_NONE;
                }
+               spin_lock_irqsave(&substream->runtime->lock, flags);
                while (1) {
-                       count = snd_rawmidi_transmit_peek(substream, buf, sizeof(buf));
+                       count = __snd_rawmidi_transmit_peek(substream, buf, sizeof(buf));
                        if (count <= 0)
                                break;
                        pbuf = buf;
@@ -179,16 +184,18 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream,
                                        snd_midi_event_reset_encode(vmidi->parser);
                                        continue;
                                }
-                               snd_rawmidi_transmit_ack(substream, res);
+                               __snd_rawmidi_transmit_ack(substream, res);
                                pbuf += res;
                                count -= res;
                                if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) {
                                        if (snd_seq_kernel_client_dispatch(vmidi->client, &vmidi->event, in_atomic(), 0) < 0)
-                                               return;
+                                               goto out;
                                        vmidi->event.type = SNDRV_SEQ_EVENT_NONE;
                                }
                        }
                }
+       out:
+               spin_unlock_irqrestore(&substream->runtime->lock, flags);
        } else {
                vmidi->trigger = 0;
        }
@@ -254,9 +261,13 @@ static int snd_virmidi_output_open(struct snd_rawmidi_substream *substream)
  */
 static int snd_virmidi_input_close(struct snd_rawmidi_substream *substream)
 {
+       struct snd_virmidi_dev *rdev = substream->rmidi->private_data;
        struct snd_virmidi *vmidi = substream->runtime->private_data;
-       snd_midi_event_free(vmidi->parser);
+
+       write_lock_irq(&rdev->filelist_lock);
        list_del(&vmidi->list);
+       write_unlock_irq(&rdev->filelist_lock);
+       snd_midi_event_free(vmidi->parser);
        substream->runtime->private_data = NULL;
        kfree(vmidi);
        return 0;
index cb25ade..9b513a0 100644 (file)
@@ -65,6 +65,7 @@ struct snd_timer_user {
        int qtail;
        int qused;
        int queue_size;
+       bool disconnected;
        struct snd_timer_read *queue;
        struct snd_timer_tread *tqueue;
        spinlock_t qlock;
@@ -290,6 +291,9 @@ int snd_timer_open(struct snd_timer_instance **ti,
                mutex_unlock(&register_mutex);
                return -ENOMEM;
        }
+       /* take a card refcount for safe disconnection */
+       if (timer->card)
+               get_device(&timer->card->card_dev);
        timeri->slave_class = tid->dev_sclass;
        timeri->slave_id = slave_id;
        if (list_empty(&timer->open_list_head) && timer->hw.open)
@@ -359,6 +363,9 @@ int snd_timer_close(struct snd_timer_instance *timeri)
                }
                spin_unlock(&timer->lock);
                spin_unlock_irq(&slave_active_lock);
+               /* release a card refcount for safe disconnection */
+               if (timer->card)
+                       put_device(&timer->card->card_dev);
                mutex_unlock(&register_mutex);
        }
  out:
@@ -444,6 +451,10 @@ static int snd_timer_start_slave(struct snd_timer_instance *timeri)
        unsigned long flags;
 
        spin_lock_irqsave(&slave_active_lock, flags);
+       if (timeri->flags & SNDRV_TIMER_IFLG_RUNNING) {
+               spin_unlock_irqrestore(&slave_active_lock, flags);
+               return -EBUSY;
+       }
        timeri->flags |= SNDRV_TIMER_IFLG_RUNNING;
        if (timeri->master && timeri->timer) {
                spin_lock(&timeri->timer->lock);
@@ -468,18 +479,28 @@ int snd_timer_start(struct snd_timer_instance *timeri, unsigned int ticks)
                return -EINVAL;
        if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) {
                result = snd_timer_start_slave(timeri);
-               snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START);
+               if (result >= 0)
+                       snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START);
                return result;
        }
        timer = timeri->timer;
        if (timer == NULL)
                return -EINVAL;
+       if (timer->card && timer->card->shutdown)
+               return -ENODEV;
        spin_lock_irqsave(&timer->lock, flags);
+       if (timeri->flags & (SNDRV_TIMER_IFLG_RUNNING |
+                            SNDRV_TIMER_IFLG_START)) {
+               result = -EBUSY;
+               goto unlock;
+       }
        timeri->ticks = timeri->cticks = ticks;
        timeri->pticks = 0;
        result = snd_timer_start1(timer, timeri, ticks);
+ unlock:
        spin_unlock_irqrestore(&timer->lock, flags);
-       snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START);
+       if (result >= 0)
+               snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START);
        return result;
 }
 
@@ -493,6 +514,10 @@ static int _snd_timer_stop(struct snd_timer_instance *timeri, int event)
 
        if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) {
                spin_lock_irqsave(&slave_active_lock, flags);
+               if (!(timeri->flags & SNDRV_TIMER_IFLG_RUNNING)) {
+                       spin_unlock_irqrestore(&slave_active_lock, flags);
+                       return -EBUSY;
+               }
                timeri->flags &= ~SNDRV_TIMER_IFLG_RUNNING;
                list_del_init(&timeri->ack_list);
                list_del_init(&timeri->active_list);
@@ -503,8 +528,17 @@ static int _snd_timer_stop(struct snd_timer_instance *timeri, int event)
        if (!timer)
                return -EINVAL;
        spin_lock_irqsave(&timer->lock, flags);
+       if (!(timeri->flags & (SNDRV_TIMER_IFLG_RUNNING |
+                              SNDRV_TIMER_IFLG_START))) {
+               spin_unlock_irqrestore(&timer->lock, flags);
+               return -EBUSY;
+       }
        list_del_init(&timeri->ack_list);
        list_del_init(&timeri->active_list);
+       if (timer->card && timer->card->shutdown) {
+               spin_unlock_irqrestore(&timer->lock, flags);
+               return 0;
+       }
        if ((timeri->flags & SNDRV_TIMER_IFLG_RUNNING) &&
            !(--timer->running)) {
                timer->hw.stop(timer);
@@ -565,11 +599,18 @@ int snd_timer_continue(struct snd_timer_instance *timeri)
        timer = timeri->timer;
        if (! timer)
                return -EINVAL;
+       if (timer->card && timer->card->shutdown)
+               return -ENODEV;
        spin_lock_irqsave(&timer->lock, flags);
+       if (timeri->flags & SNDRV_TIMER_IFLG_RUNNING) {
+               result = -EBUSY;
+               goto unlock;
+       }
        if (!timeri->cticks)
                timeri->cticks = 1;
        timeri->pticks = 0;
        result = snd_timer_start1(timer, timeri, timer->sticks);
+ unlock:
        spin_unlock_irqrestore(&timer->lock, flags);
        snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_CONTINUE);
        return result;
@@ -628,6 +669,9 @@ static void snd_timer_tasklet(unsigned long arg)
        unsigned long resolution, ticks;
        unsigned long flags;
 
+       if (timer->card && timer->card->shutdown)
+               return;
+
        spin_lock_irqsave(&timer->lock, flags);
        /* now process all callbacks */
        while (!list_empty(&timer->sack_list_head)) {
@@ -668,6 +712,9 @@ void snd_timer_interrupt(struct snd_timer * timer, unsigned long ticks_left)
        if (timer == NULL)
                return;
 
+       if (timer->card && timer->card->shutdown)
+               return;
+
        spin_lock_irqsave(&timer->lock, flags);
 
        /* remember the current resolution */
@@ -697,8 +744,8 @@ void snd_timer_interrupt(struct snd_timer * timer, unsigned long ticks_left)
                        ti->cticks = ti->ticks;
                } else {
                        ti->flags &= ~SNDRV_TIMER_IFLG_RUNNING;
-                       if (--timer->running)
-                               list_del_init(&ti->active_list);
+                       --timer->running;
+                       list_del_init(&ti->active_list);
                }
                if ((timer->hw.flags & SNDRV_TIMER_HW_TASKLET) ||
                    (ti->flags & SNDRV_TIMER_IFLG_FAST))
@@ -881,8 +928,15 @@ static int snd_timer_dev_register(struct snd_device *dev)
 static int snd_timer_dev_disconnect(struct snd_device *device)
 {
        struct snd_timer *timer = device->device_data;
+       struct snd_timer_instance *ti;
+
        mutex_lock(&register_mutex);
        list_del_init(&timer->device_list);
+       /* wake up pending sleepers */
+       list_for_each_entry(ti, &timer->open_list_head, open_list) {
+               if (ti->disconnect)
+                       ti->disconnect(ti);
+       }
        mutex_unlock(&register_mutex);
        return 0;
 }
@@ -893,6 +947,8 @@ void snd_timer_notify(struct snd_timer *timer, int event, struct timespec *tstam
        unsigned long resolution = 0;
        struct snd_timer_instance *ti, *ts;
 
+       if (timer->card && timer->card->shutdown)
+               return;
        if (! (timer->hw.flags & SNDRV_TIMER_HW_SLAVE))
                return;
        if (snd_BUG_ON(event < SNDRV_TIMER_EVENT_MSTART ||
@@ -1002,11 +1058,21 @@ static int snd_timer_s_stop(struct snd_timer * timer)
        return 0;
 }
 
+static int snd_timer_s_close(struct snd_timer *timer)
+{
+       struct snd_timer_system_private *priv;
+
+       priv = (struct snd_timer_system_private *)timer->private_data;
+       del_timer_sync(&priv->tlist);
+       return 0;
+}
+
 static struct snd_timer_hardware snd_timer_system =
 {
        .flags =        SNDRV_TIMER_HW_FIRST | SNDRV_TIMER_HW_TASKLET,
        .resolution =   1000000000L / HZ,
        .ticks =        10000000L,
+       .close =        snd_timer_s_close,
        .start =        snd_timer_s_start,
        .stop =         snd_timer_s_stop
 };
@@ -1051,6 +1117,8 @@ static void snd_timer_proc_read(struct snd_info_entry *entry,
 
        mutex_lock(&register_mutex);
        list_for_each_entry(timer, &snd_timer_list, device_list) {
+               if (timer->card && timer->card->shutdown)
+                       continue;
                switch (timer->tmr_class) {
                case SNDRV_TIMER_CLASS_GLOBAL:
                        snd_iprintf(buffer, "G%i: ", timer->tmr_device);
@@ -1185,6 +1253,14 @@ static void snd_timer_user_ccallback(struct snd_timer_instance *timeri,
        wake_up(&tu->qchange_sleep);
 }
 
+static void snd_timer_user_disconnect(struct snd_timer_instance *timeri)
+{
+       struct snd_timer_user *tu = timeri->callback_data;
+
+       tu->disconnected = true;
+       wake_up(&tu->qchange_sleep);
+}
+
 static void snd_timer_user_tinterrupt(struct snd_timer_instance *timeri,
                                      unsigned long resolution,
                                      unsigned long ticks)
@@ -1558,6 +1634,7 @@ static int snd_timer_user_tselect(struct file *file,
                        ? snd_timer_user_tinterrupt : snd_timer_user_interrupt;
                tu->timeri->ccallback = snd_timer_user_ccallback;
                tu->timeri->callback_data = (void *)tu;
+               tu->timeri->disconnect = snd_timer_user_disconnect;
        }
 
       __err:
@@ -1876,6 +1953,10 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
 
                        remove_wait_queue(&tu->qchange_sleep, &wait);
 
+                       if (tu->disconnected) {
+                               err = -ENODEV;
+                               break;
+                       }
                        if (signal_pending(current)) {
                                err = -ERESTARTSYS;
                                break;
@@ -1925,6 +2006,8 @@ static unsigned int snd_timer_user_poll(struct file *file, poll_table * wait)
        mask = 0;
        if (tu->qused)
                mask |= POLLIN | POLLRDNORM;
+       if (tu->disconnected)
+               mask |= POLLERR;
 
        return mask;
 }
index 75b7485..bde3330 100644 (file)
@@ -87,7 +87,7 @@ MODULE_PARM_DESC(pcm_substreams, "PCM substreams # (1-128) for dummy driver.");
 module_param(fake_buffer, bool, 0444);
 MODULE_PARM_DESC(fake_buffer, "Fake buffer allocations.");
 #ifdef CONFIG_HIGH_RES_TIMERS
-module_param(hrtimer, bool, 0644);
+module_param(hrtimer, bool, 0444);
 MODULE_PARM_DESC(hrtimer, "Use hrtimer as the timer source.");
 #endif
 
index 926e5dc..5022c9b 100644 (file)
@@ -47,14 +47,16 @@ static const unsigned int bridgeco_freq_table[] = {
        [6] = 0x07,
 };
 
-static unsigned int
-get_formation_index(unsigned int rate)
+static int
+get_formation_index(unsigned int rate, unsigned int *index)
 {
        unsigned int i;
 
        for (i = 0; i < ARRAY_SIZE(snd_bebob_rate_table); i++) {
-               if (snd_bebob_rate_table[i] == rate)
-                       return i;
+               if (snd_bebob_rate_table[i] == rate) {
+                       *index = i;
+                       return 0;
+               }
        }
        return -EINVAL;
 }
@@ -425,7 +427,9 @@ make_both_connections(struct snd_bebob *bebob, unsigned int rate)
                goto end;
 
        /* confirm params for both streams */
-       index = get_formation_index(rate);
+       err = get_formation_index(rate, &index);
+       if (err < 0)
+               goto end;
        pcm_channels = bebob->tx_stream_formations[index].pcm;
        midi_channels = bebob->tx_stream_formations[index].midi;
        err = amdtp_am824_set_parameters(&bebob->tx_stream, rate,
index c50177f..f6854db 100644 (file)
@@ -306,7 +306,7 @@ out_master_del:
 out_err:
        kfree(acomp);
        bus->audio_component = NULL;
-       dev_err(dev, "failed to add i915 component master (%d)\n", ret);
+       dev_info(dev, "failed to add i915 component master (%d)\n", ret);
 
        return ret;
 }
index 0216475..37adcc6 100644 (file)
@@ -3,6 +3,7 @@
 config SND_WSS_LIB
         tristate
         select SND_PCM
+       select SND_TIMER
 
 config SND_SB_COMMON
         tristate
@@ -42,6 +43,7 @@ config SND_AD1816A
        select SND_OPL3_LIB
        select SND_MPU401_UART
        select SND_PCM
+       select SND_TIMER
        help
          Say Y here to include support for Analog Devices SoundPort
          AD1816A or compatible sound chips.
@@ -209,6 +211,7 @@ config SND_GUSCLASSIC
        tristate "Gravis UltraSound Classic"
        select SND_RAWMIDI
        select SND_PCM
+       select SND_TIMER
        help
          Say Y here to include support for Gravis UltraSound Classic
          soundcards.
@@ -221,6 +224,7 @@ config SND_GUSEXTREME
        select SND_OPL3_LIB
        select SND_MPU401_UART
        select SND_PCM
+       select SND_TIMER
        help
          Say Y here to include support for Gravis UltraSound Extreme
          soundcards.
index 656ce39..8f6594a 100644 (file)
@@ -155,6 +155,7 @@ config SND_AZT3328
        select SND_PCM
        select SND_RAWMIDI
        select SND_AC97_CODEC
+       select SND_TIMER
        depends on ZONE_DMA
        help
          Say Y here to include support for Aztech AZF3328 (PCI168)
@@ -463,6 +464,7 @@ config SND_EMU10K1
        select SND_HWDEP
        select SND_RAWMIDI
        select SND_AC97_CODEC
+       select SND_TIMER
        depends on ZONE_DMA
        help
          Say Y to include support for Sound Blaster PCI 512, Live!,
@@ -889,6 +891,7 @@ config SND_YMFPCI
        select SND_OPL3_LIB
        select SND_MPU401_UART
        select SND_AC97_CODEC
+       select SND_TIMER
        help
          Say Y here to include support for Yamaha PCI audio chips -
          YMF724, YMF724F, YMF740, YMF740C, YMF744, YMF754.
index 28e2f8b..8914534 100644 (file)
@@ -1141,6 +1141,14 @@ static int snd_emu10k1_emu1010_init(struct snd_emu10k1 *emu)
                emu->emu1010.firmware_thread =
                        kthread_create(emu1010_firmware_thread, emu,
                                       "emu1010_firmware");
+               if (IS_ERR(emu->emu1010.firmware_thread)) {
+                       err = PTR_ERR(emu->emu1010.firmware_thread);
+                       emu->emu1010.firmware_thread = NULL;
+                       dev_info(emu->card->dev,
+                                       "emu1010: Creating thread failed\n");
+                       return err;
+               }
+
                wake_up_process(emu->emu1010.firmware_thread);
        }
 
index 70671ad..6efadbf 100644 (file)
@@ -174,14 +174,40 @@ static inline bool codec_probed(struct hda_codec *codec)
        return device_attach(hda_codec_dev(codec)) > 0 && codec->preset;
 }
 
-/* try to auto-load and bind the codec module */
-static void codec_bind_module(struct hda_codec *codec)
+/* try to auto-load codec module */
+static void request_codec_module(struct hda_codec *codec)
 {
 #ifdef MODULE
        char modalias[32];
+       const char *mod = NULL;
+
+       switch (codec->probe_id) {
+       case HDA_CODEC_ID_GENERIC_HDMI:
+#if IS_MODULE(CONFIG_SND_HDA_CODEC_HDMI)
+               mod = "snd-hda-codec-hdmi";
+#endif
+               break;
+       case HDA_CODEC_ID_GENERIC:
+#if IS_MODULE(CONFIG_SND_HDA_GENERIC)
+               mod = "snd-hda-codec-generic";
+#endif
+               break;
+       default:
+               snd_hdac_codec_modalias(&codec->core, modalias, sizeof(modalias));
+               mod = modalias;
+               break;
+       }
+
+       if (mod)
+               request_module(mod);
+#endif /* MODULE */
+}
 
-       snd_hdac_codec_modalias(&codec->core, modalias, sizeof(modalias));
-       request_module(modalias);
+/* try to auto-load and bind the codec module */
+static void codec_bind_module(struct hda_codec *codec)
+{
+#ifdef MODULE
+       request_codec_module(codec);
        if (codec_probed(codec))
                return;
 #endif
@@ -218,17 +244,13 @@ static int codec_bind_generic(struct hda_codec *codec)
 
        if (is_likely_hdmi_codec(codec)) {
                codec->probe_id = HDA_CODEC_ID_GENERIC_HDMI;
-#if IS_MODULE(CONFIG_SND_HDA_CODEC_HDMI)
-               request_module("snd-hda-codec-hdmi");
-#endif
+               request_codec_module(codec);
                if (codec_probed(codec))
                        return 0;
        }
 
        codec->probe_id = HDA_CODEC_ID_GENERIC;
-#if IS_MODULE(CONFIG_SND_HDA_GENERIC)
-       request_module("snd-hda-codec-generic");
-#endif
+       request_codec_module(codec);
        if (codec_probed(codec))
                return 0;
        return -ENODEV;
index c0bef11..4045dca 100644 (file)
@@ -90,6 +90,8 @@ enum {
 #define NVIDIA_HDA_ENABLE_COHBIT      0x01
 
 /* Defines for Intel SCH HDA snoop control */
+#define INTEL_HDA_CGCTL         0x48
+#define INTEL_HDA_CGCTL_MISCBDCGE        (0x1 << 6)
 #define INTEL_SCH_HDA_DEVC      0x78
 #define INTEL_SCH_HDA_DEVC_NOSNOOP       (0x1<<11)
 
@@ -534,10 +536,21 @@ static void hda_intel_init_chip(struct azx *chip, bool full_reset)
 {
        struct hdac_bus *bus = azx_bus(chip);
        struct pci_dev *pci = chip->pci;
+       u32 val;
 
        if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
                snd_hdac_set_codec_wakeup(bus, true);
+       if (IS_BROXTON(pci)) {
+               pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
+               val = val & ~INTEL_HDA_CGCTL_MISCBDCGE;
+               pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
+       }
        azx_init_chip(chip, full_reset);
+       if (IS_BROXTON(pci)) {
+               pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
+               val = val | INTEL_HDA_CGCTL_MISCBDCGE;
+               pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
+       }
        if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
                snd_hdac_set_codec_wakeup(bus, false);
 
@@ -2078,9 +2091,11 @@ static int azx_probe_continue(struct azx *chip)
                         * for other chips, still continue probing as other
                         * codecs can be on the same link.
                         */
-                       if (CONTROLLER_IN_GPU(pci))
+                       if (CONTROLLER_IN_GPU(pci)) {
+                               dev_err(chip->card->dev,
+                                       "HSW/BDW HD-audio HDMI/DP requires binding with gfx driver\n");
                                goto out_free;
-                       else
+                       } else
                                goto skip_i915;
                }
 
@@ -2149,9 +2164,17 @@ i915_power_fail:
 static void azx_remove(struct pci_dev *pci)
 {
        struct snd_card *card = pci_get_drvdata(pci);
+       struct azx *chip;
+       struct hda_intel *hda;
+
+       if (card) {
+               /* flush the pending probing work */
+               chip = card->private_data;
+               hda = container_of(chip, struct hda_intel, chip);
+               flush_work(&hda->probe_work);
 
-       if (card)
                snd_card_free(card);
+       }
 }
 
 static void azx_shutdown(struct pci_dev *pci)
index a12ae8a..c1c855a 100644 (file)
@@ -614,6 +614,7 @@ enum {
        CS4208_MAC_AUTO,
        CS4208_MBA6,
        CS4208_MBP11,
+       CS4208_MACMINI,
        CS4208_GPIO0,
 };
 
@@ -621,6 +622,7 @@ static const struct hda_model_fixup cs4208_models[] = {
        { .id = CS4208_GPIO0, .name = "gpio0" },
        { .id = CS4208_MBA6, .name = "mba6" },
        { .id = CS4208_MBP11, .name = "mbp11" },
+       { .id = CS4208_MACMINI, .name = "macmini" },
        {}
 };
 
@@ -632,6 +634,7 @@ static const struct snd_pci_quirk cs4208_fixup_tbl[] = {
 /* codec SSID matching */
 static const struct snd_pci_quirk cs4208_mac_fixup_tbl[] = {
        SND_PCI_QUIRK(0x106b, 0x5e00, "MacBookPro 11,2", CS4208_MBP11),
+       SND_PCI_QUIRK(0x106b, 0x6c00, "MacMini 7,1", CS4208_MACMINI),
        SND_PCI_QUIRK(0x106b, 0x7100, "MacBookAir 6,1", CS4208_MBA6),
        SND_PCI_QUIRK(0x106b, 0x7200, "MacBookAir 6,2", CS4208_MBA6),
        SND_PCI_QUIRK(0x106b, 0x7b00, "MacBookPro 12,1", CS4208_MBP11),
@@ -666,6 +669,24 @@ static void cs4208_fixup_mac(struct hda_codec *codec,
        snd_hda_apply_fixup(codec, action);
 }
 
+/* MacMini 7,1 has the inverted jack detection */
+static void cs4208_fixup_macmini(struct hda_codec *codec,
+                                const struct hda_fixup *fix, int action)
+{
+       static const struct hda_pintbl pincfgs[] = {
+               { 0x18, 0x00ab9150 }, /* mic (audio-in) jack: disable detect */
+               { 0x21, 0x004be140 }, /* SPDIF: disable detect */
+               { }
+       };
+
+       if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+               /* HP pin (0x10) has an inverted detection */
+               codec->inv_jack_detect = 1;
+               /* disable the bogus Mic and SPDIF jack detections */
+               snd_hda_apply_pincfgs(codec, pincfgs);
+       }
+}
+
 static int cs4208_spdif_sw_put(struct snd_kcontrol *kcontrol,
                               struct snd_ctl_elem_value *ucontrol)
 {
@@ -709,6 +730,12 @@ static const struct hda_fixup cs4208_fixups[] = {
                .chained = true,
                .chain_id = CS4208_GPIO0,
        },
+       [CS4208_MACMINI] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = cs4208_fixup_macmini,
+               .chained = true,
+               .chain_id = CS4208_GPIO0,
+       },
        [CS4208_GPIO0] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = cs4208_fixup_gpio0,
index 426a29a..1f52b55 100644 (file)
@@ -3653,6 +3653,7 @@ HDA_CODEC_ENTRY(0x10de0070, "GPU 70 HDMI/DP",     patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de0071, "GPU 71 HDMI/DP",  patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de0072, "GPU 72 HDMI/DP",  patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de007d, "GPU 7d HDMI/DP",  patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de0083, "GPU 83 HDMI/DP",  patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI",      patch_nvhdmi_2ch),
 HDA_CODEC_ENTRY(0x11069f80, "VX900 HDMI/DP",   patch_via_hdmi),
 HDA_CODEC_ENTRY(0x11069f81, "VX900 HDMI/DP",   patch_via_hdmi),
index 8143c0e..21992fb 100644 (file)
@@ -327,6 +327,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
        case 0x10ec0292:
                alc_update_coef_idx(codec, 0x4, 1<<15, 0);
                break;
+       case 0x10ec0225:
        case 0x10ec0233:
        case 0x10ec0255:
        case 0x10ec0256:
@@ -900,6 +901,7 @@ static struct alc_codec_rename_pci_table rename_pci_tbl[] = {
        { 0x10ec0899, 0x1028, 0, "ALC3861" },
        { 0x10ec0298, 0x1028, 0, "ALC3266" },
        { 0x10ec0256, 0x1028, 0, "ALC3246" },
+       { 0x10ec0225, 0x1028, 0, "ALC3253" },
        { 0x10ec0670, 0x1025, 0, "ALC669X" },
        { 0x10ec0676, 0x1025, 0, "ALC679X" },
        { 0x10ec0282, 0x1043, 0, "ALC3229" },
@@ -2651,6 +2653,7 @@ enum {
        ALC269_TYPE_ALC298,
        ALC269_TYPE_ALC255,
        ALC269_TYPE_ALC256,
+       ALC269_TYPE_ALC225,
 };
 
 /*
@@ -2680,6 +2683,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec)
        case ALC269_TYPE_ALC298:
        case ALC269_TYPE_ALC255:
        case ALC269_TYPE_ALC256:
+       case ALC269_TYPE_ALC225:
                ssids = alc269_ssids;
                break;
        default:
@@ -3658,6 +3662,16 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec)
                WRITE_COEF(0xb7, 0x802b),
                {}
        };
+       static struct coef_fw coef0225[] = {
+               UPDATE_COEF(0x4a, 1<<8, 0),
+               UPDATE_COEFEX(0x57, 0x05, 1<<14, 0),
+               UPDATE_COEF(0x63, 3<<14, 3<<14),
+               UPDATE_COEF(0x4a, 3<<4, 2<<4),
+               UPDATE_COEF(0x4a, 3<<10, 3<<10),
+               UPDATE_COEF(0x45, 0x3f<<10, 0x34<<10),
+               UPDATE_COEF(0x4a, 3<<10, 0),
+               {}
+       };
 
        switch (codec->core.vendor_id) {
        case 0x10ec0255:
@@ -3682,6 +3696,9 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec)
        case 0x10ec0668:
                alc_process_coef_fw(codec, coef0668);
                break;
+       case 0x10ec0225:
+               alc_process_coef_fw(codec, coef0225);
+               break;
        }
        codec_dbg(codec, "Headset jack set to unplugged mode.\n");
 }
@@ -3727,6 +3744,13 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin,
                UPDATE_COEF(0xc3, 0, 1<<12),
                {}
        };
+       static struct coef_fw coef0225[] = {
+               UPDATE_COEFEX(0x57, 0x05, 1<<14, 1<<14),
+               UPDATE_COEF(0x4a, 3<<4, 2<<4),
+               UPDATE_COEF(0x63, 3<<14, 0),
+               {}
+       };
+
 
        switch (codec->core.vendor_id) {
        case 0x10ec0255:
@@ -3772,6 +3796,12 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin,
                alc_process_coef_fw(codec, coef0688);
                snd_hda_set_pin_ctl_cache(codec, mic_pin, PIN_VREF50);
                break;
+       case 0x10ec0225:
+               alc_update_coef_idx(codec, 0x45, 0x3f<<10, 0x31<<10);
+               snd_hda_set_pin_ctl_cache(codec, hp_pin, 0);
+               alc_process_coef_fw(codec, coef0225);
+               snd_hda_set_pin_ctl_cache(codec, mic_pin, PIN_VREF50);
+               break;
        }
        codec_dbg(codec, "Headset jack set to mic-in mode.\n");
 }
@@ -3884,6 +3914,13 @@ static void alc_headset_mode_ctia(struct hda_codec *codec)
                WRITE_COEF(0xc3, 0x0000),
                {}
        };
+       static struct coef_fw coef0225[] = {
+               UPDATE_COEF(0x45, 0x3f<<10, 0x35<<10),
+               UPDATE_COEF(0x49, 1<<8, 1<<8),
+               UPDATE_COEF(0x4a, 7<<6, 7<<6),
+               UPDATE_COEF(0x4a, 3<<4, 3<<4),
+               {}
+       };
 
        switch (codec->core.vendor_id) {
        case 0x10ec0255:
@@ -3912,6 +3949,9 @@ static void alc_headset_mode_ctia(struct hda_codec *codec)
        case 0x10ec0668:
                alc_process_coef_fw(codec, coef0688);
                break;
+       case 0x10ec0225:
+               alc_process_coef_fw(codec, coef0225);
+               break;
        }
        codec_dbg(codec, "Headset jack set to iPhone-style headset mode.\n");
 }
@@ -3955,6 +3995,13 @@ static void alc_headset_mode_omtp(struct hda_codec *codec)
                WRITE_COEF(0xc3, 0x0000),
                {}
        };
+       static struct coef_fw coef0225[] = {
+               UPDATE_COEF(0x45, 0x3f<<10, 0x39<<10),
+               UPDATE_COEF(0x49, 1<<8, 1<<8),
+               UPDATE_COEF(0x4a, 7<<6, 7<<6),
+               UPDATE_COEF(0x4a, 3<<4, 3<<4),
+               {}
+       };
 
        switch (codec->core.vendor_id) {
        case 0x10ec0255:
@@ -3983,6 +4030,9 @@ static void alc_headset_mode_omtp(struct hda_codec *codec)
        case 0x10ec0668:
                alc_process_coef_fw(codec, coef0688);
                break;
+       case 0x10ec0225:
+               alc_process_coef_fw(codec, coef0225);
+               break;
        }
        codec_dbg(codec, "Headset jack set to Nokia-style headset mode.\n");
 }
@@ -4014,6 +4064,11 @@ static void alc_determine_headset_type(struct hda_codec *codec)
                WRITE_COEF(0xc3, 0x0c00),
                {}
        };
+       static struct coef_fw coef0225[] = {
+               UPDATE_COEF(0x45, 0x3f<<10, 0x34<<10),
+               UPDATE_COEF(0x49, 1<<8, 1<<8),
+               {}
+       };
 
        switch (codec->core.vendor_id) {
        case 0x10ec0255:
@@ -4058,6 +4113,12 @@ static void alc_determine_headset_type(struct hda_codec *codec)
                val = alc_read_coef_idx(codec, 0xbe);
                is_ctia = (val & 0x1c02) == 0x1c02;
                break;
+       case 0x10ec0225:
+               alc_process_coef_fw(codec, coef0225);
+               msleep(800);
+               val = alc_read_coef_idx(codec, 0x46);
+               is_ctia = (val & 0x00f0) == 0x00f0;
+               break;
        }
 
        codec_dbg(codec, "Headset jack detected iPhone-style headset: %s\n",
@@ -5560,6 +5621,9 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
        {.id = ALC292_FIXUP_TPT440, .name = "tpt440"},
        {}
 };
+#define ALC225_STANDARD_PINS \
+       {0x12, 0xb7a60130}, \
+       {0x21, 0x04211020}
 
 #define ALC256_STANDARD_PINS \
        {0x12, 0x90a60140}, \
@@ -5581,6 +5645,12 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
        {0x21, 0x03211020}
 
 static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
+       SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+               ALC225_STANDARD_PINS,
+               {0x14, 0x901701a0}),
+       SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+               ALC225_STANDARD_PINS,
+               {0x14, 0x901701b0}),
        SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE,
                {0x14, 0x90170110},
                {0x21, 0x02211020}),
@@ -5906,6 +5976,9 @@ static int patch_alc269(struct hda_codec *codec)
                spec->gen.mixer_nid = 0; /* ALC256 does not have any loopback mixer path */
                alc_update_coef_idx(codec, 0x36, 1 << 13, 1 << 5); /* Switch pcbeep path to Line in path*/
                break;
+       case 0x10ec0225:
+               spec->codec_variant = ALC269_TYPE_ALC225;
+               break;
        }
 
        if (snd_hda_codec_read(codec, 0x51, 0, AC_VERB_PARAMETERS, 0) == 0x10ec5505) {
@@ -6566,6 +6639,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
        SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_BASS_1A),
+       SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A),
        SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP),
        SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16),
        SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16),
@@ -6795,6 +6869,7 @@ static int patch_alc680(struct hda_codec *codec)
  */
 static const struct hda_device_id snd_hda_id_realtek[] = {
        HDA_CODEC_ENTRY(0x10ec0221, "ALC221", patch_alc269),
+       HDA_CODEC_ENTRY(0x10ec0225, "ALC225", patch_alc269),
        HDA_CODEC_ENTRY(0x10ec0231, "ALC231", patch_alc269),
        HDA_CODEC_ENTRY(0x10ec0233, "ALC233", patch_alc269),
        HDA_CODEC_ENTRY(0x10ec0235, "ALC233", patch_alc269),
index d75deba..dfcd386 100644 (file)
@@ -22,6 +22,7 @@ config SND_SUN_AMD7930
 config SND_SUN_CS4231
        tristate "Sun CS4231"
        select SND_PCM
+       select SND_TIMER
        help
          Say Y here to include support for CS4231 sound device on Sun.
 
index 3952236..fac7e6e 100644 (file)
@@ -221,6 +221,8 @@ static int snd_at73c213_pcm_open(struct snd_pcm_substream *substream)
        runtime->hw = snd_at73c213_playback_hw;
        chip->substream = substream;
 
+       clk_enable(chip->ssc->clk);
+
        return 0;
 }
 
@@ -228,6 +230,7 @@ static int snd_at73c213_pcm_close(struct snd_pcm_substream *substream)
 {
        struct snd_at73c213 *chip = snd_pcm_substream_chip(substream);
        chip->substream = NULL;
+       clk_disable(chip->ssc->clk);
        return 0;
 }
 
@@ -897,6 +900,8 @@ static int snd_at73c213_dev_init(struct snd_card *card,
        chip->card = card;
        chip->irq = -1;
 
+       clk_enable(chip->ssc->clk);
+
        retval = request_irq(irq, snd_at73c213_interrupt, 0, "at73c213", chip);
        if (retval) {
                dev_dbg(&chip->spi->dev, "unable to request irq %d\n", irq);
@@ -935,6 +940,8 @@ out_irq:
        free_irq(chip->irq, chip);
        chip->irq = -1;
 out:
+       clk_disable(chip->ssc->clk);
+
        return retval;
 }
 
@@ -1012,7 +1019,9 @@ static int snd_at73c213_remove(struct spi_device *spi)
        int retval;
 
        /* Stop playback. */
+       clk_enable(chip->ssc->clk);
        ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXDIS));
+       clk_disable(chip->ssc->clk);
 
        /* Mute sound. */
        retval = snd_at73c213_write_reg(chip, DAC_LMPG, 0x3f);
@@ -1080,6 +1089,7 @@ static int snd_at73c213_suspend(struct device *dev)
        struct snd_at73c213 *chip = card->private_data;
 
        ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXDIS));
+       clk_disable(chip->ssc->clk);
        clk_disable(chip->board->dac_clk);
 
        return 0;
@@ -1091,6 +1101,7 @@ static int snd_at73c213_resume(struct device *dev)
        struct snd_at73c213 *chip = card->private_data;
 
        clk_enable(chip->board->dac_clk);
+       clk_enable(chip->ssc->clk);
        ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXEN));
 
        return 0;
index 23ea6d8..4f6ce1c 100644 (file)
@@ -1121,6 +1121,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
        switch (chip->usb_id) {
        case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema  */
        case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */
+       case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */
        case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */
        case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */
        case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
@@ -1205,8 +1206,12 @@ void snd_usb_set_interface_quirk(struct usb_device *dev)
         * "Playback Design" products need a 50ms delay after setting the
         * USB interface.
         */
-       if (le16_to_cpu(dev->descriptor.idVendor) == 0x23ba)
+       switch (le16_to_cpu(dev->descriptor.idVendor)) {
+       case 0x23ba: /* Playback Design */
+       case 0x0644: /* TEAC Corp. */
                mdelay(50);
+               break;
+       }
 }
 
 void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
@@ -1221,6 +1226,14 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
            (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
                mdelay(20);
 
+       /*
+        * "TEAC Corp." products need a 20ms delay after each
+        * class compliant request
+        */
+       if ((le16_to_cpu(dev->descriptor.idVendor) == 0x0644) &&
+           (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
+               mdelay(20);
+
        /* Marantz/Denon devices with USB DAC functionality need a delay
         * after each class compliant request
         */
@@ -1269,7 +1282,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
        case USB_ID(0x20b1, 0x3008): /* iFi Audio micro/nano iDSD */
        case USB_ID(0x20b1, 0x2008): /* Matrix Audio X-Sabre */
        case USB_ID(0x20b1, 0x300a): /* Matrix Audio Mini-i Pro */
-       case USB_ID(0x22d8, 0x0416): /* OPPO HA-1*/
+       case USB_ID(0x22d9, 0x0416): /* OPPO HA-1 */
                if (fp->altsetting == 2)
                        return SNDRV_PCM_FMTBIT_DSD_U32_BE;
                break;
@@ -1278,6 +1291,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
        case USB_ID(0x20b1, 0x2009): /* DIYINHK DSD DXD 384kHz USB to I2S/DSD */
        case USB_ID(0x20b1, 0x2023): /* JLsounds I2SoverUSB */
        case USB_ID(0x20b1, 0x3023): /* Aune X1S 32BIT/384 DSD DAC */
+       case USB_ID(0x2616, 0x0106): /* PS Audio NuWave DAC */
                if (fp->altsetting == 3)
                        return SNDRV_PCM_FMTBIT_DSD_U32_BE;
                break;
index ea69ce3..c3bd294 100644 (file)
@@ -3746,7 +3746,7 @@ static const struct flag flags[] = {
        { "NET_TX_SOFTIRQ", 2 },
        { "NET_RX_SOFTIRQ", 3 },
        { "BLOCK_SOFTIRQ", 4 },
-       { "BLOCK_IOPOLL_SOFTIRQ", 5 },
+       { "IRQ_POLL_SOFTIRQ", 5 },
        { "TASKLET_SOFTIRQ", 6 },
        { "SCHED_SOFTIRQ", 7 },
        { "HRTIMER_SOFTIRQ", 8 },
index 0a22407..5d34815 100644 (file)
@@ -77,6 +77,9 @@ include config/utilities.mak
 # Define NO_AUXTRACE if you do not want AUX area tracing support
 #
 # Define NO_LIBBPF if you do not want BPF support
+#
+# Define FEATURES_DUMP to provide features detection dump file
+# and bypass the feature detection
 
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
@@ -166,6 +169,15 @@ ifeq ($(config),1)
 include config/Makefile
 endif
 
+# The FEATURE_DUMP_EXPORT holds location of the actual
+# FEATURE_DUMP file to be used to bypass feature detection
+# (for bpf or any other subproject)
+ifeq ($(FEATURES_DUMP),)
+FEATURE_DUMP_EXPORT := $(realpath $(OUTPUT)FEATURE-DUMP)
+else
+FEATURE_DUMP_EXPORT := $(FEATURES_DUMP)
+endif
+
 export prefix bindir sharedir sysconfdir DESTDIR
 
 # sparse is architecture-neutral, which means that we need to tell it
@@ -436,7 +448,7 @@ $(LIBAPI)-clean:
        $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
 
 $(LIBBPF): fixdep FORCE
-       $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(realpath $(OUTPUT)FEATURE-DUMP)
+       $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
 
 $(LIBBPF)-clean:
        $(call QUIET_CLEAN, libbpf)
@@ -610,6 +622,17 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean
        $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
        $(python-clean)
 
+#
+# To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY)
+# file if defined, with no further action.
+feature-dump:
+ifdef FEATURE_DUMP_COPY
+       @cp $(OUTPUT)FEATURE-DUMP $(FEATURE_DUMP_COPY)
+       @echo "FEATURE-DUMP file copied into $(FEATURE_DUMP_COPY)"
+else
+       @echo "FEATURE-DUMP file available in $(OUTPUT)FEATURE-DUMP"
+endif
+
 #
 # Trick: if ../../.git does not exist - we are building out of tree for example,
 # then force version regeneration:
index 3e89ba8..7f064eb 100644 (file)
@@ -17,7 +17,7 @@ static pid_t spawn(void)
        if (pid)
                return pid;
 
-       while(1);
+       while(1)
                sleep(5);
        return 0;
 }
index e5959c1..511141b 100644 (file)
@@ -181,7 +181,11 @@ LDFLAGS += -Wl,-z,noexecstack
 
 EXTLIBS = -lpthread -lrt -lm -ldl
 
+ifeq ($(FEATURES_DUMP),)
 include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
 
 ifeq ($(feature-stackprotector-all), 1)
   CFLAGS += -fstack-protector-all
index df38dec..f918015 100644 (file)
@@ -5,7 +5,7 @@ ifeq ($(MAKECMDGOALS),)
 # no target specified, trigger the whole suite
 all:
        @echo "Testing Makefile";      $(MAKE) -sf tests/make MK=Makefile
-       @echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf
+       @echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf SET_PARALLEL=1 SET_O=1
 else
 # run only specific test over 'Makefile'
 %:
@@ -13,6 +13,26 @@ else
 endif
 else
 PERF := .
+PERF_O := $(PERF)
+O_OPT :=
+
+ifneq ($(O),)
+  FULL_O := $(shell readlink -f $(O) || echo $(O))
+  PERF_O := $(FULL_O)
+  ifeq ($(SET_O),1)
+    O_OPT := 'O=$(FULL_O)'
+  endif
+  K_O_OPT := 'O=$(FULL_O)'
+endif
+
+PARALLEL_OPT=
+ifeq ($(SET_PARALLEL),1)
+  cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
+  ifeq ($(cores),0)
+    cores := 1
+  endif
+  PARALLEL_OPT="-j$(cores)"
+endif
 
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
@@ -156,11 +176,11 @@ test_make_doc    := $(test_ok)
 test_make_help_O := $(test_ok)
 test_make_doc_O  := $(test_ok)
 
-test_make_python_perf_so := test -f $(PERF)/python/perf.so
+test_make_python_perf_so := test -f $(PERF_O)/python/perf.so
 
-test_make_perf_o           := test -f $(PERF)/perf.o
-test_make_util_map_o       := test -f $(PERF)/util/map.o
-test_make_util_pmu_bison_o := test -f $(PERF)/util/pmu-bison.o
+test_make_perf_o           := test -f $(PERF_O)/perf.o
+test_make_util_map_o       := test -f $(PERF_O)/util/map.o
+test_make_util_pmu_bison_o := test -f $(PERF_O)/util/pmu-bison.o
 
 define test_dest_files
   for file in $(1); do                         \
@@ -227,7 +247,7 @@ test_make_perf_o_O            := test -f $$TMP_O/perf.o
 test_make_util_map_o_O        := test -f $$TMP_O/util/map.o
 test_make_util_pmu_bison_o_O := test -f $$TMP_O/util/pmu-bison.o
 
-test_default = test -x $(PERF)/perf
+test_default = test -x $(PERF_O)/perf
 test = $(if $(test_$1),$(test_$1),$(test_default))
 
 test_default_O = test -x $$TMP_O/perf
@@ -247,12 +267,12 @@ endif
 
 MAKEFLAGS := --no-print-directory
 
-clean := @(cd $(PERF); make -s -f $(MK) clean >/dev/null)
+clean := @(cd $(PERF); make -s -f $(MK) $(O_OPT) clean >/dev/null)
 
 $(run):
        $(call clean)
        @TMP_DEST=$$(mktemp -d); \
-       cmd="cd $(PERF) && make -f $(MK) DESTDIR=$$TMP_DEST $($@)"; \
+       cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST $($@)"; \
        echo "- $@: $$cmd" && echo $$cmd > $@ && \
        ( eval $$cmd ) >> $@ 2>&1; \
        echo "  test: $(call test,$@)" >> $@ 2>&1; \
@@ -263,7 +283,7 @@ $(run_O):
        $(call clean)
        @TMP_O=$$(mktemp -d); \
        TMP_DEST=$$(mktemp -d); \
-       cmd="cd $(PERF) && make -f $(MK) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
+       cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
        echo "- $@: $$cmd" && echo $$cmd > $@ && \
        ( eval $$cmd ) >> $@ 2>&1 && \
        echo "  test: $(call test_O,$@)" >> $@ 2>&1; \
@@ -276,17 +296,22 @@ tarpkg:
        ( eval $$cmd ) >> $@ 2>&1 && \
        rm -f $@
 
+KERNEL_O := ../..
+ifneq ($(O),)
+  KERNEL_O := $(O)
+endif
+
 make_kernelsrc:
-       @echo "- make -C <kernelsrc> tools/perf"
+       @echo "- make -C <kernelsrc> $(PARALLEL_OPT) $(K_O_OPT) tools/perf"
        $(call clean); \
-       (make -C ../.. tools/perf) > $@ 2>&1 && \
-       test -x perf && rm -f $@ || (cat $@ ; false)
+       (make -C ../.. $(PARALLEL_OPT) $(K_O_OPT) tools/perf) > $@ 2>&1 && \
+       test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
 make_kernelsrc_tools:
-       @echo "- make -C <kernelsrc>/tools perf"
+       @echo "- make -C <kernelsrc>/tools $(PARALLEL_OPT) $(K_O_OPT) perf"
        $(call clean); \
-       (make -C ../../tools perf) > $@ 2>&1 && \
-       test -x perf && rm -f $@ || (cat $@ ; false)
+       (make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \
+       test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
 all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools
        @echo OK
index d4d7cc2..718bd46 100644 (file)
@@ -755,11 +755,11 @@ static int annotate_browser__run(struct annotate_browser *browser,
                                nd = browser->curr_hot;
                        break;
                case K_UNTAB:
-                       if (nd != NULL)
+                       if (nd != NULL) {
                                nd = rb_next(nd);
                                if (nd == NULL)
                                        nd = rb_first(&browser->entries);
-                       else
+                       else
                                nd = browser->curr_hot;
                        break;
                case K_F1:
index c226303..68a7612 100644 (file)
@@ -131,6 +131,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
                        symlen = unresolved_col_width + 4 + 2;
                        hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
                                           symlen);
+                       hists__new_col_len(hists, HISTC_MEM_DCACHELINE,
+                                          symlen);
                }
 
                if (h->mem_info->iaddr.sym) {
index d5636ba..40b7a0d 100644 (file)
@@ -1149,7 +1149,7 @@ static struct machine *machines__find_for_cpumode(struct machines *machines,
 
                machine = machines__find(machines, pid);
                if (!machine)
-                       machine = machines__find(machines, DEFAULT_GUEST_KERNEL_ID);
+                       machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID);
                return machine;
        }
 
index 2f901d1..2b58edc 100644 (file)
@@ -310,7 +310,6 @@ int perf_stat_process_counter(struct perf_stat_config *config,
        int i, ret;
 
        aggr->val = aggr->ena = aggr->run = 0;
-       init_stats(ps->res_stats);
 
        if (counter->per_pkg)
                zero_per_pkg(counter);
index 3b2de6e..ab02209 100644 (file)
@@ -1466,7 +1466,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
         * Read the build id if possible. This is required for
         * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
         */
-       if (filename__read_build_id(dso->name, build_id, BUILD_ID_SIZE) > 0)
+       if (filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0)
                dso__set_build_id(dso, build_id);
 
        /*
index 8ff7d62..33b52ea 100644 (file)
@@ -209,7 +209,7 @@ static const struct flag flags[] = {
        { "NET_TX_SOFTIRQ", 2 },
        { "NET_RX_SOFTIRQ", 3 },
        { "BLOCK_SOFTIRQ", 4 },
-       { "BLOCK_IOPOLL_SOFTIRQ", 5 },
+       { "IRQ_POLL_SOFTIRQ", 5 },
        { "TASKLET_SOFTIRQ", 6 },
        { "SCHED_SOFTIRQ", 7 },
        { "HRTIMER_SOFTIRQ", 8 },
index 7ec7df9..0c1a7e6 100644 (file)
@@ -113,7 +113,7 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
 }
 EXPORT_SYMBOL(__wrap_devm_memremap_pages);
 
-pfn_t __wrap_phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
+pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
 {
        struct nfit_test_resource *nfit_res = get_nfit_res(addr);
 
index e86d937..60fe3c5 100644 (file)
@@ -45,7 +45,17 @@ static inline int ksft_exit_fail(void)
 }
 #endif
 
-#define NSEC_PER_SEC 1000000000L
+#define NSEC_PER_SEC 1000000000LL
+#define USEC_PER_SEC 1000000LL
+
+#define ADJ_SETOFFSET 0x0100
+
+#include <sys/syscall.h>
+static int clock_adjtime(clockid_t id, struct timex *tx)
+{
+       return syscall(__NR_clock_adjtime, id, tx);
+}
+
 
 /* clear NTP time_status & time_state */
 int clear_time_state(void)
@@ -193,10 +203,137 @@ out:
 }
 
 
+int set_offset(long long offset, int use_nano)
+{
+       struct timex tmx = {};
+       int ret;
+
+       tmx.modes = ADJ_SETOFFSET;
+       if (use_nano) {
+               tmx.modes |= ADJ_NANO;
+
+               tmx.time.tv_sec = offset / NSEC_PER_SEC;
+               tmx.time.tv_usec = offset % NSEC_PER_SEC;
+
+               if (offset < 0 && tmx.time.tv_usec) {
+                       tmx.time.tv_sec -= 1;
+                       tmx.time.tv_usec += NSEC_PER_SEC;
+               }
+       } else {
+               tmx.time.tv_sec = offset / USEC_PER_SEC;
+               tmx.time.tv_usec = offset % USEC_PER_SEC;
+
+               if (offset < 0 && tmx.time.tv_usec) {
+                       tmx.time.tv_sec -= 1;
+                       tmx.time.tv_usec += USEC_PER_SEC;
+               }
+       }
+
+       ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+       if (ret < 0) {
+               printf("(sec: %ld  usec: %ld) ", tmx.time.tv_sec, tmx.time.tv_usec);
+               printf("[FAIL]\n");
+               return -1;
+       }
+       return 0;
+}
+
+int set_bad_offset(long sec, long usec, int use_nano)
+{
+       struct timex tmx = {};
+       int ret;
+
+       tmx.modes = ADJ_SETOFFSET;
+       if (use_nano)
+               tmx.modes |= ADJ_NANO;
+
+       tmx.time.tv_sec = sec;
+       tmx.time.tv_usec = usec;
+       ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+       if (ret >= 0) {
+               printf("Invalid (sec: %ld  usec: %ld) did not fail! ", tmx.time.tv_sec, tmx.time.tv_usec);
+               printf("[FAIL]\n");
+               return -1;
+       }
+       return 0;
+}
+
+int validate_set_offset(void)
+{
+       printf("Testing ADJ_SETOFFSET... ");
+
+       /* Test valid values */
+       if (set_offset(NSEC_PER_SEC - 1, 1))
+               return -1;
+
+       if (set_offset(-NSEC_PER_SEC + 1, 1))
+               return -1;
+
+       if (set_offset(-NSEC_PER_SEC - 1, 1))
+               return -1;
+
+       if (set_offset(5 * NSEC_PER_SEC, 1))
+               return -1;
+
+       if (set_offset(-5 * NSEC_PER_SEC, 1))
+               return -1;
+
+       if (set_offset(5 * NSEC_PER_SEC + NSEC_PER_SEC / 2, 1))
+               return -1;
+
+       if (set_offset(-5 * NSEC_PER_SEC - NSEC_PER_SEC / 2, 1))
+               return -1;
+
+       if (set_offset(USEC_PER_SEC - 1, 0))
+               return -1;
+
+       if (set_offset(-USEC_PER_SEC + 1, 0))
+               return -1;
+
+       if (set_offset(-USEC_PER_SEC - 1, 0))
+               return -1;
+
+       if (set_offset(5 * USEC_PER_SEC, 0))
+               return -1;
+
+       if (set_offset(-5 * USEC_PER_SEC, 0))
+               return -1;
+
+       if (set_offset(5 * USEC_PER_SEC + USEC_PER_SEC / 2, 0))
+               return -1;
+
+       if (set_offset(-5 * USEC_PER_SEC - USEC_PER_SEC / 2, 0))
+               return -1;
+
+       /* Test invalid values */
+       if (set_bad_offset(0, -1, 1))
+               return -1;
+       if (set_bad_offset(0, -1, 0))
+               return -1;
+       if (set_bad_offset(0, 2 * NSEC_PER_SEC, 1))
+               return -1;
+       if (set_bad_offset(0, 2 * USEC_PER_SEC, 0))
+               return -1;
+       if (set_bad_offset(0, NSEC_PER_SEC, 1))
+               return -1;
+       if (set_bad_offset(0, USEC_PER_SEC, 0))
+               return -1;
+       if (set_bad_offset(0, -NSEC_PER_SEC, 1))
+               return -1;
+       if (set_bad_offset(0, -USEC_PER_SEC, 0))
+               return -1;
+
+       printf("[OK]\n");
+       return 0;
+}
+
 int main(int argc, char **argv)
 {
        if (validate_freq())
                return ksft_exit_fail();
 
+       if (validate_set_offset())
+               return ksft_exit_fail();
+
        return ksft_exit_pass();
 }
index 26b7926..ba34f9e 100644 (file)
@@ -1,15 +1,19 @@
 #if defined(__i386__) || defined(__x86_64__)
 #define barrier() asm volatile("" ::: "memory")
-#define mb() __sync_synchronize()
-
-#define smp_mb()       mb()
-# define dma_rmb()     barrier()
-# define dma_wmb()     barrier()
-# define smp_rmb()     barrier()
-# define smp_wmb()     barrier()
+#define virt_mb() __sync_synchronize()
+#define virt_rmb() barrier()
+#define virt_wmb() barrier()
+/* Atomic store should be enough, but gcc generates worse code in that case. */
+#define virt_store_mb(var, value)  do { \
+       typeof(var) virt_store_mb_value = (value); \
+       __atomic_exchange(&(var), &virt_store_mb_value, &virt_store_mb_value, \
+                         __ATOMIC_SEQ_CST); \
+       barrier(); \
+} while (0)
 /* Weak barriers should be used. If not - it's a bug */
-# define rmb() abort()
-# define wmb() abort()
+# define mb() abort()
+# define rmb() abort()
+# define wmb() abort()
 #else
 #error Please fill in barrier macros
 #endif
diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h
new file mode 100644 (file)
index 0000000..845960e
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef LINUX_COMPILER_H
+#define LINUX_COMPILER_H
+
+#define WRITE_ONCE(var, val) \
+       (*((volatile typeof(val) *)(&(var))) = (val))
+
+#define READ_ONCE(var) (*((volatile typeof(var) *)(&(var))))
+
+#endif
index 4db7d56..0338499 100644 (file)
@@ -8,6 +8,7 @@
 #include <assert.h>
 #include <stdarg.h>
 
+#include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/printk.h>
 #include <linux/bug.h>
diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile
new file mode 100644 (file)
index 0000000..feaa64a
--- /dev/null
@@ -0,0 +1,22 @@
+all:
+
+all: ring virtio_ring_0_9 virtio_ring_poll
+
+CFLAGS += -Wall
+CFLAGS += -pthread -O2 -ggdb
+LDFLAGS += -pthread -O2 -ggdb
+
+main.o: main.c main.h
+ring.o: ring.c main.h
+virtio_ring_0_9.o: virtio_ring_0_9.c main.h
+virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h
+ring: ring.o main.o
+virtio_ring_0_9: virtio_ring_0_9.o main.o
+virtio_ring_poll: virtio_ring_poll.o main.o
+clean:
+       -rm main.o
+       -rm ring.o ring
+       -rm virtio_ring_0_9.o virtio_ring_0_9
+       -rm virtio_ring_poll.o virtio_ring_poll
+
+.PHONY: all clean
diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README
new file mode 100644 (file)
index 0000000..34e94c4
--- /dev/null
@@ -0,0 +1,2 @@
+Partial implementation of various ring layouts, useful to tune virtio design.
+Uses shared memory heavily.
diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
new file mode 100644 (file)
index 0000000..3a5ff43
--- /dev/null
@@ -0,0 +1,366 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Command line processing and common functions for ring benchmarking.
+ */
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <pthread.h>
+#include <assert.h>
+#include <sched.h>
+#include "main.h"
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <limits.h>
+
+int runcycles = 10000000;
+int max_outstanding = INT_MAX;
+int batch = 1;
+
+bool do_sleep = false;
+bool do_relax = false;
+bool do_exit = true;
+
+unsigned ring_size = 256;
+
+static int kickfd = -1;
+static int callfd = -1;
+
+void notify(int fd)
+{
+       unsigned long long v = 1;
+       int r;
+
+       vmexit();
+       r = write(fd, &v, sizeof v);
+       assert(r == sizeof v);
+       vmentry();
+}
+
+void wait_for_notify(int fd)
+{
+       unsigned long long v = 1;
+       int r;
+
+       vmexit();
+       r = read(fd, &v, sizeof v);
+       assert(r == sizeof v);
+       vmentry();
+}
+
+void kick(void)
+{
+       notify(kickfd);
+}
+
+void wait_for_kick(void)
+{
+       wait_for_notify(kickfd);
+}
+
+void call(void)
+{
+       notify(callfd);
+}
+
+void wait_for_call(void)
+{
+       wait_for_notify(callfd);
+}
+
+void set_affinity(const char *arg)
+{
+       cpu_set_t cpuset;
+       int ret;
+       pthread_t self;
+       long int cpu;
+       char *endptr;
+
+       if (!arg) /* no CPU given: leave the thread's affinity untouched */
+               return;
+
+       cpu = strtol(arg, &endptr, 0);
+       assert(!*endptr);
+
+       assert(cpu >= 0 && cpu < CPU_SETSIZE); /* both bounds must hold before CPU_SET() */
+
+       self = pthread_self();
+       CPU_ZERO(&cpuset);
+       CPU_SET(cpu, &cpuset);
+
+       ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
+       assert(!ret);
+}
+
+static void run_guest(void)
+{
+       int completed_before;
+       int completed = 0;
+       int started = 0;
+       int bufs = runcycles;
+       int spurious = 0;
+       int r;
+       unsigned len;
+       void *buf;
+       int tokick = batch;
+
+       for (;;) {
+               if (do_sleep)
+                       disable_call();
+               completed_before = completed;
+               do {
+                       if (started < bufs &&
+                           started - completed < max_outstanding) {
+                               r = add_inbuf(0, NULL, "Hello, world!");
+                               if (__builtin_expect(r == 0, true)) {
+                                       ++started;
+                                       if (!--tokick) {
+                                               tokick = batch;
+                                               if (do_sleep)
+                                                       kick_available();
+                                       }
+
+                               }
+                       } else
+                               r = -1;
+
+                       /* Flush out completed bufs if any */
+                       if (get_buf(&len, &buf)) {
+                               ++completed;
+                               if (__builtin_expect(completed == bufs, false))
+                                       return;
+                               r = 0;
+                       }
+               } while (r == 0);
+               if (completed == completed_before)
+                       ++spurious;
+               assert(completed <= bufs);
+               assert(started <= bufs);
+               if (do_sleep) {
+                       if (enable_call())
+                               wait_for_call();
+               } else {
+                       poll_used();
+               }
+       }
+}
+
+static void run_host(void)
+{
+       int completed_before;
+       int completed = 0;
+       int spurious = 0;
+       int bufs = runcycles;
+       unsigned len;
+       void *buf;
+
+       for (;;) {
+               if (do_sleep) {
+                       if (enable_kick())
+                               wait_for_kick();
+               } else {
+                       poll_avail();
+               }
+               if (do_sleep)
+                       disable_kick();
+               completed_before = completed;
+               while (__builtin_expect(use_buf(&len, &buf), true)) {
+                       if (do_sleep)
+                               call_used();
+                       ++completed;
+                       if (__builtin_expect(completed == bufs, false))
+                               return;
+               }
+               if (completed == completed_before)
+                       ++spurious;
+               assert(completed <= bufs);
+               if (completed == bufs)
+                       break;
+       }
+}
+
+void *start_guest(void *arg)
+{
+       set_affinity(arg);
+       run_guest();
+       pthread_exit(NULL);
+}
+
+void *start_host(void *arg)
+{
+       set_affinity(arg);
+       run_host();
+       pthread_exit(NULL);
+}
+
+static const char optstring[] = "";
+static const struct option longopts[] = {
+       {
+               .name = "help",
+               .has_arg = no_argument,
+               .val = 'h',
+       },
+       {
+               .name = "host-affinity",
+               .has_arg = required_argument,
+               .val = 'H',
+       },
+       {
+               .name = "guest-affinity",
+               .has_arg = required_argument,
+               .val = 'G',
+       },
+       {
+               .name = "ring-size",
+               .has_arg = required_argument,
+               .val = 'R',
+       },
+       {
+               .name = "run-cycles",
+               .has_arg = required_argument,
+               .val = 'C',
+       },
+       {
+               .name = "outstanding",
+               .has_arg = required_argument,
+               .val = 'o',
+       },
+       {
+               .name = "batch",
+               .has_arg = required_argument,
+               .val = 'b',
+       },
+       {
+               .name = "sleep",
+               .has_arg = no_argument,
+               .val = 's',
+       },
+       {
+               .name = "relax",
+               .has_arg = no_argument,
+               .val = 'x',
+       },
+       {
+               .name = "exit",
+               .has_arg = no_argument,
+               .val = 'e',
+       },
+       {
+       }
+};
+
+static void help(void)
+{
+       fprintf(stderr, "Usage: <test> [--help]"
+               " [--host-affinity H]"
+               " [--guest-affinity G]"
+               " [--ring-size R (default: %d)]"
+               " [--run-cycles C (default: %d)]"
+               " [--batch b]"
+               " [--outstanding o]"
+               " [--sleep]"
+               " [--relax]"
+               " [--exit]"
+               "\n",
+               ring_size,
+               runcycles);
+}
+
+int main(int argc, char **argv)
+{
+       int ret;
+       pthread_t host, guest;
+       void *tret;
+       char *host_arg = NULL;
+       char *guest_arg = NULL;
+       char *endptr;
+       long int c;
+
+       kickfd = eventfd(0, 0);
+       assert(kickfd >= 0);
+       callfd = eventfd(0, 0);
+       assert(callfd >= 0);
+
+       for (;;) {
+               int o = getopt_long(argc, argv, optstring, longopts, NULL);
+               switch (o) {
+               case -1:
+                       goto done;
+               case '?':
+                       help();
+                       exit(2);
+               case 'H':
+                       host_arg = optarg;
+                       break;
+               case 'G':
+                       guest_arg = optarg;
+                       break;
+               case 'R':
+                       ring_size = strtol(optarg, &endptr, 0);
+                       assert(ring_size && !(ring_size & (ring_size - 1)));
+                       assert(!*endptr);
+                       break;
+               case 'C':
+                       c = strtol(optarg, &endptr, 0);
+                       assert(!*endptr);
+                       assert(c > 0 && c < INT_MAX);
+                       runcycles = c;
+                       break;
+               case 'o':
+                       c = strtol(optarg, &endptr, 0);
+                       assert(!*endptr);
+                       assert(c > 0 && c < INT_MAX);
+                       max_outstanding = c;
+                       break;
+               case 'b':
+                       c = strtol(optarg, &endptr, 0);
+                       assert(!*endptr);
+                       assert(c > 0 && c < INT_MAX);
+                       batch = c;
+                       break;
+               case 's':
+                       do_sleep = true;
+                       break;
+               case 'x':
+                       do_relax = true;
+                       break;
+               case 'e':
+                       do_exit = true;
+                       break;
+               default:
+                       help();
+                       exit(4);
+                       break;
+               }
+       }
+
+       /* does nothing here, used to make sure all smp APIs compile */
+       smp_acquire();
+       smp_release();
+       smp_mb();
+done:
+
+       if (batch > max_outstanding)
+               batch = max_outstanding;
+
+       if (optind < argc) {
+               help();
+               exit(4);
+       }
+       alloc_ring();
+
+       ret = pthread_create(&host, NULL, start_host, host_arg);
+       assert(!ret);
+       ret = pthread_create(&guest, NULL, start_guest, guest_arg);
+       assert(!ret);
+
+       ret = pthread_join(guest, &tret);
+       assert(!ret);
+       ret = pthread_join(host, &tret);
+       assert(!ret);
+       return 0;
+}
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
new file mode 100644 (file)
index 0000000..16917ac
--- /dev/null
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Common macros and functions for ring benchmarking.
+ */
+#ifndef MAIN_H
+#define MAIN_H
+
+#include <stdbool.h>
+
+extern bool do_exit;
+
+#if defined(__x86_64__) || defined(__i386__)
+#include "x86intrin.h"
+
+static inline void wait_cycles(unsigned long long cycles)
+{
+       unsigned long long t;
+
+       t = __rdtsc();
+       while (__rdtsc() - t < cycles) {}
+}
+
+#define VMEXIT_CYCLES 500
+#define VMENTRY_CYCLES 500
+
+#else
+static inline void wait_cycles(unsigned long long cycles)
+{
+       _Exit(5);
+}
+#define VMEXIT_CYCLES 0
+#define VMENTRY_CYCLES 0
+#endif
+
+static inline void vmexit(void)
+{
+       if (!do_exit)
+               return;
+       
+       wait_cycles(VMEXIT_CYCLES);
+}
+static inline void vmentry(void)
+{
+       if (!do_exit)
+               return;
+       
+       wait_cycles(VMENTRY_CYCLES);
+}
+
+/* implemented by ring */
+void alloc_ring(void);
+/* guest side */
+int add_inbuf(unsigned, void *, void *);
+void *get_buf(unsigned *, void **);
+void disable_call();
+bool enable_call();
+void kick_available();
+void poll_used();
+/* host side */
+void disable_kick();
+bool enable_kick();
+bool use_buf(unsigned *, void **);
+void call_used();
+void poll_avail();
+
+/* implemented by main */
+extern bool do_sleep;
+void kick(void);
+void wait_for_kick(void);
+void call(void);
+void wait_for_call(void);
+
+extern unsigned ring_size;
+
+/* Compiler barrier - similar to what Linux uses */
+#define barrier() asm volatile("" ::: "memory")
+
+/* Is there a portable way to do this? */
+#if defined(__x86_64__) || defined(__i386__)
+#define cpu_relax() asm ("rep; nop" ::: "memory")
+#else
+#define cpu_relax() assert(0)
+#endif
+
+extern bool do_relax;
+
+static inline void busy_wait(void)
+{
+       if (do_relax)
+               cpu_relax();
+       else
+               /* prevent compiler from removing busy loops */
+               barrier();
+} 
+
+/*
+ * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
+ * with other __ATOMIC_SEQ_CST calls.
+ */
+#define smp_mb() __sync_synchronize()
+
+/*
+ * This abuses the atomic builtins for thread fences, and
+ * adds a compiler barrier.
+ */
+#define smp_release() do { \
+    barrier(); \
+    __atomic_thread_fence(__ATOMIC_RELEASE); \
+} while (0)
+
+#define smp_acquire() do { \
+    __atomic_thread_fence(__ATOMIC_ACQUIRE); \
+    barrier(); \
+} while (0)
+
+#endif
diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c
new file mode 100644 (file)
index 0000000..c25c8d2
--- /dev/null
@@ -0,0 +1,272 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
+ * signalling, unconditionally.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/*
+ * Decide whether the peer has to be signalled.
+ *
+ * event - index after which the peer requested an event.
+ * next  - where the next entry will be written.
+ * prev  - value of "next" when an event was triggered previously.
+ *
+ * All arithmetic is modulo 2^16, so index wrap-around is handled.
+ */
+static inline bool need_event(unsigned short event,
+                              unsigned short next,
+                              unsigned short prev)
+{
+       unsigned short past_event = next - event - 1;
+       unsigned short past_prev = next - prev;
+
+       return past_event < past_prev;
+}
+
+/* Design:
+ * Guest adds descriptors with unique index values and DESC_HW in flags.
+ * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
+ * Flags are always set last.
+ */
+#define DESC_HW 0x1
+
+struct desc {
+       unsigned short flags;
+       unsigned short index;
+       unsigned len;
+       unsigned long long addr;
+};
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+/* Mostly read */
+struct event {
+       unsigned short kick_index;
+       unsigned char reserved0[HOST_GUEST_PADDING - 2];
+       unsigned short call_index;
+       unsigned char reserved1[HOST_GUEST_PADDING - 2];
+};
+
+struct data {
+       void *buf; /* descriptor is writeable, we can't get buf from there */
+       void *data;
+} *data;
+
+struct desc *ring;
+struct event *event;
+
+/* Guest-side bookkeeping, padded out to HOST_GUEST_PADDING bytes. */
+struct guest {
+       unsigned avail_idx;             /* advanced by add_inbuf() */
+       unsigned last_used_idx;         /* advanced by get_buf() */
+       unsigned num_free;              /* descriptors add_inbuf() may still take */
+       unsigned kicked_avail_idx;      /* avail_idx recorded by kick_available() */
+       /* pad by whatever the four counters above leave over */
+       unsigned char reserved[HOST_GUEST_PADDING - 4 * sizeof(unsigned)];
+} guest;
+
+/* Host-side bookkeeping, padded out to HOST_GUEST_PADDING bytes. */
+struct host {
+       /* we do not need to track last avail index
+        * unless we have more than one in flight.
+        */
+       unsigned used_idx;              /* advanced by use_buf() */
+       unsigned called_used_idx;       /* used_idx recorded by call_used() */
+       /* pad by whatever the two counters above leave over */
+       unsigned char reserved[HOST_GUEST_PADDING - 2 * sizeof(unsigned)];
+} host;
+
+/* implemented by ring */
+/*
+ * Allocate the descriptor ring (0x1000-aligned), the event block and the
+ * per-descriptor data array, and reset the guest/host counters.
+ * Prints a diagnostic and exits with status 3 if an allocation fails.
+ */
+void alloc_ring(void)
+{
+       unsigned i;
+       void *p;
+       int ret;
+
+       /* posix_memalign() returns the error number and leaves errno alone,
+        * so report it with strerror() instead of perror().
+        */
+       ret = posix_memalign(&p, 0x1000, ring_size * sizeof *ring);
+       if (ret) {
+               fprintf(stderr, "Unable to allocate ring buffer: %s\n",
+                       strerror(ret));
+               exit(3);
+       }
+       ring = p;
+       event = calloc(1, sizeof *event);
+       if (!event) {
+               perror("Unable to allocate event buffer");
+               exit(3);
+       }
+       guest.avail_idx = 0;
+       guest.kicked_avail_idx = -1;
+       guest.last_used_idx = 0;
+       host.used_idx = 0;
+       host.called_used_idx = -1;
+       /* every slot starts with DESC_HW clear and index equal to its position */
+       for (i = 0; i < ring_size; ++i) {
+               struct desc desc = {
+                       .index = i,
+               };
+               ring[i] = desc;
+       }
+       guest.num_free = ring_size;
+       data = calloc(ring_size, sizeof *data);
+       if (!data) {
+               perror("Unable to allocate data buffer");
+               exit(3);
+       }
+}
+
+/* guest side */
+/*
+ * Hand one buffer to the host.
+ *
+ * len   - stored in the descriptor's len field.
+ * buf   - stored in the descriptor's addr field and remembered in data[].
+ * datap - caller cookie remembered in data[]; get_buf() returns it later.
+ *
+ * Returns 0 on success, -1 when guest.num_free is zero.
+ * NOTE(review): masking with ring_size - 1 presumes ring_size is a power of
+ * two - confirm where ring_size is set.
+ */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+       unsigned head, index;
+
+       if (!guest.num_free)
+               return -1;
+
+       guest.num_free--;
+       head = (ring_size - 1) & (guest.avail_idx++);
+
+       /* Start with a write. On MESI architectures this helps
+        * avoid a shared state with consumer that is polling this descriptor.
+        */
+       ring[head].addr = (unsigned long)(void*)buf;
+       ring[head].len = len;
+       /* read below might bypass write above. That is OK because it's just an
+        * optimization. If this happens, we will get the cache line in a
+        * shared state which is unfortunate, but probably not worth it to
+        * add an explicit full barrier to avoid this.
+        */
+       barrier();
+       index = ring[head].index;
+       data[index].buf = buf;
+       data[index].data = datap;
+       /* Barrier A (for pairing) */
+       smp_release();
+       /* flags is written last: DESC_HW is what use_buf() tests for */
+       ring[head].flags = DESC_HW;
+
+       return 0;
+}
+
+/*
+ * Reclaim the descriptor at guest.last_used_idx once DESC_HW is clear.
+ *
+ * lenp - receives the descriptor's len field.
+ * bufp - receives the buffer pointer saved by add_inbuf().
+ *
+ * Returns the datap cookie saved by add_inbuf(), or NULL (leaving *lenp and
+ * *bufp untouched) while the descriptor still has DESC_HW set.
+ */
+void *get_buf(unsigned *lenp, void **bufp)
+{
+       unsigned head = (ring_size - 1) & guest.last_used_idx;
+       unsigned index;
+       void *datap;
+
+       if (ring[head].flags & DESC_HW)
+               return NULL;
+       /* Barrier B (for pairing) */
+       smp_acquire();
+       *lenp = ring[head].len;
+       index = ring[head].index & (ring_size - 1);
+       datap = data[index].data;
+       *bufp = data[index].buf;
+       /* release the bookkeeping slot */
+       data[index].buf = NULL;
+       data[index].data = NULL;
+       guest.num_free++;
+       guest.last_used_idx++;
+       return datap;
+}
+
+/* Spin until the descriptor at guest.last_used_idx has DESC_HW clear. */
+void poll_used(void)
+{
+       struct desc *d = &ring[(ring_size - 1) & guest.last_used_idx];
+
+       while (d->flags & DESC_HW)
+               busy_wait();
+}
+
+/* Guest side: intentionally a no-op. */
+void disable_call(void)
+{
+       /* Doing nothing to disable calls might cause
+        * extra interrupts, but reduces the number of cache misses.
+        */
+}
+
+/*
+ * Guest side: publish guest.last_used_idx as the call event index, then
+ * re-check the descriptor at that position.
+ *
+ * Returns true while that descriptor still has DESC_HW set, false otherwise.
+ */
+bool enable_call(void)
+{
+       unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+       event->call_index = guest.last_used_idx;
+       /* Flush call index write */
+       /* Barrier D (for pairing) */
+       smp_mb();
+       return ring[head].flags & DESC_HW;
+}
+
+/*
+ * Guest side: kick() the host if event->kick_index falls among the entries
+ * made available since the previous invocation.
+ */
+void kick_available(void)
+{
+       bool need;
+
+       /* Flush in previous flags write */
+       /* Barrier C (for pairing) */
+       smp_mb();
+       need = need_event(event->kick_index,
+                         guest.avail_idx,
+                         guest.kicked_avail_idx);
+
+       /* Advance the window unconditionally: if it only moved when a kick
+        * was sent, the next invocation would re-check entries that were
+        * already examined.
+        */
+       guest.kicked_avail_idx = guest.avail_idx;
+       if (need)
+               kick();
+}
+
+/* host side */
+/* Intentionally a no-op. */
+void disable_kick(void)
+{
+       /* Doing nothing to disable kicks might cause
+        * extra interrupts, but reduces the number of cache misses.
+        */
+}
+
+/*
+ * Host side: publish host.used_idx as the kick event index, then re-check
+ * the descriptor at that position.
+ *
+ * Returns true while that descriptor has DESC_HW clear, false once it is set.
+ */
+bool enable_kick(void)
+{
+       unsigned head = (ring_size - 1) & host.used_idx;
+
+       event->kick_index = host.used_idx;
+       /* Barrier C (for pairing) */
+       smp_mb();
+       return !(ring[head].flags & DESC_HW);
+}
+
+/* Spin until the descriptor at host.used_idx has DESC_HW set. */
+void poll_avail(void)
+{
+       struct desc *d = &ring[(ring_size - 1) & host.used_idx];
+
+       while (!(d->flags & DESC_HW))
+               busy_wait();
+}
+
+/*
+ * Host side: consume the descriptor at host.used_idx if DESC_HW is set.
+ *
+ * Decrements the descriptor's len in place, clears its flags and advances
+ * host.used_idx. Returns true if a descriptor was consumed, false otherwise.
+ *
+ * NOTE(review): lenp and bufp are never written by this implementation;
+ * callers must not rely on them after a successful return.
+ */
+bool use_buf(unsigned *lenp, void **bufp)
+{
+       unsigned head = (ring_size - 1) & host.used_idx;
+
+       if (!(ring[head].flags & DESC_HW))
+               return false;
+
+       /* make sure length read below is not speculated */
+       /* Barrier A (for pairing) */
+       smp_acquire();
+
+       /* simple in-order completion: we don't need
+        * to touch index at all. This also means we
+        * can just modify the descriptor in-place.
+        */
+       ring[head].len--;
+       /* Make sure len is valid before flags.
+        * Note: alternative is to write len and flags in one access -
+        * possible on 64 bit architectures but wmb is free on Intel anyway
+        * so I have no way to test whether it's a gain.
+        */
+       /* Barrier B (for pairing) */
+       smp_release();
+       ring[head].flags = 0;
+       host.used_idx++;
+       return true;
+}
+
+/*
+ * Host side: call() the guest if event->call_index falls among the entries
+ * used since the previous invocation.
+ */
+void call_used(void)
+{
+       bool need;
+
+       /* Flush in previous flags write */
+       /* Barrier D (for pairing) */
+       smp_mb();
+       need = need_event(event->call_index,
+                         host.used_idx,
+                         host.called_used_idx);
+
+       /* Advance the window unconditionally: if it only moved when a call
+        * was sent, the next invocation would re-check entries that were
+        * already examined.
+        */
+       host.called_used_idx = host.used_idx;
+       if (need)
+               call();
+}
diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh
new file mode 100755 (executable)
index 0000000..52b0f71
--- /dev/null
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+#Runs the given command once per online CPU as guest affinity, then once
+#with only host affinity, then once with no affinity at all.
+
+#Ask lscpu for the online CPUs instead of listing /dev/cpu, which is
+#only populated when the msr/cpuid drivers are loaded.
+CPUS_ONLINE=$(lscpu --online -p=cpu | grep -v -e '#')
+
+#use last CPU for host. Why not the first?
+#many devices tend to use cpu0 by default so
+#it tends to be busier
+HOST_AFFINITY=$(echo "${CPUS_ONLINE}" | tail -n 1)
+
+#run command on all cpus
+for cpu in $CPUS_ONLINE
+do
+       #Don't run guest and host on same CPU
+       #It actually works ok if using signalling
+       if
+               (echo "$@" | grep -e "--sleep" > /dev/null) || \
+                       test "$HOST_AFFINITY" '!=' "$cpu"
+       then
+               echo "GUEST AFFINITY $cpu"
+               "$@" --host-affinity "$HOST_AFFINITY" --guest-affinity "$cpu"
+       fi
+done
+echo "NO GUEST AFFINITY"
+"$@" --host-affinity "$HOST_AFFINITY"
+echo "NO AFFINITY"
+"$@"
diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c
new file mode 100644 (file)
index 0000000..47c9a1a
--- /dev/null
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Partial implementation of virtio 0.9. event index is used for signalling,
+ * unconditionally. Design roughly follows linux kernel implementation in order
+ * to be able to judge its performance.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <linux/virtio_ring.h>
+
+struct data {
+       void *data;
+} *data;
+
+struct vring ring;
+
+/* enabling the below activates experimental ring polling code
+ * (which skips index reads on consumer in favor of looking at
+ * high bits of ring id ^ 0x8000).
+ */
+/* #ifdef RING_POLL */
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+struct guest {
+       unsigned short avail_idx;
+       unsigned short last_used_idx;
+       unsigned short num_free;
+       unsigned short kicked_avail_idx;
+       unsigned short free_head;
+       unsigned char reserved[HOST_GUEST_PADDING - 10];
+} guest;
+
+struct host {
+       /* we do not need to track last avail index
+        * unless we have more than one in flight.
+        */
+       unsigned short used_idx;
+       unsigned short called_used_idx;
+       unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+/*
+ * Allocate a zeroed, 0x1000-aligned vring of ring_size entries plus the
+ * per-descriptor data array, chain all descriptors into the free list and
+ * reset the guest/host counters.
+ * Prints a diagnostic and exits with status 3 if an allocation fails.
+ */
+void alloc_ring(void)
+{
+       size_t size = vring_size(ring_size, 0x1000);
+       unsigned i;
+       void *p;
+       int ret;
+
+       /* posix_memalign() returns the error number and leaves errno alone,
+        * so report it with strerror() instead of perror().
+        */
+       ret = posix_memalign(&p, 0x1000, size);
+       if (ret) {
+               fprintf(stderr, "Unable to allocate ring buffer: %s\n",
+                       strerror(ret));
+               exit(3);
+       }
+       memset(p, 0, size);
+       vring_init(&ring, ring_size, p, 0x1000);
+
+       guest.avail_idx = 0;
+       guest.kicked_avail_idx = -1;
+       guest.last_used_idx = 0;
+       /* Put everything in free lists. */
+       guest.free_head = 0;
+       /* "i + 1 < ring_size" cannot underflow when ring_size is 0 */
+       for (i = 0; i + 1 < ring_size; i++)
+               ring.desc[i].next = i + 1;
+       host.used_idx = 0;
+       host.called_used_idx = -1;
+       guest.num_free = ring_size;
+       data = calloc(ring_size, sizeof *data);
+       if (!data) {
+               perror("Unable to allocate data buffer");
+               exit(3);
+       }
+}
+
+/* guest side */
+/*
+ * Take the descriptor at guest.free_head, fill it with buf/len and publish
+ * it through the avail ring.
+ *
+ * datap - caller cookie stored in data[head]; get_buf() returns it later.
+ *
+ * Returns 0 on success, -1 when guest.num_free is zero.
+ */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+       unsigned head, avail;
+       struct vring_desc *desc;
+
+       if (!guest.num_free)
+               return -1;
+
+       head = guest.free_head;
+       guest.num_free--;
+
+       desc = ring.desc;
+       desc[head].flags = VRING_DESC_F_NEXT;
+       desc[head].addr = (unsigned long)(void *)buf;
+       desc[head].len = len;
+       /* We do it like this to simulate the way
+        * we'd have to flip it if we had multiple
+        * descriptors.
+        */
+       desc[head].flags &= ~VRING_DESC_F_NEXT;
+       guest.free_head = desc[head].next;
+
+       data[head].data = datap;
+
+#ifdef RING_POLL
+       /* Barrier A (for pairing) */
+       smp_release();
+       avail = guest.avail_idx++;
+       /* the entry carries the high bits of avail, XORed with 0x8000,
+        * next to head; use_buf() tests these bits
+        */
+       ring.avail->ring[avail & (ring_size - 1)] =
+               (head | (avail & ~(ring_size - 1))) ^ 0x8000;
+#else
+       avail = (ring_size - 1) & (guest.avail_idx++);
+       ring.avail->ring[avail] = head;
+       /* Barrier A (for pairing) */
+       smp_release();
+#endif
+       ring.avail->idx = guest.avail_idx;
+       return 0;
+}
+
+/*
+ * Reclaim the used-ring entry at guest.last_used_idx, if there is one.
+ *
+ * lenp - receives the len field of the used-ring entry.
+ * bufp - receives the addr field of the reclaimed descriptor.
+ *
+ * Returns the datap cookie saved by add_inbuf(), or NULL (leaving *lenp and
+ * *bufp untouched) when no entry is ready. The descriptor is pushed back
+ * onto the free list.
+ */
+void *get_buf(unsigned *lenp, void **bufp)
+{
+       unsigned head;
+       unsigned index;
+       void *datap;
+
+#ifdef RING_POLL
+       head = (ring_size - 1) & guest.last_used_idx;
+       index = ring.used->ring[head].id;
+       /* not ready while the id's high bits mismatch last_used_idx ^ 0x8000 */
+       if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+               return NULL;
+       /* Barrier B (for pairing) */
+       smp_acquire();
+       index &= ring_size - 1;
+#else
+       if (ring.used->idx == guest.last_used_idx)
+               return NULL;
+       /* Barrier B (for pairing) */
+       smp_acquire();
+       head = (ring_size - 1) & guest.last_used_idx;
+       index = ring.used->ring[head].id;
+#endif
+       *lenp = ring.used->ring[head].len;
+       datap = data[index].data;
+       *bufp = (void*)(unsigned long)ring.desc[index].addr;
+       data[index].data = NULL;
+       /* push the descriptor back on the free list */
+       ring.desc[index].next = guest.free_head;
+       guest.free_head = index;
+       guest.num_free++;
+       guest.last_used_idx++;
+       return datap;
+}
+
+/*
+ * Guest side: spin until the test get_buf() uses to detect a new used entry
+ * passes (id high bits under RING_POLL, used->idx otherwise).
+ */
+void poll_used(void)
+{
+#ifdef RING_POLL
+       unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+       for (;;) {
+               unsigned index = ring.used->ring[head].id;
+
+               if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+                       busy_wait();
+               else
+                       break;
+       }
+#else
+       unsigned head = guest.last_used_idx;
+
+       while (ring.used->idx == head)
+               busy_wait();
+#endif
+}
+
+/* Guest side: intentionally a no-op. */
+void disable_call(void)
+{
+       /* Doing nothing to disable calls might cause
+        * extra interrupts, but reduces the number of cache misses.
+        */
+}
+
+/*
+ * Guest side: publish guest.last_used_idx as the used event index, then
+ * re-check the used ring.
+ *
+ * Returns true when no new used entry is visible, false otherwise.
+ */
+bool enable_call(void)
+{
+       unsigned short last_used_idx;
+
+       vring_used_event(&ring) = (last_used_idx = guest.last_used_idx);
+       /* Flush call index write */
+       /* Barrier D (for pairing) */
+       smp_mb();
+#ifdef RING_POLL
+       {
+               unsigned short head = last_used_idx & (ring_size - 1);
+               unsigned index = ring.used->ring[head].id;
+
+               return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
+       }
+#else
+       return ring.used->idx == last_used_idx;
+#endif
+}
+
+/*
+ * Guest side: kick() the host if the avail event index falls among the
+ * entries made available since the previous invocation.
+ */
+void kick_available(void)
+{
+       bool need;
+
+       /* Flush in previous avail index write */
+       /* Barrier C (for pairing) */
+       smp_mb();
+       need = vring_need_event(vring_avail_event(&ring),
+                               guest.avail_idx,
+                               guest.kicked_avail_idx);
+
+       /* Advance the window unconditionally: if it only moved when a kick
+        * was sent, the next invocation would re-check entries that were
+        * already examined.
+        */
+       guest.kicked_avail_idx = guest.avail_idx;
+       if (need)
+               kick();
+}
+
+/* host side */
+/* Intentionally a no-op. */
+void disable_kick(void)
+{
+       /* Doing nothing to disable kicks might cause
+        * extra interrupts, but reduces the number of cache misses.
+        */
+}
+
+/*
+ * Host side: publish host.used_idx as the avail event index, then re-check
+ * the avail ring.
+ *
+ * Returns true when no new avail entry is visible, false otherwise.
+ */
+bool enable_kick(void)
+{
+       unsigned head = host.used_idx;
+
+       vring_avail_event(&ring) = head;
+       /* Barrier C (for pairing) */
+       smp_mb();
+#ifdef RING_POLL
+       {
+               unsigned index = ring.avail->ring[head & (ring_size - 1)];
+
+               return (index ^ head ^ 0x8000) & ~(ring_size - 1);
+       }
+#else
+       return head == ring.avail->idx;
+#endif
+}
+
+/*
+ * Host side: spin until the test use_buf() uses to detect a new avail entry
+ * passes (entry high bits under RING_POLL, avail->idx otherwise).
+ */
+void poll_avail(void)
+{
+       unsigned head = host.used_idx;
+#ifdef RING_POLL
+       for (;;) {
+               unsigned index = ring.avail->ring[head & (ring_size - 1)];
+               if ((index ^ head ^ 0x8000) & ~(ring_size - 1))
+                       busy_wait();
+               else
+                       break;
+       }
+#else
+       while (ring.avail->idx == head)
+               busy_wait();
+#endif
+}
+
+/*
+ * Host side: consume the avail entry at host.used_idx, if there is one, and
+ * record it in the used ring.
+ *
+ * lenp - receives the descriptor's len.
+ * bufp - receives the descriptor's addr.
+ *
+ * Returns true if an entry was consumed, false (outputs untouched) otherwise.
+ * The used-ring entry is written with len - 1.
+ */
+bool use_buf(unsigned *lenp, void **bufp)
+{
+       unsigned used_idx = host.used_idx;
+       struct vring_desc *desc;
+       unsigned head;
+
+#ifdef RING_POLL
+       head = ring.avail->ring[used_idx & (ring_size - 1)];
+       /* not ready while the entry's high bits mismatch used_idx ^ 0x8000 */
+       if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1))
+               return false;
+       /* Barrier A (for pairing) */
+       smp_acquire();
+
+       used_idx &= ring_size - 1;
+       desc = &ring.desc[head & (ring_size - 1)];
+#else
+       if (used_idx == ring.avail->idx)
+               return false;
+
+       /* Barrier A (for pairing) */
+       smp_acquire();
+
+       used_idx &= ring_size - 1;
+       head = ring.avail->ring[used_idx];
+       desc = &ring.desc[head];
+#endif
+
+       *lenp = desc->len;
+       *bufp = (void *)(unsigned long)desc->addr;
+
+       /* now update used ring */
+       ring.used->ring[used_idx].id = head;
+       ring.used->ring[used_idx].len = desc->len - 1;
+       /* Barrier B (for pairing) */
+       smp_release();
+       host.used_idx++;
+       ring.used->idx = host.used_idx;
+       
+       return true;
+}
+
+/*
+ * Host side: call() the guest if the used event index falls among the
+ * entries used since the previous invocation.
+ */
+void call_used(void)
+{
+       bool need;
+
+       /* Flush in previous used index write */
+       /* Barrier D (for pairing) */
+       smp_mb();
+       need = vring_need_event(vring_used_event(&ring),
+                               host.used_idx,
+                               host.called_used_idx);
+
+       /* Advance the window unconditionally: if it only moved when a call
+        * was sent, the next invocation would re-check entries that were
+        * already examined.
+        */
+       host.called_used_idx = host.used_idx;
+       if (need)
+               call();
+}
diff --git a/tools/virtio/ringtest/virtio_ring_poll.c b/tools/virtio/ringtest/virtio_ring_poll.c
new file mode 100644 (file)
index 0000000..84fc2c5
--- /dev/null
@@ -0,0 +1,2 @@
+/* Build virtio_ring_0_9.c with its RING_POLL code paths compiled in. */
+#define RING_POLL 1
+#include "virtio_ring_0_9.c"