Merge branch 'for-4.8-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 30 Aug 2016 16:31:59 +0000 (09:31 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 30 Aug 2016 16:31:59 +0000 (09:31 -0700)
Pull cgroup fixes from Tejun Heo:
 "Two fixes for cgroup.

   - There still was a hole in enforcing cpuset rules, fixed by Li.

   - The recent switch to global percpu_rwsem for threadgroup locking
     revealed a couple issues in how percpu_rwsem is implemented and
     used by cgroup.  Balbir found that the read locking section was too
     wide unnecessarily including operations which can often depend on
     IOs.  With percpu_rwsem updates (coming through a different tree)
     and reduction of read locking section, all the reported locking
     latency issues, including the android one, are resolved.

  It looks like we can keep global percpu_rwsem locking for now.  If
  there actually are cases which can't be resolved, we can go back to
  more complex per-signal_struct locking"

* 'for-4.8-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: reduce read locked section of cgroup_threadgroup_rwsem during fork
  cpuset: make sure new tasks conform to the current config of the cpuset

861 files changed:
Documentation/PCI/MSI-HOWTO.txt
Documentation/arm64/silicon-errata.txt
Documentation/block/queue-sysfs.txt
Documentation/conf.py
Documentation/devicetree/bindings/sound/omap-mcpdm.txt
Documentation/devicetree/bindings/thermal/thermal.txt
Documentation/hwmon/ftsteutates
Documentation/kernel-documentation.rst
Documentation/kernel-parameters.txt
Documentation/networking/dsa/dsa.txt
Documentation/networking/rxrpc.txt
Documentation/power/basic-pm-debugging.txt
Documentation/power/interface.txt
Documentation/powerpc/transactional_memory.txt
Documentation/sphinx-static/theme_overrides.css
MAINTAINERS
Makefile
arch/Kconfig
arch/arc/include/asm/entry.h
arch/arc/include/asm/irqflags-compact.h
arch/arc/include/asm/pgtable.h
arch/arc/include/uapi/asm/elf.h
arch/arc/kernel/arcksyms.c
arch/arc/kernel/process.c
arch/arc/kernel/setup.c
arch/arc/mm/cache.c
arch/arc/mm/highmem.c
arch/arm/Kconfig
arch/arm/Makefile
arch/arm/boot/dts/arm-realview-pbx-a9.dts
arch/arm/boot/dts/integratorap.dts
arch/arm/boot/dts/integratorcp.dts
arch/arm/boot/dts/keystone.dtsi
arch/arm/boot/dts/tegra124-jetson-tk1.dts
arch/arm/configs/aspeed_g4_defconfig
arch/arm/configs/aspeed_g5_defconfig
arch/arm/include/asm/uaccess.h
arch/arm/kernel/entry-armv.S
arch/arm/kernel/sys_oabi-compat.c
arch/arm/kvm/arm.c
arch/arm/kvm/mmu.c
arch/arm/mach-clps711x/Kconfig
arch/arm/mach-imx/gpc.c
arch/arm/mach-mvebu/Makefile
arch/arm/mach-oxnas/Kconfig
arch/arm/mach-pxa/corgi.c
arch/arm/mach-pxa/idp.c
arch/arm/mach-pxa/spitz.c
arch/arm/mach-pxa/xcep.c
arch/arm/mach-realview/Makefile
arch/arm/mach-realview/core.c
arch/arm/mach-s5pv210/Makefile
arch/arm/mach-sa1100/pleb.c
arch/arm/mach-shmobile/platsmp.c
arch/arm/mm/mmu.c
arch/arm/xen/enlighten.c
arch/arm64/Kconfig
arch/arm64/Kconfig.platforms
arch/arm64/boot/dts/exynos/exynos7-espresso.dts
arch/arm64/configs/defconfig
arch/arm64/include/asm/kprobes.h
arch/arm64/include/asm/uaccess.h
arch/arm64/kernel/entry.S
arch/arm64/kernel/head.S
arch/arm64/kernel/hibernate.c
arch/arm64/kernel/probes/kprobes.c
arch/arm64/kernel/sleep.S
arch/arm64/kernel/smp.c
arch/arm64/kvm/hyp/switch.c
arch/arm64/kvm/sys_regs.c
arch/arm64/mm/dump.c
arch/arm64/mm/fault.c
arch/arm64/mm/numa.c
arch/blackfin/mach-bf561/boards/cm_bf561.c
arch/blackfin/mach-bf561/boards/ezkit.c
arch/h8300/include/asm/io.h
arch/ia64/Kconfig
arch/ia64/include/asm/uaccess.h
arch/m68k/kernel/signal.c
arch/metag/mm/init.c
arch/mips/include/asm/page.h
arch/mips/kvm/emulate.c
arch/mips/kvm/mmu.c
arch/parisc/include/uapi/asm/errno.h
arch/parisc/kernel/processor.c
arch/parisc/kernel/time.c
arch/powerpc/Kconfig
arch/powerpc/Makefile
arch/powerpc/crypto/crc32c-vpmsum_glue.c
arch/powerpc/include/asm/cpuidle.h
arch/powerpc/include/asm/cputhreads.h
arch/powerpc/include/asm/feature-fixups.h
arch/powerpc/include/asm/hmi.h
arch/powerpc/include/asm/paca.h
arch/powerpc/include/asm/pci-bridge.h
arch/powerpc/include/asm/switch_to.h
arch/powerpc/include/asm/uaccess.h
arch/powerpc/include/asm/xics.h
arch/powerpc/kernel/Makefile
arch/powerpc/kernel/eeh.c
arch/powerpc/kernel/entry_64.S
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/hmi.c [deleted file]
arch/powerpc/kernel/idle_book3s.S
arch/powerpc/kernel/kprobes.c
arch/powerpc/kernel/mce.c
arch/powerpc/kernel/pci-common.c
arch/powerpc/kernel/process.c
arch/powerpc/kernel/prom_init.c
arch/powerpc/kernel/ptrace.c
arch/powerpc/kernel/setup_32.c
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/signal_32.c
arch/powerpc/kernel/signal_64.c
arch/powerpc/kernel/smp.c
arch/powerpc/kernel/traps.c
arch/powerpc/kernel/vdso.c
arch/powerpc/kernel/vdso32/Makefile
arch/powerpc/kernel/vdso64/Makefile
arch/powerpc/kvm/Makefile
arch/powerpc/kvm/book3s_hv_hmi.c [new file with mode: 0644]
arch/powerpc/kvm/book3s_xics.c
arch/powerpc/lib/checksum_32.S
arch/powerpc/lib/feature-fixups.c
arch/powerpc/mm/fault.c
arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
arch/powerpc/platforms/cell/spufs/inode.c
arch/powerpc/platforms/embedded6xx/holly.c
arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
arch/powerpc/platforms/pasemi/iommu.c
arch/powerpc/platforms/powernv/opal-dump.c
arch/powerpc/platforms/powernv/opal-elog.c
arch/powerpc/platforms/powernv/opal-irqchip.c
arch/powerpc/platforms/powernv/opal.c
arch/powerpc/platforms/powernv/pci-ioda.c
arch/powerpc/platforms/pseries/hotplug-memory.c
arch/powerpc/platforms/pseries/pci.c
arch/powerpc/platforms/pseries/pci_dlpar.c
arch/powerpc/sysdev/cpm1.c
arch/powerpc/sysdev/cpm_common.c
arch/powerpc/sysdev/fsl_rio.c
arch/powerpc/sysdev/xics/Kconfig
arch/powerpc/sysdev/xics/ics-opal.c
arch/powerpc/sysdev/xics/ics-rtas.c
arch/powerpc/sysdev/xics/xics-common.c
arch/s390/Kconfig
arch/s390/boot/compressed/head.S
arch/s390/configs/default_defconfig
arch/s390/configs/gcov_defconfig
arch/s390/configs/performance_defconfig
arch/s390/crypto/crc32-vx.c
arch/s390/defconfig
arch/s390/kernel/head.S
arch/s390/kernel/setup.c
arch/s390/kvm/kvm-s390.c
arch/s390/lib/string.c
arch/s390/lib/uaccess.c
arch/s390/mm/pageattr.c
arch/sparc/Kconfig
arch/sparc/include/asm/uaccess_32.h
arch/sparc/include/asm/uaccess_64.h
arch/um/include/asm/common.lds.S
arch/unicore32/include/asm/mmu_context.h
arch/x86/Kconfig
arch/x86/crypto/sha256-mb/sha256_mb.c
arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
arch/x86/crypto/sha512-mb/sha512_mb.c
arch/x86/entry/Makefile
arch/x86/entry/entry_64.S
arch/x86/events/intel/uncore_snb.c
arch/x86/events/intel/uncore_snbep.c
arch/x86/include/asm/apic.h
arch/x86/include/asm/hardirq.h
arch/x86/include/asm/init.h
arch/x86/include/asm/pgtable_64.h
arch/x86/include/asm/realmode.h
arch/x86/include/asm/thread_info.h
arch/x86/include/asm/tlbflush.h
arch/x86/include/asm/uaccess.h
arch/x86/include/asm/uaccess_32.h
arch/x86/include/asm/uaccess_64.h
arch/x86/include/asm/uv/bios.h
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/x2apic_cluster.c
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/cpu/microcode/amd.c
arch/x86/kernel/fpu/xstate.c
arch/x86/kernel/head32.c
arch/x86/kernel/head64.c
arch/x86/kernel/hpet.c
arch/x86/kernel/irq.c
arch/x86/kernel/setup.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/tsc.c
arch/x86/kernel/uprobes.c
arch/x86/kvm/vmx.c
arch/x86/lib/hweight.S
arch/x86/lib/kaslr.c
arch/x86/mm/ident_map.c
arch/x86/mm/init.c
arch/x86/mm/kaslr.c
arch/x86/pci/vmd.c
arch/x86/platform/efi/quirks.c
arch/x86/platform/uv/bios_uv.c
arch/x86/power/hibernate_64.c
arch/x86/realmode/init.c
arch/x86/xen/enlighten.c
block/bio.c
block/blk-core.c
block/blk-merge.c
block/blk-mq.c
block/elevator.c
crypto/Kconfig
crypto/sha3_generic.c
drivers/acpi/nfit/core.c
drivers/block/floppy.c
drivers/block/rbd.c
drivers/block/virtio_blk.c
drivers/block/xen-blkfront.c
drivers/clocksource/arm_arch_timer.c
drivers/clocksource/bcm_kona_timer.c
drivers/clocksource/mips-gic-timer.c
drivers/clocksource/pxa_timer.c
drivers/clocksource/sun4i_timer.c
drivers/clocksource/time-armada-370-xp.c
drivers/clocksource/time-pistachio.c
drivers/clocksource/timer-atmel-pit.c
drivers/cpufreq/powernv-cpufreq.c
drivers/crypto/caam/caamalg.c
drivers/crypto/caam/caamhash.c
drivers/dax/pmem.c
drivers/edac/Kconfig
drivers/edac/Makefile
drivers/edac/sb_edac.c
drivers/edac/skx_edac.c [new file with mode: 0644]
drivers/firmware/efi/capsule-loader.c
drivers/firmware/efi/capsule.c
drivers/gpio/Kconfig
drivers/gpio/gpio-max730x.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/ci_dpm.c
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
drivers/gpu/drm/cirrus/cirrus_main.c
drivers/gpu/drm/drm_atomic.c
drivers/gpu/drm/drm_crtc.c
drivers/gpu/drm/drm_edid.c
drivers/gpu/drm/drm_fb_helper.c
drivers/gpu/drm/etnaviv/etnaviv_gpu.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/i915_gem_gtt.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_audio.c
drivers/gpu/drm/i915/intel_csr.c
drivers/gpu/drm/i915/intel_ddi.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_drv.h
drivers/gpu/drm/i915/intel_fbc.c
drivers/gpu/drm/i915/intel_fbdev.c
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/mediatek/Kconfig
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/qxl/qxl_fb.c
drivers/gpu/drm/radeon/atombios_crtc.c
drivers/gpu/drm/radeon/radeon_atpx_handler.c
drivers/gpu/drm/radeon/radeon_ttm.c
drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c
drivers/gpu/drm/tegra/dsi.c
drivers/gpu/drm/ttm/ttm_bo.c
drivers/gpu/drm/ttm/ttm_bo_util.c
drivers/gpu/drm/udl/udl_fb.c
drivers/gpu/host1x/mipi.c
drivers/hwmon/it87.c
drivers/i2c/busses/i2c-at91.c
drivers/i2c/busses/i2c-bcm-iproc.c
drivers/i2c/busses/i2c-bcm-kona.c
drivers/i2c/busses/i2c-brcmstb.c
drivers/i2c/busses/i2c-cros-ec-tunnel.c
drivers/i2c/busses/i2c-meson.c
drivers/i2c/busses/i2c-ocores.c
drivers/i2c/muxes/i2c-demux-pinctrl.c
drivers/infiniband/core/cma.c
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/t4.h
drivers/infiniband/hw/hfi1/affinity.c
drivers/infiniband/hw/hfi1/debugfs.c
drivers/infiniband/hw/hfi1/driver.c
drivers/infiniband/hw/hfi1/file_ops.c
drivers/infiniband/hw/hfi1/hfi.h
drivers/infiniband/hw/hfi1/init.c
drivers/infiniband/hw/hfi1/mad.c
drivers/infiniband/hw/hfi1/qp.c
drivers/infiniband/hw/hfi1/qsfp.c
drivers/infiniband/hw/hfi1/qsfp.h
drivers/infiniband/hw/i40iw/i40iw.h
drivers/infiniband/hw/i40iw/i40iw_cm.c
drivers/infiniband/hw/i40iw/i40iw_main.c
drivers/infiniband/hw/i40iw/i40iw_utils.c
drivers/infiniband/hw/i40iw/i40iw_verbs.c
drivers/infiniband/hw/mlx4/cq.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/ocrdma/ocrdma_hw.c
drivers/infiniband/hw/ocrdma/ocrdma_sli.h
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
drivers/infiniband/hw/qib/qib_debugfs.c
drivers/infiniband/hw/qib/qib_fs.c
drivers/infiniband/hw/qib/qib_qp.c
drivers/infiniband/hw/usnic/usnic_ib_main.c
drivers/infiniband/sw/rdmavt/qp.c
drivers/infiniband/ulp/isert/ib_isert.c
drivers/infiniband/ulp/srpt/ib_srpt.c
drivers/input/keyboard/tegra-kbc.c
drivers/input/rmi4/rmi_driver.c
drivers/input/serio/i8042.c
drivers/input/touchscreen/ads7846.c
drivers/input/touchscreen/silead.c
drivers/iommu/arm-smmu-v3.c
drivers/iommu/arm-smmu.c
drivers/iommu/dma-iommu.c
drivers/iommu/io-pgtable-arm-v7s.c
drivers/iommu/mtk_iommu.h
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-gic-v3.c
drivers/irqchip/irq-gic.c
drivers/irqchip/irq-mips-gic.c
drivers/macintosh/ams/ams-i2c.c
drivers/macintosh/windfarm_pm112.c
drivers/macintosh/windfarm_pm72.c
drivers/macintosh/windfarm_rm31.c
drivers/md/bcache/super.c
drivers/md/dm-crypt.c
drivers/md/dm-flakey.c
drivers/md/dm-log.c
drivers/md/dm-raid.c
drivers/md/dm-round-robin.c
drivers/misc/Makefile
drivers/misc/cxl/context.c
drivers/misc/cxl/cxl.h
drivers/misc/cxl/native.c
drivers/misc/cxl/pci.c
drivers/misc/cxl/vphb.c
drivers/misc/lkdtm_usercopy.c
drivers/mmc/card/block.c
drivers/mmc/card/queue.c
drivers/mmc/card/queue.h
drivers/net/bonding/bond_main.c
drivers/net/dsa/b53/b53_regs.h
drivers/net/dsa/bcm_sf2.h
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
drivers/net/ethernet/arc/emac_main.c
drivers/net/ethernet/atheros/alx/main.c
drivers/net/ethernet/atheros/alx/reg.h
drivers/net/ethernet/broadcom/bgmac-bcma.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/cadence/macb.h
drivers/net/ethernet/cavium/thunder/nic_reg.h
drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/davicom/dm9000.c
drivers/net/ethernet/freescale/gianfar.c
drivers/net/ethernet/freescale/gianfar.h
drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
drivers/net/ethernet/intel/e1000e/82571.c
drivers/net/ethernet/intel/e1000e/e1000.h
drivers/net/ethernet/intel/e1000e/ich8lan.c
drivers/net/ethernet/intel/e1000e/netdev.c
drivers/net/ethernet/intel/i40e/i40e_client.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/igb/igb_ptp.c
drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_common.c
drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlxsw/port.h
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/mellanox/mlxsw/trap.h
drivers/net/ethernet/nxp/lpc_eth.c
drivers/net/ethernet/qlogic/qed/qed.h
drivers/net/ethernet/qlogic/qed/qed_dcbx.c
drivers/net/ethernet/qlogic/qed/qed_hsi.h
drivers/net/ethernet/qlogic/qede/qede_main.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
drivers/net/ethernet/realtek/8139cp.c
drivers/net/ethernet/sfc/ef10.c
drivers/net/ethernet/smsc/smc91x.c
drivers/net/ethernet/smsc/smc91x.h
drivers/net/ethernet/synopsys/dwc_eth_qos.c
drivers/net/ethernet/tehuti/tehuti.c
drivers/net/ethernet/ti/cpsw.c
drivers/net/ethernet/tundra/tsi108_eth.c
drivers/net/ethernet/xilinx/xilinx_emaclite.c
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/macsec.c
drivers/net/macvlan.c
drivers/net/macvtap.c
drivers/net/phy/micrel.c
drivers/net/phy/phy.c
drivers/net/team/team_mode_loadbalance.c
drivers/net/tun.c
drivers/net/usb/kaweth.c
drivers/net/vmxnet3/vmxnet3_drv.c
drivers/net/vmxnet3/vmxnet3_int.h
drivers/net/vxlan.c
drivers/net/wireless/ti/wlcore/main.c
drivers/nvdimm/btt.c
drivers/nvdimm/btt_devs.c
drivers/nvdimm/nd.h
drivers/nvme/host/core.c
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/target/admin-cmd.c
drivers/nvme/target/core.c
drivers/nvme/target/loop.c
drivers/nvme/target/nvmet.h
drivers/nvme/target/rdma.c
drivers/of/base.c
drivers/of/fdt.c
drivers/of/irq.c
drivers/of/platform.c
drivers/pci/host-bridge.c
drivers/pci/msi.c
drivers/perf/arm_pmu.c
drivers/pinctrl/intel/pinctrl-merrifield.c
drivers/pinctrl/meson/pinctrl-meson.c
drivers/pinctrl/pinctrl-amd.c
drivers/pinctrl/pinctrl-pistachio.c
drivers/platform/olpc/olpc-ec.c
drivers/platform/x86/dell-wmi.c
drivers/platform/x86/intel_pmic_gpio.c
drivers/power/max17042_battery.c
drivers/power/reset/Kconfig
drivers/power/reset/hisi-reboot.c
drivers/power/tps65217_charger.c
drivers/rapidio/rio_cm.c
drivers/s390/block/dasd.c
drivers/s390/block/dasd_eckd.c
drivers/s390/cio/device.c
drivers/s390/cio/device_status.c
drivers/s390/cio/io_sch.h
drivers/s390/cio/qdio_main.c
drivers/s390/virtio/Makefile
drivers/s390/virtio/kvm_virtio.c
drivers/scsi/aacraid/commctrl.c
drivers/scsi/fcoe/fcoe_ctlr.c
drivers/scsi/ipr.c
drivers/scsi/megaraid/megaraid_sas_base.c
drivers/scsi/megaraid/megaraid_sas_fusion.c
drivers/scsi/mpt3sas/mpt3sas_base.c
drivers/scsi/ses.c
drivers/thermal/clock_cooling.c
drivers/thermal/cpu_cooling.c
drivers/thermal/fair_share.c
drivers/thermal/gov_bang_bang.c
drivers/thermal/imx_thermal.c
drivers/thermal/int340x_thermal/int3406_thermal.c
drivers/thermal/intel_pch_thermal.c
drivers/thermal/intel_powerclamp.c
drivers/thermal/power_allocator.c
drivers/thermal/step_wise.c
drivers/thermal/thermal_core.c
drivers/thermal/thermal_hwmon.c
drivers/usb/class/cdc-acm.c
drivers/usb/class/cdc-acm.h
drivers/usb/core/config.c
drivers/usb/core/devio.c
drivers/usb/core/hub.c
drivers/usb/dwc3/dwc3-of-simple.c
drivers/usb/dwc3/dwc3-pci.c
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/composite.c
drivers/usb/gadget/configfs.c
drivers/usb/gadget/function/rndis.c
drivers/usb/gadget/function/u_ether.c
drivers/usb/gadget/function/uvc_configfs.c
drivers/usb/gadget/legacy/inode.c
drivers/usb/gadget/udc/core.c
drivers/usb/gadget/udc/fsl_qe_udc.c
drivers/usb/host/ehci-hcd.c
drivers/usb/host/max3421-hcd.c
drivers/usb/host/xhci-hub.c
drivers/usb/host/xhci-pci.c
drivers/usb/host/xhci-ring.c
drivers/usb/misc/ftdi-elan.c
drivers/usb/misc/usbtest.c
drivers/usb/phy/phy-omap-otg.c
drivers/usb/renesas_usbhs/common.c
drivers/usb/renesas_usbhs/fifo.c
drivers/usb/renesas_usbhs/mod_gadget.c
drivers/usb/serial/ftdi_sio.c
drivers/usb/serial/ftdi_sio_ids.h
drivers/usb/serial/option.c
drivers/usb/serial/usb-serial.c
drivers/vfio/pci/vfio_pci_intrs.c
drivers/vhost/scsi.c
drivers/vhost/test.c
drivers/vhost/vsock.c
drivers/virtio/virtio_ring.c
drivers/xen/xenbus/xenbus_dev_frontend.c
fs/afs/cmservice.c
fs/afs/fsclient.c
fs/afs/internal.h
fs/afs/rxrpc.c
fs/afs/vlclient.c
fs/block_dev.c
fs/btrfs/backref.c
fs/btrfs/ctree.h
fs/btrfs/delayed-ref.c
fs/btrfs/delayed-ref.h
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.h
fs/btrfs/file.c
fs/btrfs/inode-map.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/qgroup.c
fs/btrfs/qgroup.h
fs/btrfs/relocation.c
fs/btrfs/root-tree.c
fs/btrfs/send.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/tree-log.c
fs/btrfs/tree-log.h
fs/btrfs/volumes.c
fs/ceph/caps.c
fs/ceph/mds_client.c
fs/dlm/debug_fs.c
fs/ext4/inode.c
fs/ext4/super.c
fs/ext4/xattr.c
fs/ext4/xattr.h
fs/f2fs/data.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/node.c
fs/f2fs/super.c
fs/fs-writeback.c
fs/iomap.c
fs/nfs/nfs42proc.c
fs/nfs/nfs4_fs.h
fs/nfs/nfs4proc.c
fs/nfs/nfs4renewd.c
fs/nfs/nfs4state.c
fs/nfsd/nfs4state.c
fs/nfsd/vfs.c
fs/pipe.c
fs/proc/meminfo.c
fs/seq_file.c
fs/ubifs/tnc_commit.c
fs/ubifs/xattr.c
fs/xfs/libxfs/xfs_alloc.c
fs/xfs/libxfs/xfs_format.h
fs/xfs/libxfs/xfs_rmap_btree.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_file.c
fs/xfs/xfs_fsops.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_iomap.h
fs/xfs/xfs_iops.c
fs/xfs/xfs_trace.h
include/asm-generic/qrwlock.h
include/drm/ttm/ttm_bo_driver.h
include/linux/bio.h
include/linux/blkdev.h
include/linux/bvec.h
include/linux/compiler-gcc.h
include/linux/compiler.h
include/linux/host1x.h
include/linux/irqchip/arm-gic-v3.h
include/linux/kvm_host.h
include/linux/mmzone.h
include/linux/msi.h
include/linux/netdevice.h
include/linux/netfilter/nfnetlink_acct.h
include/linux/pci.h
include/linux/perf_event.h
include/linux/printk.h
include/linux/qed/qed_if.h
include/linux/sctp.h
include/linux/skbuff.h
include/linux/slab.h
include/linux/smc91x.h
include/linux/sunrpc/clnt.h
include/linux/sunrpc/xprt.h
include/linux/sysctl.h
include/linux/thread_info.h
include/linux/uaccess.h
include/net/act_api.h
include/net/af_rxrpc.h
include/net/gre.h
include/net/inet_ecn.h
include/net/mac80211.h
include/net/pkt_cls.h
include/net/tcp.h
include/rdma/ib_verbs.h
include/trace/events/timer.h
include/uapi/linux/atm_zatm.h
include/uapi/linux/bpf.h
include/uapi/linux/if_pppol2tp.h
include/uapi/linux/if_pppox.h
include/uapi/linux/if_tunnel.h
include/uapi/linux/ipx.h
include/uapi/linux/libc-compat.h
include/uapi/linux/netfilter/nf_tables.h
include/uapi/linux/openvswitch.h
include/uapi/linux/sctp.h
include/uapi/linux/virtio_vsock.h
include/uapi/misc/cxl.h
include/xen/xen-ops.h
init/Kconfig
kernel/bpf/hashtab.c
kernel/bpf/verifier.c
kernel/events/core.c
kernel/events/uprobes.c
kernel/futex.c
kernel/irq/affinity.c
kernel/irq/chip.c
kernel/irq/manage.c
kernel/irq/msi.c
kernel/locking/qspinlock_paravirt.h
kernel/locking/qspinlock_stat.h
kernel/power/hibernate.c
kernel/power/snapshot.c
kernel/printk/braille.c
kernel/printk/internal.h
kernel/printk/nmi.c
kernel/printk/printk.c
kernel/sched/core.c
kernel/sched/cpudeadline.c
kernel/sched/cputime.c
kernel/sched/deadline.c
kernel/sched/fair.c
kernel/sysctl.c
kernel/time/timekeeping.c
kernel/time/timekeeping_debug.c
kernel/time/timer.c
kernel/trace/blktrace.c
lib/rhashtable.c
lib/strncpy_from_user.c
lib/strnlen_user.c
lib/test_rhashtable.c
mm/Kconfig
mm/Makefile
mm/huge_memory.c
mm/hugetlb.c
mm/kasan/quarantine.c
mm/memcontrol.c
mm/memory_hotplug.c
mm/oom_kill.c
mm/page_alloc.c
mm/readahead.c
mm/rmap.c
mm/shmem.c
mm/slab.c
mm/slub.c
mm/usercopy.c [new file with mode: 0644]
net/8021q/vlan.c
net/9p/trans_virtio.c
net/bluetooth/af_bluetooth.c
net/bluetooth/hci_request.c
net/bluetooth/hci_sock.c
net/bluetooth/l2cap_core.c
net/bluetooth/l2cap_sock.c
net/bridge/br_fdb.c
net/ceph/mon_client.c
net/ceph/osd_client.c
net/ceph/string_table.c
net/core/dev.c
net/core/filter.c
net/ipv4/fib_trie.c
net/ipv4/ip_gre.c
net/ipv4/ip_tunnel_core.c
net/ipv4/ip_vti.c
net/ipv4/tcp.c
net/ipv4/tcp_diag.c
net/ipv4/tcp_ipv4.c
net/ipv4/udp.c
net/ipv4/udplite.c
net/ipv6/addrconf.c
net/ipv6/calipso.c
net/ipv6/ip6_gre.c
net/ipv6/ping.c
net/ipv6/tcp_ipv6.c
net/ipv6/udp.c
net/ipv6/udplite.c
net/irda/iriap.c
net/l2tp/l2tp_ppp.c
net/mac80211/cfg.c
net/mac80211/driver-ops.h
net/mac80211/mesh.c
net/mac80211/rx.c
net/mac80211/status.c
net/mac80211/tx.c
net/netfilter/nf_conntrack_expect.c
net/netfilter/nf_conntrack_h323_main.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_sip.c
net/netfilter/nf_conntrack_standalone.c
net/netfilter/nfnetlink_acct.c
net/netfilter/nfnetlink_cttimeout.c
net/netfilter/nfnetlink_log.c
net/netfilter/nfnetlink_queue.c
net/netfilter/nft_exthdr.c
net/netfilter/nft_rbtree.c
net/netfilter/xt_TPROXY.c
net/netfilter/xt_nfacct.c
net/openvswitch/conntrack.c
net/openvswitch/vport-geneve.c
net/openvswitch/vport-gre.c
net/openvswitch/vport-internal_dev.c
net/openvswitch/vport-vxlan.c
net/rxrpc/ar-internal.h
net/rxrpc/call_accept.c
net/rxrpc/call_event.c
net/rxrpc/call_object.c
net/rxrpc/input.c
net/rxrpc/recvmsg.c
net/rxrpc/skbuff.c
net/sched/act_api.c
net/sched/act_ife.c
net/sched/act_police.c
net/sched/cls_api.c
net/sched/sch_generic.c
net/sctp/input.c
net/sctp/inqueue.c
net/sctp/proc.c
net/sctp/sctp_diag.c
net/sctp/ulpevent.c
net/sunrpc/auth_gss/auth_gss.c
net/sunrpc/clnt.c
net/sunrpc/xprt.c
net/sunrpc/xprtsock.c
net/tipc/monitor.c
net/tipc/socket.c
net/tipc/udp_media.c
net/vmw_vsock/virtio_transport.c
net/wireless/chan.c
net/wireless/nl80211.c
samples/bpf/bpf_helpers.h
samples/bpf/test_cgrp2_tc_kern.c
samples/bpf/test_maps.c
scripts/Kbuild.include
scripts/Makefile.gcc-plugins
scripts/gcc-plugin.sh
scripts/gcc-plugins/Makefile
scripts/get_maintainer.pl
security/Kconfig
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_realtek.c
sound/soc/atmel/atmel_ssc_dai.c
sound/soc/codecs/da7213.c
sound/soc/codecs/max98371.c
sound/soc/codecs/nau8825.c
sound/soc/codecs/wm2000.c
sound/soc/generic/Makefile
sound/soc/generic/simple-card-utils.c
sound/soc/intel/skylake/skl-sst-utils.c
sound/soc/intel/skylake/skl.c
sound/soc/omap/omap-abe-twl6040.c
sound/soc/omap/omap-mcpdm.c
sound/soc/samsung/s3c24xx_uda134x.c
sound/soc/sh/rcar/src.c
sound/soc/soc-compress.c
sound/soc/soc-core.c
sound/soc/soc-dapm.c
sound/usb/line6/pcm.c
sound/usb/line6/pod.c
sound/usb/quirks.c
tools/arch/arm64/include/uapi/asm/kvm.h
tools/arch/s390/include/uapi/asm/kvm.h
tools/arch/s390/include/uapi/asm/sie.h
tools/arch/x86/include/asm/cpufeatures.h
tools/arch/x86/include/asm/disabled-features.h
tools/arch/x86/include/asm/required-features.h
tools/arch/x86/include/uapi/asm/vmx.h
tools/gpio/gpio-event-mon.c
tools/include/linux/string.h
tools/include/uapi/linux/bpf.h
tools/perf/Documentation/perf-probe.txt
tools/perf/Documentation/perf-script.txt
tools/perf/arch/powerpc/util/sym-handling.c
tools/perf/arch/x86/util/intel-pt.c
tools/perf/builtin-mem.c
tools/perf/builtin-script.c
tools/perf/builtin-stat.c
tools/perf/util/evsel.c
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
tools/perf/util/jitdump.c
tools/perf/util/probe-event.c
tools/perf/util/probe-event.h
tools/perf/util/probe-file.c
tools/perf/util/probe-finder.c
tools/perf/util/sort.c
tools/perf/util/symbol-elf.c
tools/perf/util/unwind-libdw.c
tools/perf/util/unwind-libunwind-local.c
tools/testing/nvdimm/test/nfit.c
tools/testing/selftests/powerpc/Makefile
tools/virtio/linux/dma-mapping.h
tools/virtio/linux/kernel.h
tools/virtio/linux/slab.h
tools/virtio/linux/virtio.h
tools/virtio/linux/virtio_config.h
tools/virtio/ringtest/ptr_ring.c
virt/kvm/arm/arch_timer.c
virt/kvm/arm/vgic/vgic-init.c
virt/kvm/arm/vgic/vgic-its.c
virt/kvm/arm/vgic/vgic-mmio-v3.c
virt/kvm/arm/vgic/vgic-v3.c
virt/kvm/arm/vgic/vgic.c
virt/kvm/arm/vgic/vgic.h
virt/kvm/kvm_main.c

index c55df29..cd9c9f6 100644 (file)
@@ -94,14 +94,11 @@ has a requirements for a minimum number of vectors the driver can pass a
 min_vecs argument set to this limit, and the PCI core will return -ENOSPC
 if it can't meet the minimum number of vectors.
 
-The flags argument should normally be set to 0, but can be used to pass the
-PCI_IRQ_NOMSI and PCI_IRQ_NOMSIX flag in case a device claims to support
-MSI or MSI-X, but the support is broken, or to pass PCI_IRQ_NOLEGACY in
-case the device does not support legacy interrupt lines.
-
-By default this function will spread the interrupts around the available
-CPUs, but this feature can be disabled by passing the PCI_IRQ_NOAFFINITY
-flag.
+The flags argument is used to specify which type of interrupt can be used
+by the device and the driver (PCI_IRQ_LEGACY, PCI_IRQ_MSI, PCI_IRQ_MSIX).
+A convenient short-hand (PCI_IRQ_ALL_TYPES) is also available to ask for
+any possible kind of interrupt.  If the PCI_IRQ_AFFINITY flag is set,
+pci_alloc_irq_vectors() will spread the interrupts around the available CPUs.
 
 To get the Linux IRQ numbers passed to request_irq() and free_irq() and the
 vectors, use the following function:
@@ -131,7 +128,7 @@ larger than the number supported by the device it will automatically be
 capped to the supported limit, so there is no need to query the number of
 vectors supported beforehand:
 
-       nvec = pci_alloc_irq_vectors(pdev, 1, nvec, 0);
+       nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_ALL_TYPES);
        if (nvec < 0)
                goto out_err;
 
@@ -140,7 +137,7 @@ interrupts it can request a particular number of interrupts by passing that
 number to pci_alloc_irq_vectors() function as both 'min_vecs' and
 'max_vecs' parameters:
 
-       ret = pci_alloc_irq_vectors(pdev, nvec, nvec, 0);
+       ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_ALL_TYPES);
        if (ret < 0)
                goto out_err;
 
@@ -148,15 +145,14 @@ The most notorious example of the request type described above is enabling
 the single MSI mode for a device.  It could be done by passing two 1s as
 'min_vecs' and 'max_vecs':
 
-       ret = pci_alloc_irq_vectors(pdev, 1, 1, 0);
+       ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
        if (ret < 0)
                goto out_err;
 
 Some devices might not support using legacy line interrupts, in which case
-the PCI_IRQ_NOLEGACY flag can be used to fail the request if the platform
-can't provide MSI or MSI-X interrupts:
+the driver can specify that only MSI or MSI-X is acceptable:
 
-       nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_NOLEGACY);
+       nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_MSI | PCI_IRQ_MSIX);
        if (nvec < 0)
                goto out_err;
 
index 4da60b4..ccc6032 100644 (file)
@@ -53,6 +53,7 @@ stable kernels.
 | ARM            | Cortex-A57      | #832075         | ARM64_ERRATUM_832075    |
 | ARM            | Cortex-A57      | #852523         | N/A                     |
 | ARM            | Cortex-A57      | #834220         | ARM64_ERRATUM_834220    |
+| ARM            | Cortex-A72      | #853709         | N/A                     |
 | ARM            | MMU-500         | #841119,#826419 | N/A                     |
 |                |                 |                 |                         |
 | Cavium         | ThunderX ITS    | #22375, #24313  | CAVIUM_ERRATUM_22375    |
index d515d58..2a39040 100644 (file)
@@ -14,6 +14,12 @@ add_random (RW)
 This file allows to turn off the disk entropy contribution. Default
 value of this file is '1'(on).
 
+dax (RO)
+--------
+This file indicates whether the device supports Direct Access (DAX),
+used by CPU-addressable storage to bypass the pagecache.  It shows '1'
+if true, '0' if not.
+
 discard_granularity (RO)
 -----------------------
 This shows the size of internal allocation of the device in bytes, if
@@ -46,6 +52,12 @@ hw_sector_size (RO)
 -------------------
 This is the hardware sector size of the device, in bytes.
 
+io_poll (RW)
+------------
+When read, this file shows the total number of block IO polls and how
+many returned success.  Writing '0' to this file will disable polling
+for this device.  Writing any non-zero value will enable this feature.
+
 iostats (RW)
 -------------
 This file is used to control (on/off) the iostats accounting of the
@@ -151,5 +163,11 @@ device state. This means that it might not be safe to toggle the
 setting from "write back" to "write through", since that will also
 eliminate cache flushes issued by the kernel.
 
+write_same_max_bytes (RO)
+-------------------------
+This is the number of bytes the device can write in a single write-same
+command.  A value of '0' means write-same is not supported by this
+device.
+
 
 Jens Axboe <jens.axboe@oracle.com>, February 2009
index 96b7aa6..106ae9c 100644 (file)
@@ -131,7 +131,7 @@ pygments_style = 'sphinx'
 todo_include_todos = False
 
 primary_domain = 'C'
-highlight_language = 'C'
+highlight_language = 'guess'
 
 # -- Options for HTML output ----------------------------------------------
 
index 6f6c2f8..0741dff 100644 (file)
@@ -8,8 +8,6 @@ Required properties:
 - interrupts: Interrupt number for McPDM
 - interrupt-parent: The parent interrupt controller
 - ti,hwmods: Name of the hwmod associated to the McPDM
-- clocks:  phandle for the pdmclk provider, likely <&twl6040>
-- clock-names: Must be "pdmclk"
 
 Example:
 
@@ -21,11 +19,3 @@ mcpdm: mcpdm@40132000 {
        interrupt-parent = <&gic>;
        ti,hwmods = "mcpdm";
 };
-
-In board DTS file the pdmclk needs to be added:
-
-&mcpdm {
-       clocks = <&twl6040>;
-       clock-names = "pdmclk";
-       status = "okay";
-};
index 41b817f..88b6ea1 100644 (file)
@@ -62,7 +62,7 @@ For more examples of cooling devices, refer to the example sections below.
 Required properties:
 - #cooling-cells:      Used to provide cooling device specific information
   Type: unsigned       while referring to it. Must be at least 2, in order
-  Size: one cell       to specify minimum and maximum cooling state used
+  Size: one cell       to specify minimum and maximum cooling state used
                        in the reference. The first cell is the minimum
                        cooling state requested and the second cell is
                        the maximum cooling state requested in the reference.
@@ -119,7 +119,7 @@ Required properties:
 Optional property:
 - contribution:                The cooling contribution to the thermal zone of the
   Type: unsigned       referred cooling device at the referred trip point.
-  Size: one cell       The contribution is a ratio of the sum
+  Size: one cell       The contribution is a ratio of the sum
                        of all cooling contributions within a thermal zone.
 
 Note: Using the THERMAL_NO_LIMIT (-1UL) constant in the cooling-device phandle
@@ -145,7 +145,7 @@ Required properties:
   Size: one cell
 
 - thermal-sensors:     A list of thermal sensor phandles and sensor specifier
-  Type: list of        used while monitoring the thermal zone.
+  Type: list of                used while monitoring the thermal zone.
   phandles + sensor
   specifier
 
@@ -473,7 +473,7 @@ thermal-zones {
                                  <&adc>;       /* pcb north */
 
                /* hotspot = 100 * bandgap - 120 * adc + 484 */
-               coefficients =          <100    -120    484>;
+               coefficients =          <100    -120    484>;
 
                trips {
                        ...
@@ -502,7 +502,7 @@ from the ADC sensor. The binding would be then:
         thermal-sensors =  <&adc>;
 
                /* hotspot = 1 * adc + 6000 */
-       coefficients =          <1      6000>;
+       coefficients =          <1      6000>;
 
 (d) - Board thermal
 
index 2a1bf69..8c10a91 100644 (file)
@@ -19,5 +19,5 @@ enhancements. It can monitor up to 4 voltages, 16 temperatures and
 implemented in this driver.
 
 Specification of the chip can be found here:
-ftp:///pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/BMC-Teutates_Specification_V1.21.pdf
-ftp:///pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/Fujitsu_mainboards-1-Sensors_HowTo-en-US.pdf
+ftp://ftp.ts.fujitsu.com/pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/BMC-Teutates_Specification_V1.21.pdf
+ftp://ftp.ts.fujitsu.com/pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/Fujitsu_mainboards-1-Sensors_HowTo-en-US.pdf
index c4eb504..391decc 100644 (file)
@@ -366,8 +366,6 @@ Domain`_ references.
 Cross-referencing from reStructuredText
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. highlight:: none
-
 To cross-reference the functions and types defined in the kernel-doc comments
 from reStructuredText documents, please use the `Sphinx C Domain`_
 references. For example::
@@ -390,8 +388,6 @@ For further details, please refer to the `Sphinx C Domain`_ documentation.
 Function documentation
 ----------------------
 
-.. highlight:: c
-
 The general format of a function and function-like macro kernel-doc comment is::
 
   /**
@@ -572,8 +568,6 @@ DocBook XML [DEPRECATED]
 Converting DocBook to Sphinx
 ----------------------------
 
-.. highlight:: none
-
 Over time, we expect all of the documents under ``Documentation/DocBook`` to be
 converted to Sphinx and reStructuredText. For most DocBook XML documents, a good
 enough solution is to use the simple ``Documentation/sphinx/tmplcvt`` script,
index 46c030a..a4f4d69 100644 (file)
@@ -3032,6 +3032,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                                PAGE_SIZE is used as alignment.
                                PCI-PCI bridge can be specified, if resource
                                windows need to be expanded.
+                               To specify the alignment for several
+                               instances of a device, the PCI vendor,
+                               device, subvendor, and subdevice may be
+                               specified, e.g., 4096@pci:8086:9c22:103c:198f
                ecrc=           Enable/disable PCIe ECRC (transaction layer
                                end-to-end CRC checking).
                                bios: Use BIOS/firmware settings. This is the
index 9d05ed7..f20c884 100644 (file)
@@ -587,26 +587,6 @@ of DSA, would be the its port-based VLAN, used by the associated bridge device.
 TODO
 ====
 
-The platform device problem
----------------------------
-DSA is currently implemented as a platform device driver which is far from ideal
-as was discussed in this thread:
-
-http://permalink.gmane.org/gmane.linux.network/329848
-
-This basically prevents the device driver model to be properly used and applied,
-and support non-MDIO, non-MMIO Ethernet connected switches.
-
-Another problem with the platform device driver approach is that it prevents the
-use of a modular switch drivers build due to a circular dependency, illustrated
-here:
-
-http://comments.gmane.org/gmane.linux.network/345803
-
-Attempts of reworking this has been done here:
-
-https://lwn.net/Articles/643149/
-
 Making SWITCHDEV and DSA converge towards an unified codebase
 -------------------------------------------------------------
 
index 16a924c..70c926a 100644 (file)
@@ -790,13 +790,12 @@ The kernel interface functions are as follows:
      Data messages can have their contents extracted with the usual bunch of
      socket buffer manipulation functions.  A data message can be determined to
      be the last one in a sequence with rxrpc_kernel_is_data_last().  When a
-     data message has been used up, rxrpc_kernel_data_delivered() should be
-     called on it..
+     data message has been used up, rxrpc_kernel_data_consumed() should be
+     called on it.
 
-     Non-data messages should be handled to rxrpc_kernel_free_skb() to dispose
-     of.  It is possible to get extra refs on all types of message for later
-     freeing, but this may pin the state of a call until the message is finally
-     freed.
+     Messages should be handed to rxrpc_kernel_free_skb() to dispose of.  It
+     is possible to get extra refs on all types of message for later freeing,
+     but this may pin the state of a call until the message is finally freed.
 
  (*) Accept an incoming call.
 
@@ -821,12 +820,14 @@ The kernel interface functions are as follows:
      Other errors may be returned if the call had been aborted (-ECONNABORTED)
      or had timed out (-ETIME).
 
- (*) Record the delivery of a data message and free it.
+ (*) Record the delivery of a data message.
 
-       void rxrpc_kernel_data_delivered(struct sk_buff *skb);
+       void rxrpc_kernel_data_consumed(struct rxrpc_call *call,
+                                       struct sk_buff *skb);
 
-     This is used to record a data message as having been delivered and to
-     update the ACK state for the call.  The socket buffer will be freed.
+     This is used to record a data message as having been consumed and to
+     update the ACK state for the call.  The message must still be passed to
+     rxrpc_kernel_free_skb() for disposal by the caller.
 
  (*) Free a message.
 
index b96098c..708f87f 100644 (file)
@@ -164,7 +164,32 @@ load n/2 modules more and try again.
 Again, if you find the offending module(s), it(they) must be unloaded every time
 before hibernation, and please report the problem with it(them).
 
-c) Advanced debugging
+c) Using the "test_resume" hibernation option
+
+/sys/power/disk generally tells the kernel what to do after creating a
+hibernation image.  One of the available options is "test_resume" which
+causes the just created image to be used for immediate restoration.  Namely,
+after doing:
+
+# echo test_resume > /sys/power/disk
+# echo disk > /sys/power/state
+
+a hibernation image will be created and a resume from it will be triggered
+immediately without involving the platform firmware in any way.
+
+That test can be used to check if failures to resume from hibernation are
+related to bad interactions with the platform firmware.  That is, if the above
+works every time, but resume from actual hibernation does not work or is
+unreliable, the platform firmware may be responsible for the failures.
+
+On architectures and platforms that support using different kernels to restore
+hibernation images (that is, the kernel used to read the image from storage and
+load it into memory is different from the one included in the image) or support
+kernel address space randomization, it also can be used to check if failures
+to resume may be related to the differences between the restore and image
+kernels.
+
+d) Advanced debugging
 
 In case that hibernation does not work on your system even in the minimal
 configuration and compiling more drivers as modules is not practical or some
index f1f0f59..974916f 100644 (file)
@@ -1,75 +1,76 @@
-Power Management Interface
-
-
-The power management subsystem provides a unified sysfs interface to 
-userspace, regardless of what architecture or platform one is
-running. The interface exists in /sys/power/ directory (assuming sysfs
-is mounted at /sys). 
-
-/sys/power/state controls system power state. Reading from this file
-returns what states are supported, which is hard-coded to 'freeze',
-'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and 'disk'
-(Suspend-to-Disk). 
-
-Writing to this file one of those strings causes the system to
-transition into that state. Please see the file
-Documentation/power/states.txt for a description of each of those
-states.
-
-
-/sys/power/disk controls the operating mode of the suspend-to-disk
-mechanism. Suspend-to-disk can be handled in several ways. We have a
-few options for putting the system to sleep - using the platform driver
-(e.g. ACPI or other suspend_ops), powering off the system or rebooting the
-system (for testing).
-
-Additionally, /sys/power/disk can be used to turn on one of the two testing
-modes of the suspend-to-disk mechanism: 'testproc' or 'test'.  If the
-suspend-to-disk mechanism is in the 'testproc' mode, writing 'disk' to
-/sys/power/state will cause the kernel to disable nonboot CPUs and freeze
-tasks, wait for 5 seconds, unfreeze tasks and enable nonboot CPUs.  If it is
-in the 'test' mode, writing 'disk' to /sys/power/state will cause the kernel
-to disable nonboot CPUs and freeze tasks, shrink memory, suspend devices, wait
-for 5 seconds, resume devices, unfreeze tasks and enable nonboot CPUs.  Then,
-we are able to look in the log messages and work out, for example, which code
-is being slow and which device drivers are misbehaving.
-
-Reading from this file will display all supported modes and the currently
-selected one in brackets, for example
-
-       [shutdown] reboot test testproc
-
-Writing to this file will accept one of
-
-       'platform' (only if the platform supports it)
-       'shutdown'
-       'reboot'
-       'testproc'
-       'test'
-
-/sys/power/image_size controls the size of the image created by
-the suspend-to-disk mechanism.  It can be written a string
-representing a non-negative integer that will be used as an upper
-limit of the image size, in bytes.  The suspend-to-disk mechanism will
-do its best to ensure the image size will not exceed that number.  However,
-if this turns out to be impossible, it will try to suspend anyway using the
-smallest image possible.  In particular, if "0" is written to this file, the
-suspend image will be as small as possible.
-
-Reading from this file will display the current image size limit, which
-is set to 2/5 of available RAM by default.
-
-/sys/power/pm_trace controls the code which saves the last PM event point in
-the RTC across reboots, so that you can debug a machine that just hangs
-during suspend (or more commonly, during resume).  Namely, the RTC is only
-used to save the last PM event point if this file contains '1'.  Initially it
-contains '0' which may be changed to '1' by writing a string representing a
-nonzero integer into it.
-
-To use this debugging feature you should attempt to suspend the machine, then
-reboot it and run
-
-       dmesg -s 1000000 | grep 'hash matches'
-
-CAUTION: Using it will cause your machine's real-time (CMOS) clock to be
-set to a random invalid time after a resume.
+Power Management Interface for System Sleep
+
+Copyright (c) 2016 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+The power management subsystem provides userspace with a unified sysfs interface
+for system sleep regardless of the underlying system architecture or platform.
+The interface is located in the /sys/power/ directory (assuming that sysfs is
+mounted at /sys).
+
+/sys/power/state is the system sleep state control file.
+
+Reading from it returns a list of supported sleep states, encoded as:
+
+'freeze' (Suspend-to-Idle)
+'standby' (Power-On Suspend)
+'mem' (Suspend-to-RAM)
+'disk' (Suspend-to-Disk)
+
+Suspend-to-Idle is always supported.  Suspend-to-Disk is always supported
+too as long as the kernel has been configured to support hibernation at all
+(i.e. CONFIG_HIBERNATION is set in the kernel configuration file).  Support
+for Suspend-to-RAM and Power-On Suspend depends on the capabilities of the
+platform.
+
+If one of the strings listed in /sys/power/state is written to it, the system
+will attempt to transition into the corresponding sleep state.  Refer to
+Documentation/power/states.txt for a description of each of those states.
+
+/sys/power/disk controls the operating mode of hibernation (Suspend-to-Disk).
+Specifically, it tells the kernel what to do after creating a hibernation image.
+
+Reading from it returns a list of supported options encoded as:
+
+'platform' (put the system into sleep using a platform-provided method)
+'shutdown' (shut the system down)
+'reboot' (reboot the system)
+'suspend' (trigger a Suspend-to-RAM transition)
+'test_resume' (resume-after-hibernation test mode)
+
+The currently selected option is printed in square brackets.
+
+The 'platform' option is only available if the platform provides a special
+mechanism to put the system to sleep after creating a hibernation image (ACPI
+does that, for example).  The 'suspend' option is available if Suspend-to-RAM
+is supported.  Refer to Documentation/power/basic-pm-debugging.txt for the
+description of the 'test_resume' option.
+
+To select an option, write the string representing it to /sys/power/disk.
+
+/sys/power/image_size controls the size of hibernation images.
+
+A string representing a non-negative integer can be written to it and will be
+used as a best-effort upper limit of the image size, in bytes.  The hibernation
+core will do its best to ensure that the image size will not exceed that number.
+However, if that turns out to be impossible to achieve, a hibernation image will
+still be created and its size will be as small as possible.  In particular,
+writing '0' to this file will enforce hibernation images to be as small as
+possible.
+
+Reading from this file returns the current image size limit, which is set to
+around 2/5 of available RAM by default.
+
+/sys/power/pm_trace controls the PM trace mechanism saving the last suspend
+or resume event point in the RTC across reboots.
+
+It helps to debug hard lockups or reboots due to device driver failures that
+occur during system suspend or resume (which is more common) more effectively.
+
+If /sys/power/pm_trace contains '1', the fingerprint of each suspend/resume
+event point in turn will be stored in the RTC memory (overwriting the actual
+RTC information), so it will survive a system crash if one occurs right after
+storing it and it can be used later to identify the driver that caused the crash
+to happen (see Documentation/power/s2ram.txt for more information).
+
+Initially it contains '0' which may be changed to '1' by writing a string
+representing a nonzero integer into it.
index ba0a2a4..e32fdbb 100644 (file)
@@ -167,6 +167,8 @@ signal will be rolled back anyway.
 For signals taken in non-TM or suspended mode, we use the
 normal/non-checkpointed stack pointer.
 
+Any transaction initiated inside a sighandler and suspended on return
+from the sighandler to the kernel will get reclaimed and discarded.
 
 Failure cause codes used by kernel
 ==================================
index 3a2ac4b..e88461c 100644 (file)
     caption a.headerlink { opacity: 0; }
     caption a.headerlink:hover { opacity: 1; }
 
-    /* inline literal: drop the borderbox and red color */
+    /* inline literal: drop the borderbox, padding and red color */
 
     code, .rst-content tt, .rst-content code {
         color: inherit;
         border: none;
+        padding: unset;
         background: inherit;
         font-size: 85%;
     }
index 20bb1d0..71aa5da 100644 (file)
@@ -881,6 +881,15 @@ S: Supported
 F:     drivers/gpu/drm/arc/
 F:     Documentation/devicetree/bindings/display/snps,arcpgu.txt
 
+ARM ARCHITECTED TIMER DRIVER
+M:     Mark Rutland <mark.rutland@arm.com>
+M:     Marc Zyngier <marc.zyngier@arm.com>
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:     Maintained
+F:     arch/arm/include/asm/arch_timer.h
+F:     arch/arm64/include/asm/arch_timer.h
+F:     drivers/clocksource/arm_arch_timer.c
+
 ARM HDLCD DRM DRIVER
 M:     Liviu Dudau <liviu.dudau@arm.com>
 S:     Supported
@@ -1004,6 +1013,7 @@ N:        meson
 ARM/Annapurna Labs ALPINE ARCHITECTURE
 M:     Tsahee Zidenberg <tsahee@annapurnalabs.com>
 M:     Antoine Tenart <antoine.tenart@free-electrons.com>
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-alpine/
 F:     arch/arm/boot/dts/alpine*
@@ -4524,6 +4534,12 @@ L:       linux-edac@vger.kernel.org
 S:     Maintained
 F:     drivers/edac/sb_edac.c
 
+EDAC-SKYLAKE
+M:     Tony Luck <tony.luck@intel.com>
+L:     linux-edac@vger.kernel.org
+S:     Maintained
+F:     drivers/edac/skx_edac.c
+
 EDAC-XGENE
 APPLIED MICRO (APM) X-GENE SOC EDAC
 M:     Loc Ho <lho@apm.com>
@@ -7654,7 +7670,7 @@ L:        linux-rdma@vger.kernel.org
 S:     Supported
 W:     https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
 Q:     http://patchwork.kernel.org/project/linux-rdma/list/
-F:     drivers/infiniband/hw/rxe/
+F:     drivers/infiniband/sw/rxe/
 F:     include/uapi/rdma/rdma_user_rxe.h
 
 MEMBARRIER SUPPORT
index 70de144..67f42d5 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 8
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc4
 NAME = Psychotic Stoned Sheep
 
 # *DOCUMENTATION*
@@ -635,13 +635,6 @@ endif
 # Tell gcc to never replace conditional load with a non-conditional one
 KBUILD_CFLAGS  += $(call cc-option,--param=allow-store-data-races=0)
 
-PHONY += gcc-plugins
-gcc-plugins: scripts_basic
-ifdef CONFIG_GCC_PLUGINS
-       $(Q)$(MAKE) $(build)=scripts/gcc-plugins
-endif
-       @:
-
 include scripts/Makefile.gcc-plugins
 
 ifdef CONFIG_READABLE_ASM
index bd8056b..e9c9334 100644 (file)
@@ -461,6 +461,15 @@ config CC_STACKPROTECTOR_STRONG
 
 endchoice
 
+config HAVE_ARCH_WITHIN_STACK_FRAMES
+       bool
+       help
+         An architecture should select this if it can walk the kernel stack
+         frames to determine if an object is part of either the arguments
+         or local variables (i.e. that it excludes saved return addresses,
+         and similar) by implementing an inline arch_within_stack_frames(),
+         which is used by CONFIG_HARDENED_USERCOPY.
+
 config HAVE_CONTEXT_TRACKING
        bool
        help
index ad7860c..51597f3 100644 (file)
 
 #ifdef CONFIG_ARC_CURR_IN_REG
        ; Retrieve orig r25 and save it with rest of callee_regs
-       ld.as   r12, [r12, PT_user_r25]
+       ld      r12, [r12, PT_user_r25]
        PUSH    r12
 #else
        PUSH    r25
 
        ; SP is back to start of pt_regs
 #ifdef CONFIG_ARC_CURR_IN_REG
-       st.as   r12, [sp, PT_user_r25]
+       st      r12, [sp, PT_user_r25]
 #endif
 .endm
 
index c1d3645..4c6eed8 100644 (file)
@@ -188,10 +188,10 @@ static inline int arch_irqs_disabled(void)
 .endm
 
 .macro IRQ_ENABLE  scratch
+       TRACE_ASM_IRQ_ENABLE
        lr      \scratch, [status32]
        or      \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
        flag    \scratch
-       TRACE_ASM_IRQ_ENABLE
 .endm
 
 #endif /* __ASSEMBLY__ */
index 0f92d97..89eeb37 100644 (file)
@@ -280,7 +280,7 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
 
 #define pte_page(pte)          pfn_to_page(pte_pfn(pte))
 #define mk_pte(page, prot)     pfn_pte(page_to_pfn(page), prot)
-#define pfn_pte(pfn, prot)     (__pte(((pte_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define pfn_pte(pfn, prot)     __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
 
 /* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/
 #define pte_pfn(pte)           (pte_val(pte) >> PAGE_SHIFT)
index 0f99ac8..0037a58 100644 (file)
 
 /* Machine specific ELF Hdr flags */
 #define EF_ARC_OSABI_MSK       0x00000f00
-#define EF_ARC_OSABI_ORIG      0x00000000   /* MUST be zero for back-compat */
-#define EF_ARC_OSABI_CURRENT   0x00000300   /* v3 (no legacy syscalls) */
+
+#define EF_ARC_OSABI_V3                0x00000300   /* v3 (no legacy syscalls) */
+#define EF_ARC_OSABI_V4                0x00000400   /* v4 (64bit data any reg align) */
+
+#if __GNUC__ < 6
+#define EF_ARC_OSABI_CURRENT   EF_ARC_OSABI_V3
+#else
+#define EF_ARC_OSABI_CURRENT   EF_ARC_OSABI_V4
+#endif
 
 typedef unsigned long elf_greg_t;
 typedef unsigned long elf_fpregset_t;
index 4d9e777..000dd04 100644 (file)
@@ -28,6 +28,7 @@ extern void __muldf3(void);
 extern void __divdf3(void);
 extern void __floatunsidf(void);
 extern void __floatunsisf(void);
+extern void __udivdi3(void);
 
 EXPORT_SYMBOL(__ashldi3);
 EXPORT_SYMBOL(__ashrdi3);
@@ -45,6 +46,7 @@ EXPORT_SYMBOL(__muldf3);
 EXPORT_SYMBOL(__divdf3);
 EXPORT_SYMBOL(__floatunsidf);
 EXPORT_SYMBOL(__floatunsisf);
+EXPORT_SYMBOL(__udivdi3);
 
 /* ARC optimised assembler routines */
 EXPORT_SYMBOL(memset);
index b5db9e7..be1972b 100644 (file)
@@ -199,7 +199,7 @@ int elf_check_arch(const struct elf32_hdr *x)
        }
 
        eflags = x->e_flags;
-       if ((eflags & EF_ARC_OSABI_MSK) < EF_ARC_OSABI_CURRENT) {
+       if ((eflags & EF_ARC_OSABI_MSK) != EF_ARC_OSABI_CURRENT) {
                pr_err("ABI mismatch - you need newer toolchain\n");
                force_sigsegv(SIGSEGV, current);
                return 0;
index a946400..f52a0d0 100644 (file)
@@ -291,8 +291,10 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
                               cpu->dccm.base_addr, TO_KB(cpu->dccm.sz),
                               cpu->iccm.base_addr, TO_KB(cpu->iccm.sz));
 
-       n += scnprintf(buf + n, len - n,
-                      "OS ABI [v3]\t: no-legacy-syscalls\n");
+       n += scnprintf(buf + n, len - n, "OS ABI [v%d]\t: %s\n",
+                       EF_ARC_OSABI_CURRENT >> 8,
+                       EF_ARC_OSABI_CURRENT == EF_ARC_OSABI_V3 ?
+                       "no-legacy-syscalls" : "64-bit data any register aligned");
 
        return buf;
 }
index 5a294b2..0b10efe 100644 (file)
@@ -921,6 +921,15 @@ void arc_cache_init(void)
 
        printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
 
+       /*
+        * Only master CPU needs to execute rest of function:
+        *  - Assume SMP so all cores will have same cache config so
+        *    any geometry checks will be same for all
+        *  - IOC setup / dma callbacks only need to be setup once
+        */
+       if (cpu)
+               return;
+
        if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
                struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
 
index 04f8332..77ff64a 100644 (file)
@@ -61,6 +61,7 @@ void *kmap(struct page *page)
 
        return kmap_high(page);
 }
+EXPORT_SYMBOL(kmap);
 
 void *kmap_atomic(struct page *page)
 {
index 2d601d7..a9c4e48 100644 (file)
@@ -35,6 +35,7 @@ config ARM
        select HARDIRQS_SW_RESEND
        select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
        select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
+       select HAVE_ARCH_HARDENED_USERCOPY
        select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
        select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
        select HAVE_ARCH_MMAP_RND_BITS if MMU
index 56ea5c6..61f6ccc 100644 (file)
@@ -260,12 +260,14 @@ machdirs := $(patsubst %,arch/arm/mach-%/,$(machine-y))
 platdirs := $(patsubst %,arch/arm/plat-%/,$(sort $(plat-y)))
 
 ifneq ($(CONFIG_ARCH_MULTIPLATFORM),y)
+ifneq ($(CONFIG_ARM_SINGLE_ARMV7M),y)
 ifeq ($(KBUILD_SRC),)
 KBUILD_CPPFLAGS += $(patsubst %,-I%include,$(machdirs) $(platdirs))
 else
 KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(machdirs) $(platdirs))
 endif
 endif
+endif
 
 export TEXT_OFFSET GZFLAGS MMUEXT
 
index db808f9..90d00b4 100644 (file)
                 * associativity as these may be erroneously set
                 * up by boot loader(s).
                 */
-               cache-size = <1048576>; // 1MB
-               cache-sets = <4096>;
+               cache-size = <131072>; // 128KB
+               cache-sets = <512>;
                cache-line-size = <32>;
                arm,parity-disable;
-               arm,tag-latency = <1>;
-               arm,data-latency = <1 1>;
-               arm,dirty-latency = <1>;
+               arm,tag-latency = <1 1 1>;
+               arm,data-latency = <1 1 1>;
        };
 
        scu: scu@1f000000 {
index cf06e32..4b34b54 100644 (file)
@@ -42,7 +42,7 @@
        };
 
        syscon {
-               compatible = "arm,integrator-ap-syscon";
+               compatible = "arm,integrator-ap-syscon", "syscon";
                reg = <0x11000000 0x100>;
                interrupt-parent = <&pic>;
                /* These are the logical module IRQs */
index d43f15b..79430fb 100644 (file)
@@ -94,7 +94,7 @@
        };
 
        syscon {
-               compatible = "arm,integrator-cp-syscon";
+               compatible = "arm,integrator-cp-syscon", "syscon";
                reg = <0xcb000000 0x100>;
        };
 
index 00cb314..e23f46d 100644 (file)
                cpu_on          = <0x84000003>;
        };
 
-       psci {
-               compatible      = "arm,psci";
-               method          = "smc";
-               cpu_suspend     = <0x84000001>;
-               cpu_off         = <0x84000002>;
-               cpu_on          = <0x84000003>;
-       };
-
        soc {
                #address-cells = <1>;
                #size-cells = <1>;
index e52b824..6403e0d 100644 (file)
         *   Pin 41: BR_UART1_TXD
         *   Pin 44: BR_UART1_RXD
         */
-       serial@70006000 {
+       serial@0,70006000 {
                compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart";
                status = "okay";
        };
         *   Pin 71: UART2_CTS_L
         *   Pin 74: UART2_RTS_L
         */
-       serial@70006040 {
+       serial@0,70006040 {
                compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart";
                status = "okay";
        };
index b6e54ee..ca39c04 100644 (file)
@@ -58,7 +58,7 @@ CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_FIRMWARE_MEMMAP=y
 CONFIG_FANOTIFY=y
-CONFIG_PRINTK_TIME=1
+CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_PAGE_POISONING=y
index 8926051..4f366b0 100644 (file)
@@ -59,7 +59,7 @@ CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_FIRMWARE_MEMMAP=y
 CONFIG_FANOTIFY=y
-CONFIG_PRINTK_TIME=1
+CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_PAGE_POISONING=y
index 62a6f65..a93c0f9 100644 (file)
@@ -480,7 +480,10 @@ arm_copy_from_user(void *to, const void __user *from, unsigned long n);
 static inline unsigned long __must_check
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-       unsigned int __ua_flags = uaccess_save_and_enable();
+       unsigned int __ua_flags;
+
+       check_object_size(to, n, false);
+       __ua_flags = uaccess_save_and_enable();
        n = arm_copy_from_user(to, from, n);
        uaccess_restore(__ua_flags);
        return n;
@@ -495,11 +498,15 @@ static inline unsigned long __must_check
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 #ifndef CONFIG_UACCESS_WITH_MEMCPY
-       unsigned int __ua_flags = uaccess_save_and_enable();
+       unsigned int __ua_flags;
+
+       check_object_size(from, n, true);
+       __ua_flags = uaccess_save_and_enable();
        n = arm_copy_to_user(to, from, n);
        uaccess_restore(__ua_flags);
        return n;
 #else
+       check_object_size(from, n, true);
        return arm_copy_to_user(to, from, n);
 #endif
 }
index bc5f507..9f157e7 100644 (file)
@@ -295,6 +295,7 @@ __und_svc_fault:
        bl      __und_fault
 
 __und_svc_finish:
+       get_thread_info tsk
        ldr     r5, [sp, #S_PSR]                @ Get SVC cpsr
        svc_exit r5                             @ return from exception
  UNWIND(.fnend         )
index 087acb5..5f221ac 100644 (file)
@@ -279,8 +279,12 @@ asmlinkage long sys_oabi_epoll_wait(int epfd,
        mm_segment_t fs;
        long ret, err, i;
 
-       if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event)))
+       if (maxevents <= 0 ||
+                       maxevents > (INT_MAX/sizeof(*kbuf)) ||
+                       maxevents > (INT_MAX/sizeof(*events)))
                return -EINVAL;
+       if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents))
+               return -EFAULT;
        kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL);
        if (!kbuf)
                return -ENOMEM;
@@ -317,6 +321,8 @@ asmlinkage long sys_oabi_semtimedop(int semid,
 
        if (nsops < 1 || nsops > SEMOPM)
                return -EINVAL;
+       if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops))
+               return -EFAULT;
        sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
        if (!sops)
                return -ENOMEM;
index d94bb90..75f130e 100644 (file)
@@ -1009,9 +1009,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
        switch (ioctl) {
        case KVM_CREATE_IRQCHIP: {
+               int ret;
                if (!vgic_present)
                        return -ENXIO;
-               return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
+               mutex_lock(&kvm->lock);
+               ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
+               mutex_unlock(&kvm->lock);
+               return ret;
        }
        case KVM_ARM_SET_DEVICE_ADDR: {
                struct kvm_arm_device_addr dev_addr;
index bda27b6..29d0b23 100644 (file)
@@ -1309,7 +1309,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        smp_rmb();
 
        pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
-       if (is_error_pfn(pfn))
+       if (is_error_noslot_pfn(pfn))
                return -EFAULT;
 
        if (kvm_is_device_pfn(pfn)) {
index dc7c6ed..61284b9 100644 (file)
@@ -1,13 +1,13 @@
 menuconfig ARCH_CLPS711X
        bool "Cirrus Logic EP721x/EP731x-based"
        depends on ARCH_MULTI_V4T
-       select ARCH_REQUIRE_GPIOLIB
        select AUTO_ZRELADDR
        select CLKSRC_OF
        select CLPS711X_TIMER
        select COMMON_CLK
        select CPU_ARM720T
        select GENERIC_CLOCKEVENTS
+       select GPIOLIB
        select MFD_SYSCON
        select OF_IRQ
        select USE_OF
index fd87205..0df062d 100644 (file)
@@ -271,6 +271,12 @@ static int __init imx_gpc_init(struct device_node *node,
        for (i = 0; i < IMR_NUM; i++)
                writel_relaxed(~0, gpc_base + GPC_IMR1 + i * 4);
 
+       /*
+        * Clear the OF_POPULATED flag set in of_irq_init so that
+        * later the GPC power domain driver will not be skipped.
+        */
+       of_node_clear_flag(node, OF_POPULATED);
+
        return 0;
 }
 IRQCHIP_DECLARE(imx_gpc, "fsl,imx6q-gpc", imx_gpc_init);
index e53c6cf..6c6497e 100644 (file)
@@ -1,5 +1,4 @@
-ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
-       -I$(srctree)/arch/arm/plat-orion/include
+ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/arch/arm/plat-orion/include
 
 AFLAGS_coherency_ll.o          := -Wa,-march=armv7-a
 CFLAGS_pmsu.o                  := -march=armv7-a
index 567496b..29100be 100644 (file)
@@ -11,11 +11,13 @@ if ARCH_OXNAS
 
 config MACH_OX810SE
        bool "Support OX810SE Based Products"
+       select ARCH_HAS_RESET_CONTROLLER
        select COMMON_CLK_OXNAS
        select CPU_ARM926T
        select MFD_SYSCON
        select OXNAS_RPS_TIMER
        select PINCTRL_OXNAS
+       select RESET_CONTROLLER
        select RESET_OXNAS
        select VERSATILE_FPGA_IRQ
        help
index dc109dc..10bfdb1 100644 (file)
@@ -13,6 +13,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/module.h>      /* symbol_get ; symbol_put */
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/major.h>
index c410d84..66070ac 100644 (file)
@@ -83,7 +83,8 @@ static struct resource smc91x_resources[] = {
 };
 
 static struct smc91x_platdata smc91x_platdata = {
-       .flags = SMC91X_USE_32BIT | SMC91X_USE_DMA | SMC91X_NOWAIT,
+       .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+                SMC91X_USE_DMA | SMC91X_NOWAIT,
 };
 
 static struct platform_device smc91x_device = {
index 1080580..2c150bf 100644 (file)
@@ -13,6 +13,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/module.h>      /* symbol_get ; symbol_put */
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/gpio_keys.h>
index 3f06cd9..056369e 100644 (file)
@@ -120,7 +120,8 @@ static struct resource smc91x_resources[] = {
 };
 
 static struct smc91x_platdata xcep_smc91x_info = {
-       .flags  = SMC91X_USE_32BIT | SMC91X_NOWAIT | SMC91X_USE_DMA,
+       .flags  = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+                 SMC91X_NOWAIT | SMC91X_USE_DMA,
 };
 
 static struct platform_device smc91x_device = {
index dae8d86..4048821 100644 (file)
@@ -1,8 +1,7 @@
 #
 # Makefile for the linux kernel.
 #
-ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
-       -I$(srctree)/arch/arm/plat-versatile/include
+ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/arch/arm/plat-versatile/include
 
 obj-y                                  := core.o
 obj-$(CONFIG_REALVIEW_DT)              += realview-dt.o
index baf1745..a0ead0a 100644 (file)
@@ -93,7 +93,8 @@ static struct smsc911x_platform_config smsc911x_config = {
 };
 
 static struct smc91x_platdata smc91x_platdata = {
-       .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT,
+       .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+                SMC91X_NOWAIT,
 };
 
 static struct platform_device realview_eth_device = {
index 72b9e96..fa7fb71 100644 (file)
@@ -5,7 +5,7 @@
 #
 # Licensed under GPLv2
 
-ccflags-$(CONFIG_ARCH_MULTIPLATFORM) += -I$(srctree)/$(src)/include -I$(srctree)/arch/arm/plat-samsung/include
+ccflags-$(CONFIG_ARCH_MULTIPLATFORM) += -I$(srctree)/arch/arm/plat-samsung/include
 
 # Core
 
index 1525d7b..88149f8 100644 (file)
@@ -45,7 +45,7 @@ static struct resource smc91x_resources[] = {
 };
 
 static struct smc91x_platdata smc91x_platdata = {
-       .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT,
+       .flags = SMC91X_USE_16BIT | SMC91X_USE_8BIT | SMC91X_NOWAIT,
 };
 
 static struct platform_device smc91x_device = {
index f3dba6f..02e21bc 100644 (file)
@@ -40,5 +40,8 @@ bool shmobile_smp_cpu_can_disable(unsigned int cpu)
 bool __init shmobile_smp_init_fallback_ops(void)
 {
        /* fallback on PSCI/smp_ops if no other DT based method is detected */
+       if (!IS_ENABLED(CONFIG_SMP))
+               return false;
+
        return platform_can_secondary_boot() ? true : false;
 }
index 62f4d01..6344913 100644 (file)
@@ -728,7 +728,8 @@ static void *__init late_alloc(unsigned long sz)
 {
        void *ptr = (void *)__get_free_pages(PGALLOC_GFP, get_order(sz));
 
-       BUG_ON(!ptr);
+       if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
+               BUG();
        return ptr;
 }
 
@@ -1155,10 +1156,19 @@ void __init sanity_check_meminfo(void)
 {
        phys_addr_t memblock_limit = 0;
        int highmem = 0;
-       phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1;
+       u64 vmalloc_limit;
        struct memblock_region *reg;
        bool should_use_highmem = false;
 
+       /*
+        * Let's use our own (unoptimized) equivalent of __pa() that is
+        * not affected by wrap-arounds when sizeof(phys_addr_t) == 4.
+        * The result is used as the upper bound on physical memory address
+        * and may itself be outside the valid range for which phys_addr_t
+        * and therefore __pa() is defined.
+        */
+       vmalloc_limit = (u64)(uintptr_t)vmalloc_min - PAGE_OFFSET + PHYS_OFFSET;
+
        for_each_memblock(memory, reg) {
                phys_addr_t block_start = reg->base;
                phys_addr_t block_end = reg->base + reg->size;
@@ -1183,10 +1193,11 @@ void __init sanity_check_meminfo(void)
                        if (reg->size > size_limit) {
                                phys_addr_t overlap_size = reg->size - size_limit;
 
-                               pr_notice("Truncating RAM at %pa-%pa to -%pa",
-                                         &block_start, &block_end, &vmalloc_limit);
-                               memblock_remove(vmalloc_limit, overlap_size);
+                               pr_notice("Truncating RAM at %pa-%pa",
+                                         &block_start, &block_end);
                                block_end = vmalloc_limit;
+                               pr_cont(" to -%pa", &block_end);
+                               memblock_remove(vmalloc_limit, overlap_size);
                                should_use_highmem = true;
                        }
                }
index b0b82f5..3d2cef6 100644 (file)
@@ -50,7 +50,7 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
 static struct vcpu_info __percpu *xen_vcpu_info;
 
 /* Linux <-> Xen vCPU id mapping */
-DEFINE_PER_CPU(int, xen_vcpu_id) = -1;
+DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
 EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
 
 /* These are unused until we support booting "pre-ballooned" */
index 69c8787..bc3f00f 100644 (file)
@@ -54,6 +54,7 @@ config ARM64
        select HAVE_ALIGNED_STRUCT_PAGE if SLUB
        select HAVE_ARCH_AUDITSYSCALL
        select HAVE_ARCH_BITREVERSE
+       select HAVE_ARCH_HARDENED_USERCOPY
        select HAVE_ARCH_HUGE_VMAP
        select HAVE_ARCH_JUMP_LABEL
        select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
index bb2616b..be5d824 100644 (file)
@@ -8,7 +8,7 @@ config ARCH_SUNXI
 
 config ARCH_ALPINE
        bool "Annapurna Labs Alpine platform"
-       select ALPINE_MSI
+       select ALPINE_MSI if PCI
        help
          This enables support for the Annapurna Labs Alpine
          Soc family.
@@ -66,7 +66,7 @@ config ARCH_LG1K
 config ARCH_HISI
        bool "Hisilicon SoC Family"
        select ARM_TIMER_SP804
-       select HISILICON_IRQ_MBIGEN
+       select HISILICON_IRQ_MBIGEN if PCI
        help
          This enables support for Hisilicon ARMv8 SoC family
 
index 299f3ce..c528dd5 100644 (file)
@@ -12,6 +12,7 @@
 /dts-v1/;
 #include "exynos7.dtsi"
 #include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/clock/samsung,s2mps11.h>
 
 / {
        model = "Samsung Exynos7 Espresso board based on EXYNOS7";
@@ -43,6 +44,8 @@
 
 &rtc {
        status = "okay";
+       clocks = <&clock_ccore PCLK_RTC>, <&s2mps15_osc S2MPS11_CLK_AP>;
+       clock-names = "rtc", "rtc_src";
 };
 
 &watchdog {
index 0555b7c..eadf485 100644 (file)
@@ -1,4 +1,3 @@
-# CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_AUDIT=y
@@ -15,10 +14,14 @@ CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_MEMCG=y
 CONFIG_MEMCG_SWAP=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CGROUP_PIDS=y
 CONFIG_CGROUP_HUGETLB=y
-# CONFIG_UTS_NS is not set
-# CONFIG_IPC_NS is not set
-# CONFIG_NET_NS is not set
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_USER_NS=y
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_KALLSYMS_ALL=y
@@ -71,6 +74,7 @@ CONFIG_PREEMPT=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CMA=y
+CONFIG_SECCOMP=y
 CONFIG_XEN=y
 CONFIG_KEXEC=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
@@ -84,10 +88,37 @@ CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_IPV6 is not set
+CONFIG_IPV6=m
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
+CONFIG_BRIDGE=m
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_VLAN_8021Q_MVRP=y
 CONFIG_BPF_JIT=y
 CONFIG_CFG80211=m
 CONFIG_MAC80211=m
@@ -103,6 +134,7 @@ CONFIG_MTD=y
 CONFIG_MTD_M25P80=y
 CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=m
 CONFIG_VIRTIO_BLK=y
 CONFIG_SRAM=y
 # CONFIG_SCSI_PROC_FS is not set
@@ -120,7 +152,10 @@ CONFIG_SATA_SIL24=y
 CONFIG_PATA_PLATFORM=y
 CONFIG_PATA_OF_PLATFORM=y
 CONFIG_NETDEVICES=y
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
 CONFIG_TUN=y
+CONFIG_VETH=m
 CONFIG_VIRTIO_NET=y
 CONFIG_AMD_XGBE=y
 CONFIG_NET_XGENE=y
@@ -350,12 +385,16 @@ CONFIG_EXYNOS_ADC=y
 CONFIG_PWM_SAMSUNG=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA=y
 CONFIG_AUTOFS4_FS=y
-CONFIG_FUSE_FS=y
-CONFIG_CUSE=y
+CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_OVERLAY_FS=m
 CONFIG_VFAT_FS=y
 CONFIG_TMPFS=y
 CONFIG_HUGETLBFS=y
index 61b4915..1737aec 100644 (file)
@@ -22,7 +22,6 @@
 
 #define __ARCH_WANT_KPROBES_INSN_SLOT
 #define MAX_INSN_SIZE                  1
-#define MAX_STACK_SIZE                 128
 
 #define flush_insn_slot(p)             do { } while (0)
 #define kretprobe_blacklist_size       0
@@ -47,7 +46,6 @@ struct kprobe_ctlblk {
        struct prev_kprobe prev_kprobe;
        struct kprobe_step_ctx ss_ctx;
        struct pt_regs jprobe_saved_regs;
-       char jprobes_stack[MAX_STACK_SIZE];
 };
 
 void arch_remove_kprobe(struct kprobe *);
index 5e834d1..c47257c 100644 (file)
@@ -265,22 +265,25 @@ extern unsigned long __must_check __clear_user(void __user *addr, unsigned long
 static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
        kasan_check_write(to, n);
-       return  __arch_copy_from_user(to, from, n);
+       check_object_size(to, n, false);
+       return __arch_copy_from_user(to, from, n);
 }
 
 static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
        kasan_check_read(from, n);
-       return  __arch_copy_to_user(to, from, n);
+       check_object_size(from, n, true);
+       return __arch_copy_to_user(to, from, n);
 }
 
 static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
 {
        kasan_check_write(to, n);
 
-       if (access_ok(VERIFY_READ, from, n))
+       if (access_ok(VERIFY_READ, from, n)) {
+               check_object_size(to, n, false);
                n = __arch_copy_from_user(to, from, n);
-       else /* security hole - plug it */
+       } else /* security hole - plug it */
                memset(to, 0, n);
        return n;
 }
@@ -289,8 +292,10 @@ static inline unsigned long __must_check copy_to_user(void __user *to, const voi
 {
        kasan_check_read(from, n);
 
-       if (access_ok(VERIFY_WRITE, to, n))
+       if (access_ok(VERIFY_WRITE, to, n)) {
+               check_object_size(from, n, true);
                n = __arch_copy_to_user(to, from, n);
+       }
        return n;
 }
 
index 96e4a2b..441420c 100644 (file)
@@ -353,6 +353,8 @@ el1_sync:
        lsr     x24, x1, #ESR_ELx_EC_SHIFT      // exception class
        cmp     x24, #ESR_ELx_EC_DABT_CUR       // data abort in EL1
        b.eq    el1_da
+       cmp     x24, #ESR_ELx_EC_IABT_CUR       // instruction abort in EL1
+       b.eq    el1_ia
        cmp     x24, #ESR_ELx_EC_SYS64          // configurable trap
        b.eq    el1_undef
        cmp     x24, #ESR_ELx_EC_SP_ALIGN       // stack alignment exception
@@ -364,6 +366,11 @@ el1_sync:
        cmp     x24, #ESR_ELx_EC_BREAKPT_CUR    // debug exception in EL1
        b.ge    el1_dbg
        b       el1_inv
+
+el1_ia:
+       /*
+        * Fall through to the Data abort case
+        */
 el1_da:
        /*
         * Data abort handling
index b77f583..3e7b050 100644 (file)
@@ -757,6 +757,9 @@ ENTRY(__enable_mmu)
        isb
        bl      __create_page_tables            // recreate kernel mapping
 
+       tlbi    vmalle1                         // Remove any stale TLB entries
+       dsb     nsh
+
        msr     sctlr_el1, x19                  // re-enable the MMU
        isb
        ic      iallu                           // flush instructions fetched
index 21ab5df..65d81f9 100644 (file)
@@ -35,6 +35,7 @@
 #include <asm/sections.h>
 #include <asm/smp.h>
 #include <asm/suspend.h>
+#include <asm/sysreg.h>
 #include <asm/virt.h>
 
 /*
@@ -217,12 +218,22 @@ static int create_safe_exec_page(void *src_start, size_t length,
        set_pte(pte, __pte(virt_to_phys((void *)dst) |
                         pgprot_val(PAGE_KERNEL_EXEC)));
 
-       /* Load our new page tables */
-       asm volatile("msr       ttbr0_el1, %0;"
-                    "isb;"
-                    "tlbi      vmalle1is;"
-                    "dsb       ish;"
-                    "isb" : : "r"(virt_to_phys(pgd)));
+       /*
+        * Load our new page tables. A strict BBM approach requires that we
+        * ensure that TLBs are free of any entries that may overlap with the
+        * global mappings we are about to install.
+        *
+        * For a real hibernate/resume cycle TTBR0 currently points to a zero
+        * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
+        * runtime services), while for a userspace-driven test_resume cycle it
+        * points to userspace page tables (and we must point it at a zero page
+        * ourselves). Elsewhere we only (un)install the idmap with preemption
+        * disabled, so T0SZ should be as required regardless.
+        */
+       cpu_set_reserved_ttbr0();
+       local_flush_tlb_all();
+       write_sysreg(virt_to_phys(pgd), ttbr0_el1);
+       isb();
 
        *phys_dst_addr = virt_to_phys((void *)dst);
 
@@ -393,6 +404,38 @@ int swsusp_arch_resume(void)
        void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
                                          void *, phys_addr_t, phys_addr_t);
 
+       /*
+        * Restoring the memory image will overwrite the ttbr1 page tables.
+        * Create a second copy of just the linear map, and use this when
+        * restoring.
+        */
+       tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
+       if (!tmp_pg_dir) {
+               pr_err("Failed to allocate memory for temporary page tables.");
+               rc = -ENOMEM;
+               goto out;
+       }
+       rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0);
+       if (rc)
+               goto out;
+
+       /*
+        * Since we only copied the linear map, we need to find restore_pblist's
+        * linear map address.
+        */
+       lm_restore_pblist = LMADDR(restore_pblist);
+
+       /*
+        * We need a zero page that is zero before & after resume in order to
+        * to break before make on the ttbr1 page tables.
+        */
+       zero_page = (void *)get_safe_page(GFP_ATOMIC);
+       if (!zero_page) {
+               pr_err("Failed to allocate zero page.");
+               rc = -ENOMEM;
+               goto out;
+       }
+
        /*
         * Locate the exit code in the bottom-but-one page, so that *NULL
         * still has disastrous affects.
@@ -418,27 +461,6 @@ int swsusp_arch_resume(void)
         */
        __flush_dcache_area(hibernate_exit, exit_size);
 
-       /*
-        * Restoring the memory image will overwrite the ttbr1 page tables.
-        * Create a second copy of just the linear map, and use this when
-        * restoring.
-        */
-       tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
-       if (!tmp_pg_dir) {
-               pr_err("Failed to allocate memory for temporary page tables.");
-               rc = -ENOMEM;
-               goto out;
-       }
-       rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0);
-       if (rc)
-               goto out;
-
-       /*
-        * Since we only copied the linear map, we need to find restore_pblist's
-        * linear map address.
-        */
-       lm_restore_pblist = LMADDR(restore_pblist);
-
        /*
         * KASLR will cause the el2 vectors to be in a different location in
         * the resumed kernel. Load hibernate's temporary copy into el2.
@@ -453,12 +475,6 @@ int swsusp_arch_resume(void)
                __hyp_set_vectors(el2_vectors);
        }
 
-       /*
-        * We need a zero page that is zero before & after resume in order to
-        * to break before make on the ttbr1 page tables.
-        */
-       zero_page = (void *)get_safe_page(GFP_ATOMIC);
-
        hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
                       resume_hdr.reenter_kernel, lm_restore_pblist,
                       resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));
index bf97685..c6b0f40 100644 (file)
@@ -41,18 +41,6 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 static void __kprobes
 post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
 
-static inline unsigned long min_stack_size(unsigned long addr)
-{
-       unsigned long size;
-
-       if (on_irq_stack(addr, raw_smp_processor_id()))
-               size = IRQ_STACK_PTR(raw_smp_processor_id()) - addr;
-       else
-               size = (unsigned long)current_thread_info() + THREAD_START_SP - addr;
-
-       return min(size, FIELD_SIZEOF(struct kprobe_ctlblk, jprobes_stack));
-}
-
 static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
 {
        /* prepare insn slot */
@@ -489,20 +477,15 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
        struct jprobe *jp = container_of(p, struct jprobe, kp);
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-       long stack_ptr = kernel_stack_pointer(regs);
 
        kcb->jprobe_saved_regs = *regs;
        /*
-        * As Linus pointed out, gcc assumes that the callee
-        * owns the argument space and could overwrite it, e.g.
-        * tailcall optimization. So, to be absolutely safe
-        * we also save and restore enough stack bytes to cover
-        * the argument area.
+        * Since we can't be sure where in the stack frame "stacked"
+        * pass-by-value arguments are stored we just don't try to
+        * duplicate any of the stack. Do not use jprobes on functions that
+        * use more than 64 bytes (after padding each to an 8 byte boundary)
+        * of arguments, or pass individual arguments larger than 16 bytes.
         */
-       kasan_disable_current();
-       memcpy(kcb->jprobes_stack, (void *)stack_ptr,
-              min_stack_size(stack_ptr));
-       kasan_enable_current();
 
        instruction_pointer_set(regs, (unsigned long) jp->entry);
        preempt_disable();
@@ -554,10 +537,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
        }
        unpause_graph_tracing();
        *regs = kcb->jprobe_saved_regs;
-       kasan_disable_current();
-       memcpy((void *)stack_addr, kcb->jprobes_stack,
-              min_stack_size(stack_addr));
-       kasan_enable_current();
        preempt_enable_no_resched();
        return 1;
 }
index 9a3aec9..ccf79d8 100644 (file)
@@ -101,12 +101,20 @@ ENTRY(cpu_resume)
        bl      el2_setup               // if in EL2 drop to EL1 cleanly
        /* enable the MMU early - so we can access sleep_save_stash by va */
        adr_l   lr, __enable_mmu        /* __cpu_setup will return here */
-       ldr     x27, =_cpu_resume       /* __enable_mmu will branch here */
+       adr_l   x27, _resume_switched   /* __enable_mmu will branch here */
        adrp    x25, idmap_pg_dir
        adrp    x26, swapper_pg_dir
        b       __cpu_setup
 ENDPROC(cpu_resume)
 
+       .pushsection    ".idmap.text", "ax"
+_resume_switched:
+       ldr     x8, =_cpu_resume
+       br      x8
+ENDPROC(_resume_switched)
+       .ltorg
+       .popsection
+
 ENTRY(_cpu_resume)
        mrs     x1, mpidr_el1
        adrp    x8, mpidr_hash
index 76a6d92..d93d433 100644 (file)
@@ -661,9 +661,9 @@ void __init smp_init_cpus(void)
                acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
                                      acpi_parse_gic_cpu_interface, 0);
 
-       if (cpu_count > NR_CPUS)
-               pr_warn("no. of cores (%d) greater than configured maximum of %d - clipping\n",
-                       cpu_count, NR_CPUS);
+       if (cpu_count > nr_cpu_ids)
+               pr_warn("Number of cores (%d) exceeds configured maximum of %d - clipping\n",
+                       cpu_count, nr_cpu_ids);
 
        if (!bootcpu_valid) {
                pr_err("missing boot CPU MPIDR, not enabling secondaries\n");
@@ -677,7 +677,7 @@ void __init smp_init_cpus(void)
         * with entries in cpu_logical_map while initializing the cpus.
         * If the cpu set-up fails, invalidate the cpu_logical_map entry.
         */
-       for (i = 1; i < NR_CPUS; i++) {
+       for (i = 1; i < nr_cpu_ids; i++) {
                if (cpu_logical_map(i) != INVALID_HWID) {
                        if (smp_cpu_setup(i))
                                cpu_logical_map(i) = INVALID_HWID;
index ae7855f..5a84b45 100644 (file)
@@ -256,7 +256,7 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
 
        /*
         * We must restore the 32-bit state before the sysregs, thanks
-        * to Cortex-A57 erratum #852523.
+        * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
         */
        __sysreg32_restore_state(vcpu);
        __sysreg_restore_guest_state(guest_ctxt);
index b0b225c..e51367d 100644 (file)
@@ -823,14 +823,6 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
  * Architected system registers.
  * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
  *
- * We could trap ID_DFR0 and tell the guest we don't support performance
- * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
- * NAKed, so it will read the PMCR anyway.
- *
- * Therefore we tell the guest we have 0 counters.  Unfortunately, we
- * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
- * all PM registers, which doesn't crash the guest kernel at least.
- *
  * Debug handling: We do trap most, if not all debug related system
  * registers. The implementation is good enough to ensure that a guest
  * can use these with minimal performance degradation. The drawback is
@@ -1360,7 +1352,7 @@ static const struct sys_reg_desc cp15_regs[] = {
        { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
 
        /* ICC_SRE */
-       { Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi },
+       { Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre },
 
        { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
 
index f94b80e..9c3e75d 100644 (file)
@@ -242,7 +242,7 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
 
 static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
 {
-       pte_t *pte = pte_offset_kernel(pmd, 0);
+       pte_t *pte = pte_offset_kernel(pmd, 0UL);
        unsigned long addr;
        unsigned i;
 
@@ -254,7 +254,7 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
 
 static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
 {
-       pmd_t *pmd = pmd_offset(pud, 0);
+       pmd_t *pmd = pmd_offset(pud, 0UL);
        unsigned long addr;
        unsigned i;
 
@@ -271,7 +271,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
 
 static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 {
-       pud_t *pud = pud_offset(pgd, 0);
+       pud_t *pud = pud_offset(pgd, 0UL);
        unsigned long addr;
        unsigned i;
 
index c8beaa0..05d2bd7 100644 (file)
@@ -153,6 +153,11 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 }
 #endif
 
+static bool is_el1_instruction_abort(unsigned int esr)
+{
+       return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
+}
+
 /*
  * The kernel tried to access some page that wasn't present.
  */
@@ -161,8 +166,9 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
 {
        /*
         * Are we prepared to handle this kernel fault?
+        * We are almost certainly not prepared to handle instruction faults.
         */
-       if (fixup_exception(regs))
+       if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
                return;
 
        /*
@@ -267,7 +273,8 @@ static inline bool is_permission_fault(unsigned int esr)
        unsigned int ec       = ESR_ELx_EC(esr);
        unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
 
-       return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
+       return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM) ||
+              (ec == ESR_ELx_EC_IABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
 }
 
 static bool is_el0_instruction_abort(unsigned int esr)
@@ -312,6 +319,9 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
                if (regs->orig_addr_limit == KERNEL_DS)
                        die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
 
+               if (is_el1_instruction_abort(esr))
+                       die("Attempting to execute userspace memory", regs, esr);
+
                if (!search_exception_tables(regs->pc))
                        die("Accessing user space memory outside uaccess.h routines", regs, esr);
        }
index c7fe3ec..5bb15ea 100644 (file)
@@ -23,6 +23,8 @@
 #include <linux/module.h>
 #include <linux/of.h>
 
+#include <asm/acpi.h>
+
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 nodemask_t numa_nodes_parsed __initdata;
index c6db52b..10c5777 100644 (file)
@@ -146,7 +146,8 @@ static struct platform_device hitachi_fb_device = {
 #include <linux/smc91x.h>
 
 static struct smc91x_platdata smc91x_info = {
-       .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT,
+       .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+                SMC91X_NOWAIT,
        .leda = RPC_LED_100_10,
        .ledb = RPC_LED_TX_RX,
 };
index f35525b..57d1c43 100644 (file)
@@ -134,7 +134,8 @@ static struct platform_device net2272_bfin_device = {
 #include <linux/smc91x.h>
 
 static struct smc91x_platdata smc91x_info = {
-       .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT,
+       .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+                SMC91X_NOWAIT,
        .leda = RPC_LED_100_10,
        .ledb = RPC_LED_TX_RX,
 };
index 2e221c5..f86918a 100644 (file)
@@ -3,6 +3,8 @@
 
 #ifdef __KERNEL__
 
+#include <linux/types.h>
+
 /* H8/300 internal I/O functions */
 
 #define __raw_readb __raw_readb
index 6a15083..18ca6a9 100644 (file)
@@ -52,6 +52,7 @@ config IA64
        select MODULES_USE_ELF_RELA
        select ARCH_USE_CMPXCHG_LOCKREF
        select HAVE_ARCH_AUDITSYSCALL
+       select HAVE_ARCH_HARDENED_USERCOPY
        default y
        help
          The Itanium Processor Family is Intel's 64-bit successor to
index 2189d5d..465c709 100644 (file)
@@ -241,12 +241,18 @@ extern unsigned long __must_check __copy_user (void __user *to, const void __use
 static inline unsigned long
 __copy_to_user (void __user *to, const void *from, unsigned long count)
 {
+       if (!__builtin_constant_p(count))
+               check_object_size(from, count, true);
+
        return __copy_user(to, (__force void __user *) from, count);
 }
 
 static inline unsigned long
 __copy_from_user (void *to, const void __user *from, unsigned long count)
 {
+       if (!__builtin_constant_p(count))
+               check_object_size(to, count, false);
+
        return __copy_user((__force void __user *) to, from, count);
 }
 
@@ -258,8 +264,11 @@ __copy_from_user (void *to, const void __user *from, unsigned long count)
        const void *__cu_from = (from);                                                 \
        long __cu_len = (n);                                                            \
                                                                                        \
-       if (__access_ok(__cu_to, __cu_len, get_fs()))                                   \
-               __cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len);   \
+       if (__access_ok(__cu_to, __cu_len, get_fs())) {                                 \
+               if (!__builtin_constant_p(n))                                           \
+                       check_object_size(__cu_from, __cu_len, true);                   \
+               __cu_len = __copy_user(__cu_to, (__force void __user *)  __cu_from, __cu_len);  \
+       }                                                                               \
        __cu_len;                                                                       \
 })
 
@@ -270,8 +279,11 @@ __copy_from_user (void *to, const void __user *from, unsigned long count)
        long __cu_len = (n);                                                            \
                                                                                        \
        __chk_user_ptr(__cu_from);                                                      \
-       if (__access_ok(__cu_from, __cu_len, get_fs()))                                 \
+       if (__access_ok(__cu_from, __cu_len, get_fs())) {                               \
+               if (!__builtin_constant_p(n))                                           \
+                       check_object_size(__cu_to, __cu_len, false);                    \
                __cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len);   \
+       }                                                                               \
        __cu_len;                                                                       \
 })
 
index 2dcee3a..9202f82 100644 (file)
@@ -213,7 +213,6 @@ static inline int frame_extra_sizes(int f)
 
 static inline void adjustformat(struct pt_regs *regs)
 {
-       ((struct switch_stack *)regs - 1)->a5 = current->mm->start_data;
        /*
         * set format byte to make stack appear modulo 4, which it will
         * be when doing the rte
index 11fa51c..c0ec116 100644 (file)
@@ -390,7 +390,6 @@ void __init mem_init(void)
 
        free_all_bootmem();
        mem_init_print_info(NULL);
-       show_mem(0);
 }
 
 void free_initmem(void)
index ea0cd97..5f98759 100644 (file)
@@ -164,7 +164,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
  */
 static inline unsigned long ___pa(unsigned long x)
 {
-       if (config_enabled(CONFIG_64BIT)) {
+       if (IS_ENABLED(CONFIG_64BIT)) {
                /*
                 * For MIPS64 the virtual address may either be in one of
                 * the compatibility segements ckseg0 or ckseg1, or it may
@@ -173,7 +173,7 @@ static inline unsigned long ___pa(unsigned long x)
                return x < CKSEG0 ? XPHYSADDR(x) : CPHYSADDR(x);
        }
 
-       if (!config_enabled(CONFIG_EVA)) {
+       if (!IS_ENABLED(CONFIG_EVA)) {
                /*
                 * We're using the standard MIPS32 legacy memory map, ie.
                 * the address x is going to be in kseg0 or kseg1. We can
index 6eb52b9..e788515 100644 (file)
@@ -1642,8 +1642,14 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
 
        preempt_disable();
        if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) {
-               if (kvm_mips_host_tlb_lookup(vcpu, va) < 0)
-                       kvm_mips_handle_kseg0_tlb_fault(va, vcpu);
+               if (kvm_mips_host_tlb_lookup(vcpu, va) < 0 &&
+                   kvm_mips_handle_kseg0_tlb_fault(va, vcpu)) {
+                       kvm_err("%s: handling mapped kseg0 tlb fault for %lx, vcpu: %p, ASID: %#lx\n",
+                               __func__, va, vcpu, read_c0_entryhi());
+                       er = EMULATE_FAIL;
+                       preempt_enable();
+                       goto done;
+               }
        } else if ((KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0) ||
                   KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) {
                int index;
@@ -1680,12 +1686,18 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
                                                                run, vcpu);
                                preempt_enable();
                                goto dont_update_pc;
-                       } else {
-                               /*
-                                * We fault an entry from the guest tlb to the
-                                * shadow host TLB
-                                */
-                               kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb);
+                       }
+                       /*
+                        * We fault an entry from the guest tlb to the
+                        * shadow host TLB
+                        */
+                       if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) {
+                               kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n",
+                                       __func__, va, index, vcpu,
+                                       read_c0_entryhi());
+                               er = EMULATE_FAIL;
+                               preempt_enable();
+                               goto done;
                        }
                }
        } else {
@@ -2659,7 +2671,12 @@ enum emulation_result kvm_mips_handle_tlbmiss(u32 cause,
                         * OK we have a Guest TLB entry, now inject it into the
                         * shadow host TLB
                         */
-                       kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb);
+                       if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) {
+                               kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n",
+                                       __func__, va, index, vcpu,
+                                       read_c0_entryhi());
+                               er = EMULATE_FAIL;
+                       }
                }
        }
 
index 57319ee..121008c 100644 (file)
@@ -40,7 +40,7 @@ static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn)
        srcu_idx = srcu_read_lock(&kvm->srcu);
        pfn = gfn_to_pfn(kvm, gfn);
 
-       if (is_error_pfn(pfn)) {
+       if (is_error_noslot_pfn(pfn)) {
                kvm_err("Couldn't get pfn for gfn %#llx!\n", gfn);
                err = -EFAULT;
                goto out;
@@ -99,7 +99,7 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
        }
 
        gfn = (KVM_GUEST_CPHYSADDR(badvaddr) >> PAGE_SHIFT);
-       if (gfn >= kvm->arch.guest_pmap_npages) {
+       if ((gfn | 1) >= kvm->arch.guest_pmap_npages) {
                kvm_err("%s: Invalid gfn: %#llx, BadVaddr: %#lx\n", __func__,
                        gfn, badvaddr);
                kvm_mips_dump_host_tlbs();
@@ -138,35 +138,49 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
        unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0;
        struct kvm *kvm = vcpu->kvm;
        kvm_pfn_t pfn0, pfn1;
+       gfn_t gfn0, gfn1;
+       long tlb_lo[2];
        int ret;
 
-       if ((tlb->tlb_hi & VPN2_MASK) == 0) {
-               pfn0 = 0;
-               pfn1 = 0;
-       } else {
-               if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo[0])
-                                          >> PAGE_SHIFT) < 0)
-                       return -1;
-
-               if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo[1])
-                                          >> PAGE_SHIFT) < 0)
-                       return -1;
-
-               pfn0 = kvm->arch.guest_pmap[
-                       mips3_tlbpfn_to_paddr(tlb->tlb_lo[0]) >> PAGE_SHIFT];
-               pfn1 = kvm->arch.guest_pmap[
-                       mips3_tlbpfn_to_paddr(tlb->tlb_lo[1]) >> PAGE_SHIFT];
+       tlb_lo[0] = tlb->tlb_lo[0];
+       tlb_lo[1] = tlb->tlb_lo[1];
+
+       /*
+        * The commpage address must not be mapped to anything else if the guest
+        * TLB contains entries nearby, or commpage accesses will break.
+        */
+       if (!((tlb->tlb_hi ^ KVM_GUEST_COMMPAGE_ADDR) &
+                       VPN2_MASK & (PAGE_MASK << 1)))
+               tlb_lo[(KVM_GUEST_COMMPAGE_ADDR >> PAGE_SHIFT) & 1] = 0;
+
+       gfn0 = mips3_tlbpfn_to_paddr(tlb_lo[0]) >> PAGE_SHIFT;
+       gfn1 = mips3_tlbpfn_to_paddr(tlb_lo[1]) >> PAGE_SHIFT;
+       if (gfn0 >= kvm->arch.guest_pmap_npages ||
+           gfn1 >= kvm->arch.guest_pmap_npages) {
+               kvm_err("%s: Invalid gfn: [%#llx, %#llx], EHi: %#lx\n",
+                       __func__, gfn0, gfn1, tlb->tlb_hi);
+               kvm_mips_dump_guest_tlbs(vcpu);
+               return -1;
        }
 
+       if (kvm_mips_map_page(kvm, gfn0) < 0)
+               return -1;
+
+       if (kvm_mips_map_page(kvm, gfn1) < 0)
+               return -1;
+
+       pfn0 = kvm->arch.guest_pmap[gfn0];
+       pfn1 = kvm->arch.guest_pmap[gfn1];
+
        /* Get attributes from the Guest TLB */
        entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) |
                ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
-               (tlb->tlb_lo[0] & ENTRYLO_D) |
-               (tlb->tlb_lo[0] & ENTRYLO_V);
+               (tlb_lo[0] & ENTRYLO_D) |
+               (tlb_lo[0] & ENTRYLO_V);
        entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) |
                ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
-               (tlb->tlb_lo[1] & ENTRYLO_D) |
-               (tlb->tlb_lo[1] & ENTRYLO_V);
+               (tlb_lo[1] & ENTRYLO_D) |
+               (tlb_lo[1] & ENTRYLO_V);
 
        kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
                  tlb->tlb_lo[0], tlb->tlb_lo[1]);
@@ -354,9 +368,15 @@ u32 kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu)
                                local_irq_restore(flags);
                                return KVM_INVALID_INST;
                        }
-                       kvm_mips_handle_mapped_seg_tlb_fault(vcpu,
-                                                            &vcpu->arch.
-                                                            guest_tlb[index]);
+                       if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu,
+                                               &vcpu->arch.guest_tlb[index])) {
+                               kvm_err("%s: handling mapped seg tlb fault failed for %p, index: %u, vcpu: %p, ASID: %#lx\n",
+                                       __func__, opc, index, vcpu,
+                                       read_c0_entryhi());
+                               kvm_mips_dump_guest_tlbs(vcpu);
+                               local_irq_restore(flags);
+                               return KVM_INVALID_INST;
+                       }
                        inst = *(opc);
                }
                local_irq_restore(flags);
index c0ae625..274d5bc 100644 (file)
 #define        ENOTCONN        235     /* Transport endpoint is not connected */
 #define        ESHUTDOWN       236     /* Cannot send after transport endpoint shutdown */
 #define        ETOOMANYREFS    237     /* Too many references: cannot splice */
-#define EREFUSED       ECONNREFUSED    /* for HP's NFS apparently */
 #define        ETIMEDOUT       238     /* Connection timed out */
 #define        ECONNREFUSED    239     /* Connection refused */
-#define EREMOTERELEASE 240     /* Remote peer released connection */
+#define        EREFUSED        ECONNREFUSED    /* for HP's NFS apparently */
+#define        EREMOTERELEASE  240     /* Remote peer released connection */
 #define        EHOSTDOWN       241     /* Host is down */
 #define        EHOSTUNREACH    242     /* No route to host */
 
index 5adc339..0c2a94a 100644 (file)
@@ -51,8 +51,6 @@ EXPORT_SYMBOL(_parisc_requires_coherency);
 
 DEFINE_PER_CPU(struct cpuinfo_parisc, cpu_data);
 
-extern int update_cr16_clocksource(void);      /* from time.c */
-
 /*
 **     PARISC CPU driver - claim "device" and initialize CPU data structures.
 **
@@ -228,12 +226,6 @@ static int processor_probe(struct parisc_device *dev)
        }
 #endif
 
-       /* If we've registered more than one cpu,
-        * we'll use the jiffies clocksource since cr16
-        * is not synchronized between CPUs.
-        */
-       update_cr16_clocksource();
-
        return 0;
 }
 
index 505cf1a..4b0b963 100644 (file)
@@ -221,18 +221,6 @@ static struct clocksource clocksource_cr16 = {
        .flags                  = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-int update_cr16_clocksource(void)
-{
-       /* since the cr16 cycle counters are not synchronized across CPUs,
-          we'll check if we should switch to a safe clocksource: */
-       if (clocksource_cr16.rating != 0 && num_online_cpus() > 1) {
-               clocksource_change_rating(&clocksource_cr16, 0);
-               return 1;
-       }
-
-       return 0;
-}
-
 void __init start_cpu_itimer(void)
 {
        unsigned int cpu = smp_processor_id();
index ec4047e..927d2ab 100644 (file)
@@ -166,6 +166,7 @@ config PPC
        select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
        select GENERIC_CPU_AUTOPROBE
        select HAVE_VIRT_CPU_ACCOUNTING
+       select HAVE_ARCH_HARDENED_USERCOPY
 
 config GENERIC_CSUM
        def_bool CPU_LITTLE_ENDIAN
index ca25454..1934707 100644 (file)
@@ -66,29 +66,28 @@ endif
 UTS_MACHINE := $(OLDARCH)
 
 ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-override CC    += -mlittle-endian
-ifneq ($(cc-name),clang)
-override CC    += -mno-strict-align
-endif
-override AS    += -mlittle-endian
 override LD    += -EL
-override CROSS32CC += -mlittle-endian
 override CROSS32AS += -mlittle-endian
 LDEMULATION    := lppc
 GNUTARGET      := powerpcle
 MULTIPLEWORD   := -mno-multiple
 KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-save-toc-indirect)
 else
-ifeq ($(call cc-option-yn,-mbig-endian),y)
-override CC    += -mbig-endian
-override AS    += -mbig-endian
-endif
 override LD    += -EB
 LDEMULATION    := ppc
 GNUTARGET      := powerpc
 MULTIPLEWORD   := -mmultiple
 endif
 
+cflags-$(CONFIG_CPU_BIG_ENDIAN)                += $(call cc-option,-mbig-endian)
+cflags-$(CONFIG_CPU_LITTLE_ENDIAN)     += -mlittle-endian
+ifneq ($(cc-name),clang)
+  cflags-$(CONFIG_CPU_LITTLE_ENDIAN)   += -mno-strict-align
+endif
+
+aflags-$(CONFIG_CPU_BIG_ENDIAN)                += $(call cc-option,-mbig-endian)
+aflags-$(CONFIG_CPU_LITTLE_ENDIAN)     += -mlittle-endian
+
 ifeq ($(HAS_BIARCH),y)
 override AS    += -a$(CONFIG_WORD_SIZE)
 override LD    += -m elf$(CONFIG_WORD_SIZE)$(LDEMULATION)
@@ -232,6 +231,9 @@ cpu-as-$(CONFIG_E200)               += -Wa,-me200
 KBUILD_AFLAGS += $(cpu-as-y)
 KBUILD_CFLAGS += $(cpu-as-y)
 
+KBUILD_AFLAGS += $(aflags-y)
+KBUILD_CFLAGS += $(cflags-y)
+
 head-y                         := arch/powerpc/kernel/head_$(CONFIG_WORD_SIZE).o
 head-$(CONFIG_8xx)             := arch/powerpc/kernel/head_8xx.o
 head-$(CONFIG_40x)             := arch/powerpc/kernel/head_40x.o
index bfe3d37..9fa046d 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
+#include <linux/cpufeature.h>
 #include <asm/switch_to.h>
 
 #define CHKSUM_BLOCK_SIZE      1
@@ -157,7 +158,7 @@ static void __exit crc32c_vpmsum_mod_fini(void)
        crypto_unregister_shash(&alg);
 }
 
-module_init(crc32c_vpmsum_mod_init);
+module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crc32c_vpmsum_mod_init);
 module_exit(crc32c_vpmsum_mod_fini);
 
 MODULE_AUTHOR("Anton Blanchard <anton@samba.org>");
index 3d7fc06..01b8a13 100644 (file)
@@ -19,4 +19,17 @@ extern u64 pnv_first_deep_stop_state;
 
 #endif
 
+/* Idle state entry routines */
+#ifdef CONFIG_PPC_P7_NAP
+#define        IDLE_STATE_ENTER_SEQ(IDLE_INST)                         \
+       /* Magic NAP/SLEEP/WINKLE mode enter sequence */        \
+       std     r0,0(r1);                                       \
+       ptesync;                                                \
+       ld      r0,0(r1);                                       \
+1:     cmp     cr0,r0,r0;                                      \
+       bne     1b;                                             \
+       IDLE_INST;                                              \
+       b       .
+#endif /* CONFIG_PPC_P7_NAP */
+
 #endif
index 666bef4..9377bdf 100644 (file)
@@ -3,6 +3,7 @@
 
 #ifndef __ASSEMBLY__
 #include <linux/cpumask.h>
+#include <asm/cpu_has_feature.h>
 
 /*
  * Mapping of threads to cores
index 57fec8a..ddf54f5 100644 (file)
@@ -186,6 +186,7 @@ label##3:                                           \
 
 #ifndef __ASSEMBLY__
 void apply_feature_fixups(void);
+void setup_feature_keys(void);
 #endif
 
 #endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */
index 88b4901..85b7a1a 100644 (file)
@@ -21,7 +21,7 @@
 #ifndef __ASM_PPC64_HMI_H__
 #define __ASM_PPC64_HMI_H__
 
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 
 #define        CORE_TB_RESYNC_REQ_BIT          63
 #define MAX_SUBCORE_PER_CORE           4
index 148303e..6a6792b 100644 (file)
@@ -183,11 +183,6 @@ struct paca_struct {
         */
        u16 in_mce;
        u8 hmi_event_available;          /* HMI event is available */
-       /*
-        * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
-        * more details
-        */
-       struct sibling_subcore_state *sibling_subcore_state;
 #endif
 
        /* Stuff for accurate time accounting */
@@ -202,6 +197,13 @@ struct paca_struct {
        struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
 #endif
        struct kvmppc_host_state kvm_hstate;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+       /*
+        * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
+        * more details
+        */
+       struct sibling_subcore_state *sibling_subcore_state;
+#endif
 #endif
 };
 
index b5e88e4..c0309c5 100644 (file)
@@ -301,6 +301,7 @@ extern void pci_process_bridge_OF_ranges(struct pci_controller *hose,
 /* Allocate & free a PCI host bridge structure */
 extern struct pci_controller *pcibios_alloc_controller(struct device_node *dev);
 extern void pcibios_free_controller(struct pci_controller *phb);
+extern void pcibios_free_controller_deferred(struct pci_host_bridge *bridge);
 
 #ifdef CONFIG_PCI
 extern int pcibios_vaddr_is_ioport(void __iomem *address);
index 0a74ebe..17c8380 100644 (file)
@@ -75,14 +75,6 @@ static inline void disable_kernel_spe(void)
 static inline void __giveup_spe(struct task_struct *t) { }
 #endif
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-extern void flush_tmregs_to_thread(struct task_struct *);
-#else
-static inline void flush_tmregs_to_thread(struct task_struct *t)
-{
-}
-#endif
-
 static inline void clear_task_ebb(struct task_struct *t)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
index b7c20f0..c1dc6c1 100644 (file)
@@ -310,10 +310,15 @@ static inline unsigned long copy_from_user(void *to,
 {
        unsigned long over;
 
-       if (access_ok(VERIFY_READ, from, n))
+       if (access_ok(VERIFY_READ, from, n)) {
+               if (!__builtin_constant_p(n))
+                       check_object_size(to, n, false);
                return __copy_tofrom_user((__force void __user *)to, from, n);
+       }
        if ((unsigned long)from < TASK_SIZE) {
                over = (unsigned long)from + n - TASK_SIZE;
+               if (!__builtin_constant_p(n - over))
+                       check_object_size(to, n - over, false);
                return __copy_tofrom_user((__force void __user *)to, from,
                                n - over) + over;
        }
@@ -325,10 +330,15 @@ static inline unsigned long copy_to_user(void __user *to,
 {
        unsigned long over;
 
-       if (access_ok(VERIFY_WRITE, to, n))
+       if (access_ok(VERIFY_WRITE, to, n)) {
+               if (!__builtin_constant_p(n))
+                       check_object_size(from, n, true);
                return __copy_tofrom_user(to, (__force void __user *)from, n);
+       }
        if ((unsigned long)to < TASK_SIZE) {
                over = (unsigned long)to + n - TASK_SIZE;
+               if (!__builtin_constant_p(n))
+                       check_object_size(from, n - over, true);
                return __copy_tofrom_user(to, (__force void __user *)from,
                                n - over) + over;
        }
@@ -372,6 +382,10 @@ static inline unsigned long __copy_from_user_inatomic(void *to,
                if (ret == 0)
                        return 0;
        }
+
+       if (!__builtin_constant_p(n))
+               check_object_size(to, n, false);
+
        return __copy_tofrom_user((__force void __user *)to, from, n);
 }
 
@@ -398,6 +412,9 @@ static inline unsigned long __copy_to_user_inatomic(void __user *to,
                if (ret == 0)
                        return 0;
        }
+       if (!__builtin_constant_p(n))
+               check_object_size(from, n, true);
+
        return __copy_tofrom_user(to, (__force const void __user *)from, n);
 }
 
index f5f729c..f0b2385 100644 (file)
@@ -159,6 +159,8 @@ extern void xics_teardown_cpu(void);
 extern void xics_kexec_teardown_cpu(int secondary);
 extern void xics_migrate_irqs_away(void);
 extern void icp_native_eoi(struct irq_data *d);
+extern int xics_set_irq_type(struct irq_data *d, unsigned int flow_type);
+extern int xics_retrigger(struct irq_data *data);
 #ifdef CONFIG_SMP
 extern int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
                               unsigned int strict_check);
index b2027a5..fe4c075 100644 (file)
@@ -41,7 +41,7 @@ obj-$(CONFIG_VDSO32)          += vdso32/
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)       += hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)    += cpu_setup_ppc970.o cpu_setup_pa6t.o
 obj-$(CONFIG_PPC_BOOK3S_64)    += cpu_setup_power.o
-obj-$(CONFIG_PPC_BOOK3S_64)    += mce.o mce_power.o hmi.o
+obj-$(CONFIG_PPC_BOOK3S_64)    += mce.o mce_power.o
 obj-$(CONFIG_PPC_BOOK3E_64)    += exceptions-64e.o idle_book3e.o
 obj-$(CONFIG_PPC64)            += vdso64/
 obj-$(CONFIG_ALTIVEC)          += vecemu.o
index c9bc78e..7429556 100644 (file)
@@ -168,10 +168,10 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
        int n = 0, l = 0;
        char buffer[128];
 
-       n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n",
+       n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
                       edev->phb->global_number, pdn->busno,
                       PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
-       pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n",
+       pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
                edev->phb->global_number, pdn->busno,
                PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
 
index 6b8bc0d..5afd03e 100644 (file)
@@ -368,13 +368,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 tabort_syscall:
        /* Firstly we need to enable TM in the kernel */
        mfmsr   r10
-       li      r13, 1
-       rldimi  r10, r13, MSR_TM_LG, 63-MSR_TM_LG
+       li      r9, 1
+       rldimi  r10, r9, MSR_TM_LG, 63-MSR_TM_LG
        mtmsrd  r10, 0
 
        /* tabort, this dooms the transaction, nothing else */
-       li      r13, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
-       TABORT(R13)
+       li      r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
+       TABORT(R9)
 
        /*
         * Return directly to userspace. We have corrupted user register state,
@@ -382,8 +382,8 @@ tabort_syscall:
         * resume after the tbegin of the aborted transaction with the
         * checkpointed register state.
         */
-       li      r13, MSR_RI
-       andc    r10, r10, r13
+       li      r9, MSR_RI
+       andc    r10, r10, r9
        mtmsrd  r10, 1
        mtspr   SPRN_SRR0, r11
        mtspr   SPRN_SRR1, r12
index 41091fd..bffec73 100644 (file)
@@ -144,29 +144,14 @@ machine_check_pSeries_1:
         * vector
         */
        SET_SCRATCH0(r13)               /* save r13 */
-#ifdef CONFIG_PPC_P7_NAP
-BEGIN_FTR_SECTION
-       /* Running native on arch 2.06 or later, check if we are
-        * waking up from nap. We only handle no state loss and
-        * supervisor state loss. We do -not- handle hypervisor
-        * state loss at this time.
+       /*
+        * Running native on arch 2.06 or later, we may wakeup from winkle
+        * inside machine check. If yes, then last bit of HSPRG0 would be set
+        * to 1. Hence clear it unconditionally.
         */
-       mfspr   r13,SPRN_SRR1
-       rlwinm. r13,r13,47-31,30,31
-       OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
-       beq     9f
-
-       mfspr   r13,SPRN_SRR1
-       rlwinm. r13,r13,47-31,30,31
-       /* waking up from powersave (nap) state */
-       cmpwi   cr1,r13,2
-       /* Total loss of HV state is fatal. let's just stay stuck here */
-       OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
-       bgt     cr1,.
-9:
-       OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-#endif /* CONFIG_PPC_P7_NAP */
+       GET_PACA(r13)
+       clrrdi  r13,r13,1
+       SET_PACA(r13)
        EXCEPTION_PROLOG_0(PACA_EXMC)
 BEGIN_FTR_SECTION
        b       machine_check_powernv_early
@@ -500,7 +485,23 @@ machine_check_fwnmi:
        EXCEPTION_PROLOG_0(PACA_EXMC)
 machine_check_pSeries_0:
        EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST, 0x200)
-       EXCEPTION_PROLOG_PSERIES_1(machine_check_common, EXC_STD)
+       /*
+        * The following is essentially EXCEPTION_PROLOG_PSERIES_1 with the
+        * difference that MSR_RI is not enabled, because PACA_EXMC is being
+        * used, so nested machine check corrupts it. machine_check_common
+        * enables MSR_RI.
+        */
+       ld      r12,PACAKBASE(r13)
+       ld      r10,PACAKMSR(r13)
+       xori    r10,r10,MSR_RI
+       mfspr   r11,SPRN_SRR0
+       LOAD_HANDLER(r12, machine_check_common)
+       mtspr   SPRN_SRR0,r12
+       mfspr   r12,SPRN_SRR1
+       mtspr   SPRN_SRR1,r10
+       rfid
+       b       .       /* prevent speculative execution */
+
        KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200)
        KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300)
        KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380)
@@ -984,14 +985,17 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 machine_check_common:
 
        mfspr   r10,SPRN_DAR
-       std     r10,PACA_EXGEN+EX_DAR(r13)
+       std     r10,PACA_EXMC+EX_DAR(r13)
        mfspr   r10,SPRN_DSISR
-       stw     r10,PACA_EXGEN+EX_DSISR(r13)
+       stw     r10,PACA_EXMC+EX_DSISR(r13)
        EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
        FINISH_NAP
        RECONCILE_IRQ_STATE(r10, r11)
-       ld      r3,PACA_EXGEN+EX_DAR(r13)
-       lwz     r4,PACA_EXGEN+EX_DSISR(r13)
+       ld      r3,PACA_EXMC+EX_DAR(r13)
+       lwz     r4,PACA_EXMC+EX_DSISR(r13)
+       /* Enable MSR_RI when finished with PACA_EXMC */
+       li      r10,MSR_RI
+       mtmsrd  r10,1
        std     r3,_DAR(r1)
        std     r4,_DSISR(r1)
        bl      save_nvgprs
@@ -1273,25 +1277,51 @@ machine_check_handle_early:
         * Check if thread was in power saving mode. We come here when any
         * of the following is true:
         * a. thread wasn't in power saving mode
-        * b. thread was in power saving mode with no state loss or
-        *    supervisor state loss
+        * b. thread was in power saving mode with no state loss,
+        *    supervisor state loss or hypervisor state loss.
         *
-        * Go back to nap again if (b) is true.
+        * Go back to nap/sleep/winkle mode again if (b) is true.
         */
        rlwinm. r11,r12,47-31,30,31     /* Was it in power saving mode? */
        beq     4f                      /* No, it wasn;t */
        /* Thread was in power saving mode. Go back to nap again. */
        cmpwi   r11,2
-       bne     3f
-       /* Supervisor state loss */
+       blt     3f
+       /* Supervisor/Hypervisor state loss */
        li      r0,1
        stb     r0,PACA_NAPSTATELOST(r13)
 3:     bl      machine_check_queue_event
        MACHINE_CHECK_HANDLER_WINDUP
        GET_PACA(r13)
        ld      r1,PACAR1(r13)
-       li      r3,PNV_THREAD_NAP
-       b       pnv_enter_arch207_idle_mode
+       /*
+        * Check what idle state this CPU was in and go back to same mode
+        * again.
+        */
+       lbz     r3,PACA_THREAD_IDLE_STATE(r13)
+       cmpwi   r3,PNV_THREAD_NAP
+       bgt     10f
+       IDLE_STATE_ENTER_SEQ(PPC_NAP)
+       /* No return */
+10:
+       cmpwi   r3,PNV_THREAD_SLEEP
+       bgt     2f
+       IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+       /* No return */
+
+2:
+       /*
+        * Go back to winkle. Please note that this thread was woken up in
+        * machine check from winkle and has not restored the per-subcore
+        * state. Hence before going back to winkle, set last bit of HSPRG0
+        * to 1. This will make sure that if this thread gets woken up
+        * again at reset vector 0x100 then it will get a chance to restore
+        * the subcore state.
+        */
+       ori     r13,r13,1
+       SET_PACA(r13)
+       IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
+       /* No return */
 4:
 #endif
        /*
diff --git a/arch/powerpc/kernel/hmi.c b/arch/powerpc/kernel/hmi.c
deleted file mode 100644 (file)
index e3f738e..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Hypervisor Maintenance Interrupt (HMI) handling.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.
- *
- * Copyright 2015 IBM Corporation
- * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
- */
-
-#undef DEBUG
-
-#include <linux/types.h>
-#include <linux/compiler.h>
-#include <asm/paca.h>
-#include <asm/hmi.h>
-
-void wait_for_subcore_guest_exit(void)
-{
-       int i;
-
-       /*
-        * NULL bitmap pointer indicates that KVM module hasn't
-        * been loaded yet and hence no guests are running.
-        * If no KVM is in use, no need to co-ordinate among threads
-        * as all of them will always be in host and no one is going
-        * to modify TB other than the opal hmi handler.
-        * Hence, just return from here.
-        */
-       if (!local_paca->sibling_subcore_state)
-               return;
-
-       for (i = 0; i < MAX_SUBCORE_PER_CORE; i++)
-               while (local_paca->sibling_subcore_state->in_guest[i])
-                       cpu_relax();
-}
-
-void wait_for_tb_resync(void)
-{
-       if (!local_paca->sibling_subcore_state)
-               return;
-
-       while (test_bit(CORE_TB_RESYNC_REQ_BIT,
-                               &local_paca->sibling_subcore_state->flags))
-               cpu_relax();
-}
index ba79d15..2265c63 100644 (file)
                                PSSCR_PSLL_MASK | PSSCR_TR_MASK | \
                                PSSCR_MTL_MASK
 
-/* Idle state entry routines */
-
-#define        IDLE_STATE_ENTER_SEQ(IDLE_INST)                         \
-       /* Magic NAP/SLEEP/WINKLE mode enter sequence */        \
-       std     r0,0(r1);                                       \
-       ptesync;                                                \
-       ld      r0,0(r1);                                       \
-1:     cmp     cr0,r0,r0;                                      \
-       bne     1b;                                             \
-       IDLE_INST;                                              \
-       b       .
-
        .text
 
 /*
@@ -363,8 +351,8 @@ _GLOBAL(power9_idle_stop)
  * cr3 - set to gt if waking up with partial/complete hypervisor state loss
  */
 _GLOBAL(pnv_restore_hyp_resource)
-       ld      r2,PACATOC(r13);
 BEGIN_FTR_SECTION
+       ld      r2,PACATOC(r13);
        /*
         * POWER ISA 3. Use PSSCR to determine if we
         * are waking up from deep idle state
@@ -395,6 +383,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
         */
        clrldi  r5,r13,63
        clrrdi  r13,r13,1
+
+       /* Now that we are sure r13 is corrected, load TOC */
+       ld      r2,PACATOC(r13);
        cmpwi   cr4,r5,1
        mtspr   SPRN_HSPRG0,r13
 
index 3ed8ec0..e785cc9 100644 (file)
@@ -29,7 +29,7 @@
 #include <linux/kprobes.h>
 #include <linux/ptrace.h>
 #include <linux/preempt.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/kdebug.h>
 #include <linux/slab.h>
 #include <asm/code-patching.h>
index ef267fd..5e7ece0 100644 (file)
@@ -92,7 +92,8 @@ void save_mce_event(struct pt_regs *regs, long handled,
        mce->in_use = 1;
 
        mce->initiator = MCE_INITIATOR_CPU;
-       if (handled)
+       /* Mark it recovered if we have handled it and MSR(RI=1). */
+       if (handled && (regs->msr & MSR_RI))
                mce->disposition = MCE_DISPOSITION_RECOVERED;
        else
                mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
index a5c0153..e589080 100644 (file)
@@ -78,6 +78,7 @@ EXPORT_SYMBOL(get_pci_dma_ops);
 static int get_phb_number(struct device_node *dn)
 {
        int ret, phb_id = -1;
+       u32 prop_32;
        u64 prop;
 
        /*
@@ -86,8 +87,10 @@ static int get_phb_number(struct device_node *dn)
         * reading "ibm,opal-phbid", only present in OPAL environment.
         */
        ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop);
-       if (ret)
-               ret = of_property_read_u32_index(dn, "reg", 1, (u32 *)&prop);
+       if (ret) {
+               ret = of_property_read_u32_index(dn, "reg", 1, &prop_32);
+               prop = prop_32;
+       }
 
        if (!ret)
                phb_id = (int)(prop & (MAX_PHBS - 1));
@@ -150,6 +153,42 @@ void pcibios_free_controller(struct pci_controller *phb)
 }
 EXPORT_SYMBOL_GPL(pcibios_free_controller);
 
+/*
+ * This function is used to call pcibios_free_controller()
+ * in a deferred manner: a callback from the PCI subsystem.
+ *
+ * _*DO NOT*_ call pcibios_free_controller() explicitly if
+ * this is used (or it may access an invalid *phb pointer).
+ *
+ * The callback occurs when all references to the root bus
+ * are dropped (e.g., child buses/devices and their users).
+ *
+ * It's called as .release_fn() of 'struct pci_host_bridge'
+ * which is associated with the 'struct pci_controller.bus'
+ * (root bus) - it expects .release_data to hold a pointer
+ * to 'struct pci_controller'.
+ *
+ * In order to use it, register .release_fn()/release_data
+ * like this:
+ *
+ * pci_set_host_bridge_release(bridge,
+ *                             pcibios_free_controller_deferred,
+ *                             (void *) phb);
+ *
+ * e.g. in the pcibios_root_bridge_prepare() callback from
+ * pci_create_root_bus().
+ */
+void pcibios_free_controller_deferred(struct pci_host_bridge *bridge)
+{
+       struct pci_controller *phb = (struct pci_controller *)
+                                        bridge->release_data;
+
+       pr_debug("domain %d, dynamic %d\n", phb->global_number, phb->is_dynamic);
+
+       pcibios_free_controller(phb);
+}
+EXPORT_SYMBOL_GPL(pcibios_free_controller_deferred);
+
 /*
  * The function is used to return the minimal alignment
  * for memory or I/O windows of the associated P2P bridge.
index 58ccf86..9ee2623 100644 (file)
@@ -1074,26 +1074,6 @@ static inline void restore_sprs(struct thread_struct *old_thread,
 #endif
 }
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-void flush_tmregs_to_thread(struct task_struct *tsk)
-{
-       /*
-        * Process self tracing is not yet supported through
-        * ptrace interface. Ptrace generic code should have
-        * prevented this from happening in the first place.
-        * Warn once here with the message, if some how it
-        * is attempted.
-        */
-       WARN_ONCE(tsk == current,
-               "Not expecting ptrace on self: TM regs may be incorrect\n");
-
-       /*
-        * If task is not current, it should have been flushed
-        * already to it's thread_struct during __switch_to().
-        */
-}
-#endif
-
 struct task_struct *__switch_to(struct task_struct *prev,
        struct task_struct *new)
 {
index 6ee4b72..d3eff99 100644 (file)
@@ -695,7 +695,7 @@ unsigned char ibm_architecture_vec[] = {
        OV4_MIN_ENT_CAP,                /* minimum VP entitled capacity */
 
        /* option vector 5: PAPR/OF options */
-       VECTOR_LENGTH(18),              /* length */
+       VECTOR_LENGTH(21),              /* length */
        0,                              /* don't ignore, don't halt */
        OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) |
        OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) |
@@ -726,8 +726,11 @@ unsigned char ibm_architecture_vec[] = {
        0,
        0,
        OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) |
-       OV5_FEAT(OV5_PFO_HW_842),
-       OV5_FEAT(OV5_SUB_PROCESSORS),
+       OV5_FEAT(OV5_PFO_HW_842),                               /* Byte 17 */
+       0,                                                      /* Byte 18 */
+       0,                                                      /* Byte 19 */
+       0,                                                      /* Byte 20 */
+       OV5_FEAT(OV5_SUB_PROCESSORS),                           /* Byte 21 */
 
        /* option vector 6: IBM PAPR hints */
        VECTOR_LENGTH(3),               /* length */
@@ -2940,7 +2943,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
 
        /* Don't print anything after quiesce under OPAL, it crashes OFW */
        if (of_platform != PLATFORM_OPAL) {
-               prom_printf("Booting Linux via __start() ...\n");
+               prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
                prom_debug("->dt_header_start=0x%x\n", hdr);
        }
 
index 4f3c575..bf91658 100644 (file)
@@ -38,6 +38,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/switch_to.h>
+#include <asm/tm.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
@@ -118,6 +119,24 @@ static const struct pt_regs_offset regoffset_table[] = {
        REG_OFFSET_END,
 };
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static void flush_tmregs_to_thread(struct task_struct *tsk)
+{
+       /*
+        * If task is not current, it will have been flushed already to
+        * its thread_struct during __switch_to().
+        *
+        * A reclaim flushes ALL the state.
+        */
+
+       if (tsk == current && MSR_TM_SUSPENDED(mfmsr()))
+               tm_reclaim_current(TM_CAUSE_SIGNAL);
+
+}
+#else
+static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
+#endif
+
 /**
  * regs_query_register_offset() - query register offset from its name
  * @name:      the name of a register
index c3e861d..24ec3ea 100644 (file)
@@ -93,15 +93,16 @@ notrace unsigned long __init early_init(unsigned long dt_ptr)
  * and we are running with enough of the MMU enabled to have our
  * proper kernel virtual addresses
  *
- * Find out what kind of machine we're on and save any data we need
- * from the early boot process (devtree is copied on pmac by prom_init()).
- * This is called very early on the boot process, after a minimal
- * MMU environment has been set up but before MMU_init is called.
+ * We do the initial parsing of the flat device-tree and prepare
+ * for the MMU to be fully initialized.
  */
 extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */
 
 notrace void __init machine_init(u64 dt_ptr)
 {
+       /* Configure static keys first, now that we're relocated. */
+       setup_feature_keys();
+
        /* Enable early debugging if any specified (see udbg.h) */
        udbg_early_init();
 
index eafb9a7..7ac8e6e 100644 (file)
@@ -300,6 +300,7 @@ void __init early_setup(unsigned long dt_ptr)
 
        /* Apply all the dynamic patching */
        apply_feature_fixups();
+       setup_feature_keys();
 
        /* Initialize the hash table or TLB handling */
        early_init_mmu();
index b6aa378..a7daf74 100644 (file)
@@ -1226,7 +1226,21 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
                (regs->gpr[1] + __SIGNAL_FRAMESIZE + 16);
        if (!access_ok(VERIFY_READ, rt_sf, sizeof(*rt_sf)))
                goto bad;
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+       /*
+        * If there is a transactional state then throw it away.
+        * The purpose of a sigreturn is to destroy all traces of the
+        * signal frame, this includes any transactional state created
+        * within it. We only check for suspended as we can never be
+        * active in the kernel; if we are active, there is nothing better to
+        * do than go ahead and Bad Thing later.
+        * The cause is not important as there will never be a
+        * recheckpoint so it's not user visible.
+        */
+       if (MSR_TM_SUSPENDED(mfmsr()))
+               tm_reclaim_current(0);
+
        if (__get_user(tmp, &rt_sf->uc.uc_link))
                goto bad;
        uc_transact = (struct ucontext __user *)(uintptr_t)tmp;
index 7e49984..70409bb 100644 (file)
@@ -676,7 +676,21 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
        if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
                goto badframe;
        set_current_blocked(&set);
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+       /*
+        * If there is a transactional state then throw it away.
+        * The purpose of a sigreturn is to destroy all traces of the
+        * signal frame, this includes any transactional state created
+        * within it. We only check for suspended as we can never be
+        * active in the kernel; if we are active, there is nothing better to
+        * do than go ahead and Bad Thing later.
+        * The cause is not important as there will never be a
+        * recheckpoint so it's not user visible.
+        */
+       if (MSR_TM_SUSPENDED(mfmsr()))
+               tm_reclaim_current(0);
+
        if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
                goto badframe;
        if (MSR_TM_ACTIVE(msr)) {
index 25a3905..9c6f3fd 100644 (file)
@@ -830,7 +830,7 @@ int __cpu_disable(void)
 
        /* Update sibling maps */
        base = cpu_first_thread_sibling(cpu);
-       for (i = 0; i < threads_per_core; i++) {
+       for (i = 0; i < threads_per_core && base + i < nr_cpu_ids; i++) {
                cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
                cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
                cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
index 2cb5892..62859eb 100644 (file)
@@ -25,7 +25,8 @@
 #include <linux/user.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
-#include <linux/module.h>
+#include <linux/extable.h>
+#include <linux/module.h>      /* print_modules */
 #include <linux/prctl.h>
 #include <linux/delay.h>
 #include <linux/kprobes.h>
index 6767605..4111d30 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/security.h>
 #include <linux/memblock.h>
 
+#include <asm/cpu_has_feature.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/mmu.h>
index cbabd14..78a7449 100644 (file)
@@ -30,7 +30,7 @@ CPPFLAGS_vdso32.lds += -P -C -Upowerpc
 $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
 
 # link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32)
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
        $(call if_changed,vdso32ld)
 
 # strip rule for the .so file
@@ -39,12 +39,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
        $(call if_changed,objcopy)
 
 # assembly rules for the .S files
-$(obj-vdso32): %.o: %.S
+$(obj-vdso32): %.o: %.S FORCE
        $(call if_changed_dep,vdso32as)
 
 # actual build commands
 quiet_cmd_vdso32ld = VDSO32L $@
-      cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@
+      cmd_vdso32ld = $(CROSS32CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
 quiet_cmd_vdso32as = VDSO32A $@
       cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
 
index c710802..366ae09 100644 (file)
@@ -23,7 +23,7 @@ CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
 $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
 
 # link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64)
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE
        $(call if_changed,vdso64ld)
 
 # strip rule for the .so file
@@ -32,12 +32,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
        $(call if_changed,objcopy)
 
 # assembly rules for the .S files
-$(obj-vdso64): %.o: %.S
+$(obj-vdso64): %.o: %.S FORCE
        $(call if_changed_dep,vdso64as)
 
 # actual build commands
 quiet_cmd_vdso64ld = VDSO64L $@
-      cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+      cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
 quiet_cmd_vdso64as = VDSO64A $@
       cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
 
index 1f9e552..855d4b9 100644 (file)
@@ -78,6 +78,7 @@ kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
 
 ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
+       book3s_hv_hmi.o \
        book3s_hv_rmhandlers.o \
        book3s_hv_rm_mmu.o \
        book3s_hv_ras.o \
diff --git a/arch/powerpc/kvm/book3s_hv_hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c
new file mode 100644 (file)
index 0000000..e3f738e
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Hypervisor Maintenance Interrupt (HMI) handling.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.
+ *
+ * Copyright 2015 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <asm/paca.h>
+#include <asm/hmi.h>
+
+void wait_for_subcore_guest_exit(void)
+{
+       int i;
+
+       /*
+        * NULL bitmap pointer indicates that KVM module hasn't
+        * been loaded yet and hence no guests are running.
+        * If no KVM is in use, no need to co-ordinate among threads
+        * as all of them will always be in host and no one is going
+        * to modify TB other than the opal hmi handler.
+        * Hence, just return from here.
+        */
+       if (!local_paca->sibling_subcore_state)
+               return;
+
+       for (i = 0; i < MAX_SUBCORE_PER_CORE; i++)
+               while (local_paca->sibling_subcore_state->in_guest[i])
+                       cpu_relax();
+}
+
+void wait_for_tb_resync(void)
+{
+       if (!local_paca->sibling_subcore_state)
+               return;
+
+       while (test_bit(CORE_TB_RESYNC_REQ_BIT,
+                               &local_paca->sibling_subcore_state->flags))
+               cpu_relax();
+}
index a75ba38..05aa113 100644 (file)
@@ -1329,20 +1329,16 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
        xics->kvm = kvm;
 
        /* Already there ? */
-       mutex_lock(&kvm->lock);
        if (kvm->arch.xics)
                ret = -EEXIST;
        else
                kvm->arch.xics = xics;
-       mutex_unlock(&kvm->lock);
 
        if (ret) {
                kfree(xics);
                return ret;
        }
 
-       xics_debugfs_init(xics);
-
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        if (cpu_has_feature(CPU_FTR_ARCH_206)) {
                /* Enable real mode support */
@@ -1354,9 +1350,17 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
        return 0;
 }
 
+static void kvmppc_xics_init(struct kvm_device *dev)
+{
+       struct kvmppc_xics *xics = (struct kvmppc_xics *)dev->private;
+
+       xics_debugfs_init(xics);
+}
+
 struct kvm_device_ops kvm_xics_ops = {
        .name = "kvm-xics",
        .create = kvmppc_xics_create,
+       .init = kvmppc_xics_init,
        .destroy = kvmppc_xics_free,
        .set_attr = xics_set_attr,
        .get_attr = xics_get_attr,
index d90870a..0a57fe6 100644 (file)
@@ -127,8 +127,9 @@ _GLOBAL(csum_partial_copy_generic)
        stw     r7,12(r1)
        stw     r8,8(r1)
 
-       andi.   r0,r4,1                 /* is destination address even ? */
-       cmplwi  cr7,r0,0
+       rlwinm  r0,r4,3,0x8
+       rlwnm   r6,r6,r0,0,31   /* odd destination address: rotate one byte */
+       cmplwi  cr7,r0,0        /* is destination address even ? */
        addic   r12,r6,0
        addi    r6,r4,-4
        neg     r0,r4
@@ -237,7 +238,7 @@ _GLOBAL(csum_partial_copy_generic)
 66:    addze   r3,r12
        addi    r1,r1,16
        beqlr+  cr7
-       rlwinm  r3,r3,8,0,31    /* swap bytes for odd destination */
+       rlwinm  r3,r3,8,0,31    /* odd destination address: rotate one byte */
        blr
 
 /* read fault */
index 74145f0..043415f 100644 (file)
@@ -188,7 +188,10 @@ void __init apply_feature_fixups(void)
                          &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
 #endif
        do_final_fixups();
+}
 
+void __init setup_feature_keys(void)
+{
        /*
         * Initialise jump label. This causes all the cpu/mmu_has_feature()
         * checks to take on their correct polarity based on the current set of
index a4db22f..bb1ffc5 100644 (file)
@@ -26,7 +26,7 @@
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/highmem.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
 #include <linux/perf_event.h>
index 8eb82b0..d93dd4a 100644 (file)
@@ -528,7 +528,6 @@ static struct platform_driver mpc512x_lpbfifo_driver = {
        .remove = mpc512x_lpbfifo_remove,
        .driver = {
                .name = DRV_NAME,
-               .owner = THIS_MODULE,
                .of_match_table = mpc512x_lpbfifo_match,
        },
 };
index dbcd030..63c5ab6 100644 (file)
@@ -222,7 +222,6 @@ static const struct of_device_id mcu_of_match_table[] = {
 static struct i2c_driver mcu_driver = {
        .driver = {
                .name = "mcu-mpc8349emitx",
-               .owner = THIS_MODULE,
                .of_match_table = mcu_of_match_table,
        },
        .probe = mcu_probe,
index 5be15cf..2975754 100644 (file)
@@ -496,8 +496,10 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode)
        gang = alloc_spu_gang();
        SPUFS_I(inode)->i_ctx = NULL;
        SPUFS_I(inode)->i_gang = gang;
-       if (!gang)
+       if (!gang) {
+               ret = -ENOMEM;
                goto out_iput;
+       }
 
        inode->i_op = &simple_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
index dafba10..dfd3100 100644 (file)
@@ -26,7 +26,7 @@
 #include <linux/tty.h>
 #include <linux/serial_core.h>
 #include <linux/of_platform.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
index 80804f9..f97bab8 100644 (file)
@@ -23,7 +23,7 @@
 #include <linux/pci.h>
 #include <linux/kdev_t.h>
 #include <linux/console.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/delay.h>
 #include <linux/irq.h>
 #include <linux/seq_file.h>
index 309d9cc..c61667e 100644 (file)
@@ -187,6 +187,11 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev)
        if (dev->vendor == 0x1959 && dev->device == 0xa007 &&
            !firmware_has_feature(FW_FEATURE_LPAR)) {
                dev->dev.archdata.dma_ops = &dma_direct_ops;
+               /*
+                * Set the coherent DMA mask to prevent the iommu
+                * being used unnecessarily
+                */
+               dev->dev.coherent_dma_mask = DMA_BIT_MASK(44);
                return;
        }
 #endif
index 2ee9643..4c82782 100644 (file)
@@ -370,6 +370,7 @@ static irqreturn_t process_dump(int irq, void *data)
        uint32_t dump_id, dump_size, dump_type;
        struct dump_obj *dump;
        char name[22];
+       struct kobject *kobj;
 
        rc = dump_read_info(&dump_id, &dump_size, &dump_type);
        if (rc != OPAL_SUCCESS)
@@ -381,8 +382,12 @@ static irqreturn_t process_dump(int irq, void *data)
         * that gracefully and not create two conflicting
         * entries.
         */
-       if (kset_find_obj(dump_kset, name))
+       kobj = kset_find_obj(dump_kset, name);
+       if (kobj) {
+               /* Drop reference added by kset_find_obj() */
+               kobject_put(kobj);
                return 0;
+       }
 
        dump = create_dump_obj(dump_id, dump_size, dump_type);
        if (!dump)
index 37f959b..f2344cb 100644 (file)
@@ -247,6 +247,7 @@ static irqreturn_t elog_event(int irq, void *data)
        uint64_t elog_type;
        int rc;
        char name[2+16+1];
+       struct kobject *kobj;
 
        rc = opal_get_elog_size(&id, &size, &type);
        if (rc != OPAL_SUCCESS) {
@@ -269,8 +270,12 @@ static irqreturn_t elog_event(int irq, void *data)
         * that gracefully and not create two conflicting
         * entries.
         */
-       if (kset_find_obj(elog_kset, name))
+       kobj = kset_find_obj(elog_kset, name);
+       if (kobj) {
+               /* Drop reference added by kset_find_obj() */
+               kobject_put(kobj);
                return IRQ_HANDLED;
+       }
 
        create_elog_obj(log_id, elog_size, elog_type);
 
index e505223..ed8bba6 100644 (file)
@@ -228,7 +228,8 @@ int __init opal_event_init(void)
                }
 
                /* Install interrupt handler */
-               rc = request_irq(virq, opal_interrupt, 0, "opal", NULL);
+               rc = request_irq(virq, opal_interrupt, IRQF_TRIGGER_LOW,
+                                "opal", NULL);
                if (rc) {
                        irq_dispose_mapping(virq);
                        pr_warn("Error %d requesting irq %d (0x%x)\n",
index 8b4fc68..6c9a65b 100644 (file)
@@ -399,6 +399,7 @@ static int opal_recover_mce(struct pt_regs *regs,
 
        if (!(regs->msr & MSR_RI)) {
                /* If MSR_RI isn't set, we cannot recover */
+               pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
                recovered = 0;
        } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
                /* Platform corrected itself */
index 6b95283..1321826 100644 (file)
@@ -111,10 +111,17 @@ static int __init iommu_setup(char *str)
 }
 early_param("iommu", iommu_setup);
 
-static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
+static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
 {
-       return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) ==
-               (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
+       /*
+        * WARNING: We cannot rely on the resource flags. The Linux PCI
+        * allocation code sometimes decides to put a 64-bit prefetchable
+        * BAR in the 32-bit window, so we have to compare the addresses.
+        *
+        * For simplicity we only test resource start.
+        */
+       return (r->start >= phb->ioda.m64_base &&
+               r->start < (phb->ioda.m64_base + phb->ioda.m64_size));
 }
 
 static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
@@ -142,7 +149,7 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
 
 static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
 {
-       unsigned long pe = phb->ioda.total_pe_num - 1;
+       long pe;
 
        for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) {
                if (!test_and_set_bit(pe, phb->ioda.pe_alloc))
@@ -229,7 +236,7 @@ static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
        sgsz = phb->ioda.m64_segsize;
        for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
                r = &pdev->resource[i];
-               if (!r->parent || !pnv_pci_is_mem_pref_64(r->flags))
+               if (!r->parent || !pnv_pci_is_m64(phb, r))
                        continue;
 
                start = _ALIGN_DOWN(r->start - base, sgsz);
@@ -1877,7 +1884,7 @@ static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, bool rm,
                                        unsigned shift, unsigned long index,
                                        unsigned long npages)
 {
-       __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, false);
+       __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm);
        unsigned long start, end, inc;
 
        /* We'll invalidate DMA address in PE scope */
@@ -2863,7 +2870,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
                res = &pdev->resource[i + PCI_IOV_RESOURCES];
                if (!res->flags || res->parent)
                        continue;
-               if (!pnv_pci_is_mem_pref_64(res->flags)) {
+               if (!pnv_pci_is_m64(phb, res)) {
                        dev_warn(&pdev->dev, "Don't support SR-IOV with"
                                        " non M64 VF BAR%d: %pR. \n",
                                 i, res);
@@ -2958,7 +2965,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
                        index++;
                }
        } else if ((res->flags & IORESOURCE_MEM) &&
-                  !pnv_pci_is_mem_pref_64(res->flags)) {
+                  !pnv_pci_is_m64(phb, res)) {
                region.start = res->start -
                               phb->hose->mem_offset[0] -
                               phb->ioda.m32_pci_base;
@@ -3083,9 +3090,12 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
                bridge = bridge->bus->self;
        }
 
-       /* We fail back to M32 if M64 isn't supported */
-       if (phb->ioda.m64_segsize &&
-           pnv_pci_is_mem_pref_64(type))
+       /*
+        * We fall back to M32 if M64 isn't supported. We enforce the M64
+        * alignment for any 64-bit resource, PCIe doesn't care and
+        * bridges only do 64-bit prefetchable anyway.
+        */
+       if (phb->ioda.m64_segsize && (type & IORESOURCE_MEM_64))
                return phb->ioda.m64_segsize;
        if (type & IORESOURCE_MEM)
                return phb->ioda.m32_segsize;
@@ -3125,7 +3135,7 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
                w = NULL;
                if (r->flags & type & IORESOURCE_IO)
                        w = &hose->io_resource;
-               else if (pnv_pci_is_mem_pref_64(r->flags) &&
+               else if (pnv_pci_is_m64(phb, r) &&
                         (type & IORESOURCE_PREFETCH) &&
                         phb->ioda.m64_segsize)
                        w = &hose->mem_resources[1];
index 43f7beb..76ec104 100644 (file)
@@ -320,19 +320,6 @@ static int dlpar_remove_device_tree_lmb(struct of_drconf_cell *lmb)
        return dlpar_update_device_tree_lmb(lmb);
 }
 
-static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
-{
-       unsigned long section_nr;
-       struct mem_section *mem_sect;
-       struct memory_block *mem_block;
-
-       section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
-       mem_sect = __nr_to_section(section_nr);
-
-       mem_block = find_memory_block(mem_sect);
-       return mem_block;
-}
-
 #ifdef CONFIG_MEMORY_HOTREMOVE
 static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
 {
@@ -420,6 +407,19 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb)
 
 static int dlpar_add_lmb(struct of_drconf_cell *);
 
+static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
+{
+       unsigned long section_nr;
+       struct mem_section *mem_sect;
+       struct memory_block *mem_block;
+
+       section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
+       mem_sect = __nr_to_section(section_nr);
+
+       mem_block = find_memory_block(mem_sect);
+       return mem_block;
+}
+
 static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
 {
        struct memory_block *mem_block;
index fe16a50..09eba5a 100644 (file)
@@ -119,6 +119,10 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
 
        bus = bridge->bus;
 
+       /* Rely on the pcibios_free_controller_deferred() callback. */
+       pci_set_host_bridge_release(bridge, pcibios_free_controller_deferred,
+                                       (void *) pci_bus_to_host(bus));
+
        dn = pcibios_get_phb_of_node(bus);
        if (!dn)
                return 0;
index 906dbaa..547fd13 100644 (file)
@@ -106,8 +106,11 @@ int remove_phb_dynamic(struct pci_controller *phb)
                release_resource(res);
        }
 
-       /* Free pci_controller data structure */
-       pcibios_free_controller(phb);
+       /*
+        * The pci_controller data structure is freed by
+        * the pcibios_free_controller_deferred() callback;
+        * see pseries_root_bridge_prepare().
+        */
 
        return 0;
 }
index 6c11099..81d4947 100644 (file)
@@ -534,7 +534,8 @@ struct cpm1_gpio16_chip {
 
 static void cpm1_gpio16_save_regs(struct of_mm_gpio_chip *mm_gc)
 {
-       struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
+       struct cpm1_gpio16_chip *cpm1_gc =
+               container_of(mm_gc, struct cpm1_gpio16_chip, mm_gc);
        struct cpm_ioport16 __iomem *iop = mm_gc->regs;
 
        cpm1_gc->cpdata = in_be16(&iop->dat);
@@ -649,7 +650,8 @@ struct cpm1_gpio32_chip {
 
 static void cpm1_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc)
 {
-       struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
+       struct cpm1_gpio32_chip *cpm1_gc =
+               container_of(mm_gc, struct cpm1_gpio32_chip, mm_gc);
        struct cpm_ioport32b __iomem *iop = mm_gc->regs;
 
        cpm1_gc->cpdata = in_be32(&iop->dat);
index 911456d..947f420 100644 (file)
@@ -94,7 +94,8 @@ struct cpm2_gpio32_chip {
 
 static void cpm2_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc)
 {
-       struct cpm2_gpio32_chip *cpm2_gc = gpiochip_get_data(&mm_gc->gc);
+       struct cpm2_gpio32_chip *cpm2_gc =
+               container_of(mm_gc, struct cpm2_gpio32_chip, mm_gc);
        struct cpm2_ioports __iomem *iop = mm_gc->regs;
 
        cpm2_gc->cpdata = in_be32(&iop->dat);
index 68e7c0d..3cc7cac 100644 (file)
@@ -23,7 +23,7 @@
  */
 
 #include <linux/init.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/types.h>
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
index 0031eda..385e7aa 100644 (file)
@@ -1,6 +1,7 @@
 config PPC_XICS
        def_bool n
        select PPC_SMP_MUXED_IPI
+       select HARDIRQS_SW_RESEND
 
 config PPC_ICP_NATIVE
        def_bool n
index 27c936c..1c6bf4b 100644 (file)
@@ -156,7 +156,9 @@ static struct irq_chip ics_opal_irq_chip = {
        .irq_mask = ics_opal_mask_irq,
        .irq_unmask = ics_opal_unmask_irq,
        .irq_eoi = NULL, /* Patched at init time */
-       .irq_set_affinity = ics_opal_set_affinity
+       .irq_set_affinity = ics_opal_set_affinity,
+       .irq_set_type = xics_set_irq_type,
+       .irq_retrigger = xics_retrigger,
 };
 
 static int ics_opal_map(struct ics *ics, unsigned int virq);
index 3854dd4..78ee5c7 100644 (file)
@@ -163,7 +163,9 @@ static struct irq_chip ics_rtas_irq_chip = {
        .irq_mask = ics_rtas_mask_irq,
        .irq_unmask = ics_rtas_unmask_irq,
        .irq_eoi = NULL, /* Patched at init time */
-       .irq_set_affinity = ics_rtas_set_affinity
+       .irq_set_affinity = ics_rtas_set_affinity,
+       .irq_set_type = xics_set_irq_type,
+       .irq_retrigger = xics_retrigger,
 };
 
 static int ics_rtas_map(struct ics *ics, unsigned int virq)
index a795a5f..9d530f4 100644 (file)
@@ -328,8 +328,12 @@ static int xics_host_map(struct irq_domain *h, unsigned int virq,
 
        pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
 
-       /* They aren't all level sensitive but we just don't really know */
-       irq_set_status_flags(virq, IRQ_LEVEL);
+       /*
+        * Mark interrupts as edge sensitive by default so that resend
+        * actually works. The device-tree parsing will turn the LSIs
+        * back to level.
+        */
+       irq_clear_status_flags(virq, IRQ_LEVEL);
 
        /* Don't call into ICS for IPIs */
        if (hw == XICS_IPI) {
@@ -351,13 +355,54 @@ static int xics_host_xlate(struct irq_domain *h, struct device_node *ct,
                           irq_hw_number_t *out_hwirq, unsigned int *out_flags)
 
 {
-       /* Current xics implementation translates everything
-        * to level. It is not technically right for MSIs but this
-        * is irrelevant at this point. We might get smarter in the future
-        */
        *out_hwirq = intspec[0];
-       *out_flags = IRQ_TYPE_LEVEL_LOW;
 
+       /*
+        * If intsize is at least 2, we look for the type in the second cell,
+        * we assume the LSB indicates a level interrupt.
+        */
+       if (intsize > 1) {
+               if (intspec[1] & 1)
+                       *out_flags = IRQ_TYPE_LEVEL_LOW;
+               else
+                       *out_flags = IRQ_TYPE_EDGE_RISING;
+       } else
+               *out_flags = IRQ_TYPE_LEVEL_LOW;
+
+       return 0;
+}
+
+int xics_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+       /*
+        * We only support these. This has really no effect other than setting
+        * the corresponding descriptor bits mind you but those will in turn
+        * affect the resend function when re-enabling an edge interrupt.
+        *
+        * Set set the default to edge as explained in map().
+        */
+       if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE)
+               flow_type = IRQ_TYPE_EDGE_RISING;
+
+       if (flow_type != IRQ_TYPE_EDGE_RISING &&
+           flow_type != IRQ_TYPE_LEVEL_LOW)
+               return -EINVAL;
+
+       irqd_set_trigger_type(d, flow_type);
+
+       return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+int xics_retrigger(struct irq_data *data)
+{
+       /*
+        * We need to push a dummy CPPR when retriggering, since the subsequent
+        * EOI will try to pop it. Passing 0 works, as the function hard codes
+        * the priority value anyway.
+        */
+       xics_push_cppr(0);
+
+       /* Tell the core to do a soft retrigger */
        return 0;
 }
 
index 9e607bf..e751fe2 100644 (file)
@@ -123,6 +123,7 @@ config S390
        select HAVE_ALIGNED_STRUCT_PAGE if SLUB
        select HAVE_ARCH_AUDITSYSCALL
        select HAVE_ARCH_EARLY_PFN_TO_NID
+       select HAVE_ARCH_HARDENED_USERCOPY
        select HAVE_ARCH_JUMP_LABEL
        select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
        select HAVE_ARCH_SECCOMP_FILTER
@@ -871,4 +872,17 @@ config S390_GUEST
          Select this option if you want to run the kernel as a guest under
          the KVM hypervisor.
 
+config S390_GUEST_OLD_TRANSPORT
+       def_bool y
+       prompt "Guest support for old s390 virtio transport (DEPRECATED)"
+       depends on S390_GUEST
+       help
+         Enable this option to add support for the old s390-virtio
+         transport (i.e. virtio devices NOT based on virtio-ccw). This
+         type of virtio devices is only available on the experimental
+         kuli userspace or with old (< 2.6) qemu. If you are running
+         with a modern version of qemu (which supports virtio-ccw since
+         1.4 and uses it by default since version 2.4), you probably won't
+         need this.
+
 endmenu
index f86a4ee..28c4f96 100644 (file)
@@ -21,16 +21,21 @@ ENTRY(startup_continue)
        lg      %r15,.Lstack-.LPG1(%r13)
        aghi    %r15,-160
        brasl   %r14,decompress_kernel
-       # setup registers for memory mover & branch to target
+       # Set up registers for memory mover. We move the decompressed image to
+       # 0x11000, starting at offset 0x11000 in the decompressed image so
+       # that code living at 0x11000 in the image will end up at 0x11000 in
+       # memory.
        lgr     %r4,%r2
        lg      %r2,.Loffset-.LPG1(%r13)
        la      %r4,0(%r2,%r4)
        lg      %r3,.Lmvsize-.LPG1(%r13)
        lgr     %r5,%r3
-       # move the memory mover someplace safe
+       # Move the memory mover someplace safe so it doesn't overwrite itself.
        la      %r1,0x200
        mvc     0(mover_end-mover,%r1),mover-.LPG1(%r13)
-       # decompress image is started at 0x11000
+       # When the memory mover is done we pass control to
+       # arch/s390/kernel/head64.S:startup_continue which lives at 0x11000 in
+       # the decompressed image.
        lgr     %r6,%r2
        br      %r1
 mover:
index 889ea34..26e0c7f 100644 (file)
@@ -678,7 +678,7 @@ CONFIG_CRYPTO_SHA512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
 CONFIG_ASYMMETRIC_KEY_TYPE=y
 CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
 CONFIG_X509_CERTIFICATE_PARSER=m
index 1bcfd76..24879da 100644 (file)
@@ -616,7 +616,7 @@ CONFIG_CRYPTO_SHA512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
 CONFIG_ASYMMETRIC_KEY_TYPE=y
 CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
 CONFIG_X509_CERTIFICATE_PARSER=m
index 13ff090..a5c1e5f 100644 (file)
@@ -615,7 +615,7 @@ CONFIG_CRYPTO_SHA512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
 CONFIG_ASYMMETRIC_KEY_TYPE=y
 CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
 CONFIG_X509_CERTIFICATE_PARSER=m
index 577ae1d..2bad9d8 100644 (file)
@@ -51,6 +51,9 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
                struct kernel_fpu vxstate;                                  \
                unsigned long prealign, aligned, remaining;                 \
                                                                            \
+               if (datalen < VX_MIN_LEN + VX_ALIGN_MASK)                   \
+                       return ___crc32_sw(crc, data, datalen);             \
+                                                                           \
                if ((unsigned long)data & VX_ALIGN_MASK) {                  \
                        prealign = VX_ALIGNMENT -                           \
                                  ((unsigned long)data & VX_ALIGN_MASK);    \
@@ -59,9 +62,6 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
                        data = (void *)((unsigned long)data + prealign);    \
                }                                                           \
                                                                            \
-               if (datalen < VX_MIN_LEN)                                   \
-                       return ___crc32_sw(crc, data, datalen);             \
-                                                                           \
                aligned = datalen & ~VX_ALIGN_MASK;                         \
                remaining = datalen & VX_ALIGN_MASK;                        \
                                                                            \
index ccccebe..73610f2 100644 (file)
@@ -234,7 +234,7 @@ CONFIG_CRYPTO_SHA256_S390=m
 CONFIG_CRYPTO_SHA512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
-CONFIG_CRYPTO_CRC32_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
 CONFIG_CRC7=m
 # CONFIG_XZ_DEC_X86 is not set
 # CONFIG_XZ_DEC_POWERPC is not set
index 56e4d82..4431905 100644 (file)
@@ -309,7 +309,9 @@ ENTRY(startup_kdump)
        l       %r15,.Lstack-.LPG0(%r13)
        ahi     %r15,-STACK_FRAME_OVERHEAD
        brasl   %r14,verify_facilities
-       /* Continue with startup code in head64.S */
+# For uncompressed images, continue in
+# arch/s390/kernel/head64.S. For compressed images, continue in
+# arch/s390/boot/compressed/head.S.
        jg      startup_continue
 
 .Lstack:
index ba5f456..7f7ba5f 100644 (file)
@@ -204,11 +204,9 @@ static void __init conmode_default(void)
 #endif
                }
        } else if (MACHINE_IS_KVM) {
-               if (sclp.has_vt220 &&
-                   config_enabled(CONFIG_SCLP_VT220_CONSOLE))
+               if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
                        SET_CONSOLE_VT220;
-               else if (sclp.has_linemode &&
-                        config_enabled(CONFIG_SCLP_CONSOLE))
+               else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
                        SET_CONSOLE_SCLP;
                else
                        SET_CONSOLE_HVC;
index 3f3ae48..f142215 100644 (file)
@@ -1672,6 +1672,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
                                    KVM_SYNC_CRS |
                                    KVM_SYNC_ARCH0 |
                                    KVM_SYNC_PFAULT;
+       kvm_s390_set_prefix(vcpu, 0);
        if (test_kvm_facility(vcpu->kvm, 64))
                vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
        /* fprs can be synchronized via vrs, even if the guest has no vx. With
@@ -2361,8 +2362,10 @@ retry:
                rc = gmap_mprotect_notify(vcpu->arch.gmap,
                                          kvm_s390_get_prefix(vcpu),
                                          PAGE_SIZE * 2, PROT_WRITE);
-               if (rc)
+               if (rc) {
+                       kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
                        return rc;
+               }
                goto retry;
        }
 
index e390bbb..48352bf 100644 (file)
@@ -237,11 +237,10 @@ char * strrchr(const char * s, int c)
 EXPORT_SYMBOL(strrchr);
 
 static inline int clcle(const char *s1, unsigned long l1,
-                       const char *s2, unsigned long l2,
-                       int *diff)
+                       const char *s2, unsigned long l2)
 {
        register unsigned long r2 asm("2") = (unsigned long) s1;
-       register unsigned long r3 asm("3") = (unsigned long) l2;
+       register unsigned long r3 asm("3") = (unsigned long) l1;
        register unsigned long r4 asm("4") = (unsigned long) s2;
        register unsigned long r5 asm("5") = (unsigned long) l2;
        int cc;
@@ -252,7 +251,6 @@ static inline int clcle(const char *s1, unsigned long l1,
                      "   srl   %0,28"
                      : "=&d" (cc), "+a" (r2), "+a" (r3),
                        "+a" (r4), "+a" (r5) : : "cc");
-       *diff = *(char *)r2 - *(char *)r4;
        return cc;
 }
 
@@ -270,9 +268,9 @@ char * strstr(const char * s1,const char * s2)
                return (char *) s1;
        l1 = __strend(s1) - s1;
        while (l1-- >= l2) {
-               int cc, dummy;
+               int cc;
 
-               cc = clcle(s1, l1, s2, l2, &dummy);
+               cc = clcle(s1, l2, s2, l2);
                if (!cc)
                        return (char *) s1;
                s1++;
@@ -313,11 +311,11 @@ EXPORT_SYMBOL(memchr);
  */
 int memcmp(const void *cs, const void *ct, size_t n)
 {
-       int ret, diff;
+       int ret;
 
-       ret = clcle(cs, n, ct, n, &diff);
+       ret = clcle(cs, n, ct, n);
        if (ret)
-               ret = diff;
+               ret = ret == 1 ? -1 : 1;
        return ret;
 }
 EXPORT_SYMBOL(memcmp);
index d965961..f481fcd 100644 (file)
@@ -104,6 +104,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
 
 unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
+       check_object_size(to, n, false);
        if (static_branch_likely(&have_mvcos))
                return copy_from_user_mvcos(to, from, n);
        return copy_from_user_mvcp(to, from, n);
@@ -177,6 +178,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
 
 unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
+       check_object_size(from, n, true);
        if (static_branch_likely(&have_mvcos))
                return copy_to_user_mvcos(to, from, n);
        return copy_to_user_mvcs(to, from, n);
index 7104ffb..af7cf28 100644 (file)
@@ -252,6 +252,8 @@ static int change_page_attr(unsigned long addr, unsigned long end,
        int rc = -EINVAL;
        pgd_t *pgdp;
 
+       if (addr == end)
+               return 0;
        if (end >= MODULES_END)
                return -EINVAL;
        mutex_lock(&cpa_mutex);
index 546293d..59b0960 100644 (file)
@@ -43,6 +43,7 @@ config SPARC
        select OLD_SIGSUSPEND
        select ARCH_HAS_SG_CHAIN
        select CPU_NO_EFFICIENT_FFS
+       select HAVE_ARCH_HARDENED_USERCOPY
 
 config SPARC32
        def_bool !64BIT
index 57aca27..341a5a1 100644 (file)
@@ -248,22 +248,28 @@ unsigned long __copy_user(void __user *to, const void __user *from, unsigned lon
 
 static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-       if (n && __access_ok((unsigned long) to, n))
+       if (n && __access_ok((unsigned long) to, n)) {
+               if (!__builtin_constant_p(n))
+                       check_object_size(from, n, true);
                return __copy_user(to, (__force void __user *) from, n);
-       else
+       } else
                return n;
 }
 
 static inline unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
+       if (!__builtin_constant_p(n))
+               check_object_size(from, n, true);
        return __copy_user(to, (__force void __user *) from, n);
 }
 
 static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-       if (n && __access_ok((unsigned long) from, n))
+       if (n && __access_ok((unsigned long) from, n)) {
+               if (!__builtin_constant_p(n))
+                       check_object_size(to, n, false);
                return __copy_user((__force void __user *) to, from, n);
-       else
+       } else
                return n;
 }
 
index e9a51d6..8bda94f 100644 (file)
@@ -210,8 +210,12 @@ unsigned long copy_from_user_fixup(void *to, const void __user *from,
 static inline unsigned long __must_check
 copy_from_user(void *to, const void __user *from, unsigned long size)
 {
-       unsigned long ret = ___copy_from_user(to, from, size);
+       unsigned long ret;
 
+       if (!__builtin_constant_p(size))
+               check_object_size(to, size, false);
+
+       ret = ___copy_from_user(to, from, size);
        if (unlikely(ret))
                ret = copy_from_user_fixup(to, from, size);
 
@@ -227,8 +231,11 @@ unsigned long copy_to_user_fixup(void __user *to, const void *from,
 static inline unsigned long __must_check
 copy_to_user(void __user *to, const void *from, unsigned long size)
 {
-       unsigned long ret = ___copy_to_user(to, from, size);
+       unsigned long ret;
 
+       if (!__builtin_constant_p(size))
+               check_object_size(from, size, true);
+       ret = ___copy_to_user(to, from, size);
        if (unlikely(ret))
                ret = copy_to_user_fixup(to, from, size);
        return ret;
index 1dd5bd8..1330553 100644 (file)
@@ -81,7 +81,7 @@
   .altinstr_replacement : { *(.altinstr_replacement) }
   /* .exit.text is discard at runtime, not link time, to deal with references
      from .altinstructions and .eh_frame */
-  .exit.text : { *(.exit.text) }
+  .exit.text : { EXIT_TEXT }
   .exit.data : { *(.exit.data) }
 
   .preinit_array : {
index e35632e..62dfc64 100644 (file)
@@ -98,7 +98,7 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
 }
 
 static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
-               bool write, bool foreign)
+               bool write, bool execute, bool foreign)
 {
        /* by default, allow everything */
        return true;
index 5c6e747..c580d8c 100644 (file)
@@ -80,6 +80,7 @@ config X86
        select HAVE_ALIGNED_STRUCT_PAGE         if SLUB
        select HAVE_AOUT                        if X86_32
        select HAVE_ARCH_AUDITSYSCALL
+       select HAVE_ARCH_HARDENED_USERCOPY
        select HAVE_ARCH_HUGE_VMAP              if X86_64 || X86_PAE
        select HAVE_ARCH_JUMP_LABEL
        select HAVE_ARCH_KASAN                  if X86_64 && SPARSEMEM_VMEMMAP
@@ -91,6 +92,7 @@ config X86
        select HAVE_ARCH_SOFT_DIRTY             if X86_64
        select HAVE_ARCH_TRACEHOOK
        select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+       select HAVE_ARCH_WITHIN_STACK_FRAMES
        select HAVE_EBPF_JIT                    if X86_64
        select HAVE_CC_STACKPROTECTOR
        select HAVE_CMPXCHG_DOUBLE
index 89fa85e..6f97fb3 100644 (file)
@@ -485,10 +485,10 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
 
                        req = cast_mcryptd_ctx_to_req(req_ctx);
                        if (irqs_disabled())
-                               rctx->complete(&req->base, ret);
+                               req_ctx->complete(&req->base, ret);
                        else {
                                local_bh_disable();
-                               rctx->complete(&req->base, ret);
+                               req_ctx->complete(&req->base, ret);
                                local_bh_enable();
                        }
                }
index b691da9..a78a069 100644 (file)
@@ -265,13 +265,14 @@ ENTRY(sha256_mb_mgr_get_comp_job_avx2)
        vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
        vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
        vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
-       movl    _args_digest+4*32(state, idx, 4), tmp2_w
+       vmovd   _args_digest(state , idx, 4) , %xmm0
        vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
        vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
        vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
 
-       vmovdqu %xmm0, _result_digest(job_rax)
-       movl    tmp2_w, _result_digest+1*16(job_rax)
+        vmovdqu %xmm0, _result_digest(job_rax)
+        offset =  (_result_digest + 1*16)
+        vmovdqu %xmm1, offset(job_rax)
 
        pop     %rbx
 
index f4cf5b7..d210174 100644 (file)
@@ -497,10 +497,10 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
 
                        req = cast_mcryptd_ctx_to_req(req_ctx);
                        if (irqs_disabled())
-                               rctx->complete(&req->base, ret);
+                               req_ctx->complete(&req->base, ret);
                        else {
                                local_bh_disable();
-                               rctx->complete(&req->base, ret);
+                               req_ctx->complete(&req->base, ret);
                                local_bh_enable();
                        }
                }
index fe91c25..77f28ce 100644 (file)
@@ -5,6 +5,8 @@
 OBJECT_FILES_NON_STANDARD_entry_$(BITS).o   := y
 OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
 
+CFLAGS_syscall_64.o            += -Wno-override-init
+CFLAGS_syscall_32.o            += -Wno-override-init
 obj-y                          := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
 obj-y                          += common.o
 
index b846875..d172c61 100644 (file)
@@ -288,11 +288,15 @@ return_from_SYSCALL_64:
        jne     opportunistic_sysret_failed
 
        /*
-        * SYSRET can't restore RF.  SYSRET can restore TF, but unlike IRET,
-        * restoring TF results in a trap from userspace immediately after
-        * SYSRET.  This would cause an infinite loop whenever #DB happens
-        * with register state that satisfies the opportunistic SYSRET
-        * conditions.  For example, single-stepping this user code:
+        * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
+        * restore RF properly. If the slowpath sets it for whatever reason, we
+        * need to restore it correctly.
+        *
+        * SYSRET can restore TF, but unlike IRET, restoring TF results in a
+        * trap from userspace immediately after SYSRET.  This would cause an
+        * infinite loop whenever #DB happens with register state that satisfies
+        * the opportunistic SYSRET conditions.  For example, single-stepping
+        * this user code:
         *
         *           movq       $stuck_here, %rcx
         *           pushfq
@@ -601,9 +605,20 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym)
 .endm
 #endif
 
+/* Make sure APIC interrupt handlers end up in the irqentry section: */
+#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
+# define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
+# define POP_SECTION_IRQENTRY  .popsection
+#else
+# define PUSH_SECTION_IRQENTRY
+# define POP_SECTION_IRQENTRY
+#endif
+
 .macro apicinterrupt num sym do_sym
+PUSH_SECTION_IRQENTRY
 apicinterrupt3 \num \sym \do_sym
 trace_apicinterrupt \num \sym
+POP_SECTION_IRQENTRY
 .endm
 
 #ifdef CONFIG_SMP
index 97a69db..9d35ec0 100644 (file)
@@ -100,6 +100,12 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
        }
 }
 
+static void snb_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+       wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
+               SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
 static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
 {
        if (box->pmu->pmu_idx == 0)
@@ -127,6 +133,7 @@ static struct attribute_group snb_uncore_format_group = {
 
 static struct intel_uncore_ops snb_uncore_msr_ops = {
        .init_box       = snb_uncore_msr_init_box,
+       .enable_box     = snb_uncore_msr_enable_box,
        .exit_box       = snb_uncore_msr_exit_box,
        .disable_event  = snb_uncore_msr_disable_event,
        .enable_event   = snb_uncore_msr_enable_event,
@@ -192,6 +199,12 @@ static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
        }
 }
 
+static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+       wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
+               SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
 static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
 {
        if (box->pmu->pmu_idx == 0)
@@ -200,6 +213,7 @@ static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
 
 static struct intel_uncore_ops skl_uncore_msr_ops = {
        .init_box       = skl_uncore_msr_init_box,
+       .enable_box     = skl_uncore_msr_enable_box,
        .exit_box       = skl_uncore_msr_exit_box,
        .disable_event  = snb_uncore_msr_disable_event,
        .enable_event   = snb_uncore_msr_enable_event,
index 824e540..8aee83b 100644 (file)
@@ -2626,7 +2626,7 @@ void hswep_uncore_cpu_init(void)
 
 static struct intel_uncore_type hswep_uncore_ha = {
        .name           = "ha",
-       .num_counters   = 5,
+       .num_counters   = 4,
        .num_boxes      = 2,
        .perf_ctr_bits  = 48,
        SNBEP_UNCORE_PCI_COMMON_INIT(),
@@ -2645,7 +2645,7 @@ static struct uncore_event_desc hswep_uncore_imc_events[] = {
 
 static struct intel_uncore_type hswep_uncore_imc = {
        .name           = "imc",
-       .num_counters   = 5,
+       .num_counters   = 4,
        .num_boxes      = 8,
        .perf_ctr_bits  = 48,
        .fixed_ctr_bits = 48,
@@ -2691,7 +2691,7 @@ static struct intel_uncore_type hswep_uncore_irp = {
 
 static struct intel_uncore_type hswep_uncore_qpi = {
        .name                   = "qpi",
-       .num_counters           = 5,
+       .num_counters           = 4,
        .num_boxes              = 3,
        .perf_ctr_bits          = 48,
        .perf_ctr               = SNBEP_PCI_PMON_CTR0,
@@ -2773,7 +2773,7 @@ static struct event_constraint hswep_uncore_r3qpi_constraints[] = {
 
 static struct intel_uncore_type hswep_uncore_r3qpi = {
        .name           = "r3qpi",
-       .num_counters   = 4,
+       .num_counters   = 3,
        .num_boxes      = 3,
        .perf_ctr_bits  = 44,
        .constraints    = hswep_uncore_r3qpi_constraints,
@@ -2972,7 +2972,7 @@ static struct intel_uncore_type bdx_uncore_ha = {
 
 static struct intel_uncore_type bdx_uncore_imc = {
        .name           = "imc",
-       .num_counters   = 5,
+       .num_counters   = 4,
        .num_boxes      = 8,
        .perf_ctr_bits  = 48,
        .fixed_ctr_bits = 48,
index f5befd4..1243577 100644 (file)
@@ -135,6 +135,7 @@ extern void init_apic_mappings(void);
 void register_lapic_address(unsigned long address);
 extern void setup_boot_APIC_clock(void);
 extern void setup_secondary_APIC_clock(void);
+extern void lapic_update_tsc_freq(void);
 extern int APIC_init_uniprocessor(void);
 
 #ifdef CONFIG_X86_64
@@ -170,6 +171,7 @@ static inline void init_apic_mappings(void) { }
 static inline void disable_local_APIC(void) { }
 # define setup_boot_APIC_clock x86_init_noop
 # define setup_secondary_APIC_clock x86_init_noop
+static inline void lapic_update_tsc_freq(void) { }
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_X2APIC
index 7178043..59405a2 100644 (file)
@@ -22,10 +22,6 @@ typedef struct {
 #ifdef CONFIG_SMP
        unsigned int irq_resched_count;
        unsigned int irq_call_count;
-       /*
-        * irq_tlb_count is double-counted in irq_call_count, so it must be
-        * subtracted from irq_call_count when displaying irq_call_count
-        */
        unsigned int irq_tlb_count;
 #endif
 #ifdef CONFIG_X86_THERMAL_VECTOR
index 2230420..737da62 100644 (file)
@@ -5,10 +5,10 @@ struct x86_mapping_info {
        void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
        void *context;                   /* context for alloc_pgt_page */
        unsigned long pmd_flag;          /* page flag for PMD entry */
-       bool kernel_mapping;             /* kernel mapping or ident mapping */
+       unsigned long offset;            /* ident mapping offset */
 };
 
 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
-                               unsigned long addr, unsigned long end);
+                               unsigned long pstart, unsigned long pend);
 
 #endif /* _ASM_X86_INIT_H */
index 7e8ec7a..1cc82ec 100644 (file)
@@ -145,7 +145,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
  *
  * |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2|1|0| <- bit number
  * |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
- * | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry
+ * | OFFSET (14->63) | TYPE (9-13)  |0|X|X|X| X| X|X|X|0| <- swp entry
  *
  * G (8) is aliased and used as a PROT_NONE indicator for
  * !present ptes.  We need to start storing swap entries above
@@ -156,7 +156,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 #define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
 #define SWP_TYPE_BITS 5
 /* Place the offset above the type: */
-#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1)
+#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
 
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
 
index 9c6b890..b2988c0 100644 (file)
@@ -58,7 +58,15 @@ extern unsigned char boot_gdt[];
 extern unsigned char secondary_startup_64[];
 #endif
 
+static inline size_t real_mode_size_needed(void)
+{
+       if (real_mode_header)
+               return 0;       /* already allocated. */
+
+       return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE);
+}
+
+void set_real_mode_mem(phys_addr_t mem, size_t size);
 void reserve_real_mode(void);
-void setup_real_mode(void);
 
 #endif /* _ARCH_X86_REALMODE_H */
index 84b5984..8b7c8d8 100644 (file)
@@ -176,6 +176,50 @@ static inline unsigned long current_stack_pointer(void)
        return sp;
 }
 
+/*
+ * Walks up the stack frames to make sure that the specified object is
+ * entirely contained by a single stack frame.
+ *
+ * Returns:
+ *              1 if within a frame
+ *             -1 if placed across a frame boundary (or outside stack)
+ *              0 unable to determine (no frame pointers, etc)
+ */
+static inline int arch_within_stack_frames(const void * const stack,
+                                          const void * const stackend,
+                                          const void *obj, unsigned long len)
+{
+#if defined(CONFIG_FRAME_POINTER)
+       const void *frame = NULL;
+       const void *oldframe;
+
+       oldframe = __builtin_frame_address(1);
+       if (oldframe)
+               frame = __builtin_frame_address(2);
+       /*
+        * low ----------------------------------------------> high
+        * [saved bp][saved ip][args][local vars][saved bp][saved ip]
+        *                     ^----------------^
+        *               allow copies only within here
+        */
+       while (stack <= frame && frame < stackend) {
+               /*
+                * If obj + len extends past the last frame, this
+                * check won't pass and the next frame will be 0,
+                * causing us to bail out and correctly report
+                * the copy as invalid.
+                */
+               if (obj + len <= frame)
+                       return obj >= oldframe + 2 * sizeof(void *) ? 1 : -1;
+               oldframe = frame;
+               frame = *(const void * const *)frame;
+       }
+       return -1;
+#else
+       return 0;
+#endif
+}
+
 #else /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_64
index 4e5be94..6fa8594 100644 (file)
@@ -135,7 +135,14 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
 
 static inline void __native_flush_tlb(void)
 {
+       /*
+        * If current->mm == NULL then we borrow a mm which may change during a
+        * task switch and therefore we must not be preempted while we write CR3
+        * back:
+        */
+       preempt_disable();
        native_write_cr3(native_read_cr3());
+       preempt_enable();
 }
 
 static inline void __native_flush_tlb_global_irq_disabled(void)
index c03bfb6..a0ae610 100644 (file)
@@ -761,9 +761,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
         * case, and do only runtime checking for non-constant sizes.
         */
 
-       if (likely(sz < 0 || sz >= n))
+       if (likely(sz < 0 || sz >= n)) {
+               check_object_size(to, n, false);
                n = _copy_from_user(to, from, n);
-       else if(__builtin_constant_p(n))
+       } else if (__builtin_constant_p(n))
                copy_from_user_overflow();
        else
                __copy_from_user_overflow(sz, n);
@@ -781,9 +782,10 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
        might_fault();
 
        /* See the comment in copy_from_user() above. */
-       if (likely(sz < 0 || sz >= n))
+       if (likely(sz < 0 || sz >= n)) {
+               check_object_size(from, n, true);
                n = _copy_to_user(to, from, n);
-       else if(__builtin_constant_p(n))
+       } else if (__builtin_constant_p(n))
                copy_to_user_overflow();
        else
                __copy_to_user_overflow(sz, n);
@@ -812,21 +814,21 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
 #define user_access_begin()    __uaccess_begin()
 #define user_access_end()      __uaccess_end()
 
-#define unsafe_put_user(x, ptr)                                                \
-({                                                                             \
+#define unsafe_put_user(x, ptr, err_label)                                     \
+do {                                                                           \
        int __pu_err;                                                           \
        __put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT);         \
-       __builtin_expect(__pu_err, 0);                                          \
-})
+       if (unlikely(__pu_err)) goto err_label;                                 \
+} while (0)
 
-#define unsafe_get_user(x, ptr)                                                \
-({                                                                             \
+#define unsafe_get_user(x, ptr, err_label)                                     \
+do {                                                                           \
        int __gu_err;                                                           \
        unsigned long __gu_val;                                                 \
        __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT);    \
        (x) = (__force __typeof__(*(ptr)))__gu_val;                             \
-       __builtin_expect(__gu_err, 0);                                          \
-})
+       if (unlikely(__gu_err)) goto err_label;                                 \
+} while (0)
 
 #endif /* _ASM_X86_UACCESS_H */
 
index 4b32da2..7d3bdd1 100644 (file)
@@ -37,6 +37,7 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero
 static __always_inline unsigned long __must_check
 __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 {
+       check_object_size(from, n, true);
        return __copy_to_user_ll(to, from, n);
 }
 
@@ -95,6 +96,7 @@ static __always_inline unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
        might_fault();
+       check_object_size(to, n, false);
        if (__builtin_constant_p(n)) {
                unsigned long ret;
 
index 2eac2aa..673059a 100644 (file)
@@ -54,6 +54,7 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size)
 {
        int ret = 0;
 
+       check_object_size(dst, size, false);
        if (!__builtin_constant_p(size))
                return copy_user_generic(dst, (__force void *)src, size);
        switch (size) {
@@ -119,6 +120,7 @@ int __copy_to_user_nocheck(void __user *dst, const void *src, unsigned size)
 {
        int ret = 0;
 
+       check_object_size(src, size, true);
        if (!__builtin_constant_p(size))
                return copy_user_generic((__force void *)dst, src, size);
        switch (size) {
index c852590..e652a7c 100644 (file)
@@ -79,7 +79,7 @@ struct uv_gam_range_entry {
        u16     nasid;          /* HNasid */
        u16     sockid;         /* Socket ID, high bits of APIC ID */
        u16     pnode;          /* Index to MMR and GRU spaces */
-       u32     pxm;            /* ACPI proximity domain number */
+       u32     unused2;
        u32     limit;          /* PA bits 56:26 (UV_GAM_RANGE_SHFT) */
 };
 
@@ -88,7 +88,8 @@ struct uv_gam_range_entry {
 #define        UV_SYSTAB_VERSION_UV4           0x400   /* UV4 BIOS base version */
 #define        UV_SYSTAB_VERSION_UV4_1         0x401   /* + gpa_shift */
 #define        UV_SYSTAB_VERSION_UV4_2         0x402   /* + TYPE_NVRAM/WINDOW/MBOX */
-#define        UV_SYSTAB_VERSION_UV4_LATEST    UV_SYSTAB_VERSION_UV4_2
+#define        UV_SYSTAB_VERSION_UV4_3         0x403   /* - GAM Range PXM Value */
+#define        UV_SYSTAB_VERSION_UV4_LATEST    UV_SYSTAB_VERSION_UV4_3
 
 #define        UV_SYSTAB_TYPE_UNUSED           0       /* End of table (offset == 0) */
 #define        UV_SYSTAB_TYPE_GAM_PARAMS       1       /* GAM PARAM conversions */
index 20abd91..50c95af 100644 (file)
@@ -313,7 +313,7 @@ int lapic_get_maxlvt(void)
 
 /* Clock divisor */
 #define APIC_DIVISOR 16
-#define TSC_DIVISOR  32
+#define TSC_DIVISOR  8
 
 /*
  * This function sets up the local APIC timer, with a timeout of
@@ -565,12 +565,36 @@ static void setup_APIC_timer(void)
                                    CLOCK_EVT_FEAT_DUMMY);
                levt->set_next_event = lapic_next_deadline;
                clockevents_config_and_register(levt,
-                                               (tsc_khz / TSC_DIVISOR) * 1000,
+                                               tsc_khz * (1000 / TSC_DIVISOR),
                                                0xF, ~0UL);
        } else
                clockevents_register_device(levt);
 }
 
+/*
+ * Install the updated TSC frequency from recalibration at the TSC
+ * deadline clockevent devices.
+ */
+static void __lapic_update_tsc_freq(void *info)
+{
+       struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
+
+       if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+               return;
+
+       clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
+}
+
+void lapic_update_tsc_freq(void)
+{
+       /*
+        * The clockevent device's ->mult and ->shift can both be
+        * changed. In order to avoid races, schedule the frequency
+        * update code on each CPU.
+        */
+       on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
+}
+
 /*
  * In this functions we calibrate APIC bus clocks to the external timer.
  *
@@ -1599,6 +1623,9 @@ void __init enable_IR_x2apic(void)
        unsigned long flags;
        int ret, ir_stat;
 
+       if (skip_ioapic_setup)
+               return;
+
        ir_stat = irq_remapping_prepare();
        if (ir_stat < 0 && !x2apic_supported())
                return;
index 6368fa6..54f35d9 100644 (file)
@@ -155,7 +155,7 @@ static void init_x2apic_ldr(void)
 /*
  * At CPU state changes, update the x2apic cluster sibling info.
  */
-int x2apic_prepare_cpu(unsigned int cpu)
+static int x2apic_prepare_cpu(unsigned int cpu)
 {
        if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
                return -ENOMEM;
@@ -168,7 +168,7 @@ int x2apic_prepare_cpu(unsigned int cpu)
        return 0;
 }
 
-int x2apic_dead_cpu(unsigned int this_cpu)
+static int x2apic_dead_cpu(unsigned int this_cpu)
 {
        int cpu;
 
@@ -186,13 +186,18 @@ int x2apic_dead_cpu(unsigned int this_cpu)
 static int x2apic_cluster_probe(void)
 {
        int cpu = smp_processor_id();
+       int ret;
 
        if (!x2apic_mode)
                return 0;
 
+       ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
+                               x2apic_prepare_cpu, x2apic_dead_cpu);
+       if (ret < 0) {
+               pr_err("Failed to register X2APIC_PREPARE\n");
+               return 0;
+       }
        cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
-       cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
-                         x2apic_prepare_cpu, x2apic_dead_cpu);
        return 1;
 }
 
index 09b59ad..cb0673c 100644 (file)
@@ -223,6 +223,11 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
        if (strncmp(oem_id, "SGI", 3) != 0)
                return 0;
 
+       if (numa_off) {
+               pr_err("UV: NUMA is off, disabling UV support\n");
+               return 0;
+       }
+
        /* Setup early hub type field in uv_hub_info for Node 0 */
        uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
 
@@ -325,7 +330,7 @@ static __init void build_uv_gr_table(void)
        struct uv_gam_range_entry *gre = uv_gre_table;
        struct uv_gam_range_s *grt;
        unsigned long last_limit = 0, ram_limit = 0;
-       int bytes, i, sid, lsid = -1;
+       int bytes, i, sid, lsid = -1, indx = 0, lindx = -1;
 
        if (!gre)
                return;
@@ -356,11 +361,12 @@ static __init void build_uv_gr_table(void)
                }
                sid = gre->sockid - _min_socket;
                if (lsid < sid) {               /* new range */
-                       grt = &_gr_table[sid];
-                       grt->base = lsid;
+                       grt = &_gr_table[indx];
+                       grt->base = lindx;
                        grt->nasid = gre->nasid;
                        grt->limit = last_limit = gre->limit;
                        lsid = sid;
+                       lindx = indx++;
                        continue;
                }
                if (lsid == sid && !ram_limit) {        /* update range */
@@ -371,7 +377,7 @@ static __init void build_uv_gr_table(void)
                }
                if (!ram_limit) {               /* non-contiguous ram range */
                        grt++;
-                       grt->base = sid - 1;
+                       grt->base = lindx;
                        grt->nasid = gre->nasid;
                        grt->limit = last_limit = gre->limit;
                        continue;
@@ -1155,19 +1161,18 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
        for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
                if (!index) {
                        pr_info("UV: GAM Range Table...\n");
-                       pr_info("UV:  # %20s %14s %5s %4s %5s %3s %2s %3s\n",
+                       pr_info("UV:  # %20s %14s %5s %4s %5s %3s %2s\n",
                                "Range", "", "Size", "Type", "NASID",
-                               "SID", "PN", "PXM");
+                               "SID", "PN");
                }
                pr_info(
-               "UV: %2d: 0x%014lx-0x%014lx %5luG %3d   %04x  %02x %02x %3d\n",
+               "UV: %2d: 0x%014lx-0x%014lx %5luG %3d   %04x  %02x %02x\n",
                        index++,
                        (unsigned long)lgre << UV_GAM_RANGE_SHFT,
                        (unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
                        ((unsigned long)(gre->limit - lgre)) >>
                                (30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
-                       gre->type, gre->nasid, gre->sockid,
-                       gre->pnode, gre->pxm);
+                       gre->type, gre->nasid, gre->sockid, gre->pnode);
 
                lgre = gre->limit;
                if (sock_min > gre->sockid)
@@ -1286,7 +1291,7 @@ static void __init build_socket_tables(void)
                _pnode_to_socket[i] = SOCK_EMPTY;
 
        /* fill in pnode/node/addr conversion list values */
-       pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n");
+       pr_info("UV: GAM Building socket/pnode conversion tables\n");
        for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
                if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
                        continue;
@@ -1294,20 +1299,18 @@ static void __init build_socket_tables(void)
                if (_socket_to_pnode[i] != SOCK_EMPTY)
                        continue;       /* duplicate */
                _socket_to_pnode[i] = gre->pnode;
-               _socket_to_node[i] = gre->pxm;
 
                i = gre->pnode - minpnode;
                _pnode_to_socket[i] = gre->sockid;
 
                pr_info(
-               "UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n",
+               "UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
                        gre->sockid, gre->type, gre->nasid,
                        _socket_to_pnode[gre->sockid - minsock],
-                       _socket_to_node[gre->sockid - minsock],
                        _pnode_to_socket[gre->pnode - minpnode]);
        }
 
-       /* check socket -> node values */
+       /* Set socket -> node values */
        lnid = -1;
        for_each_present_cpu(cpu) {
                int nid = cpu_to_node(cpu);
@@ -1318,14 +1321,9 @@ static void __init build_socket_tables(void)
                lnid = nid;
                apicid = per_cpu(x86_cpu_to_apicid, cpu);
                sockid = apicid >> uv_cpuid.socketid_shift;
-               i = sockid - minsock;
-
-               if (nid != _socket_to_node[i]) {
-                       pr_warn(
-                       "UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n",
-                               i, sockid, gre->type, _socket_to_node[i], nid);
-                       _socket_to_node[i] = nid;
-               }
+               _socket_to_node[sockid - minsock] = nid;
+               pr_info("UV: sid:%02x: apicid:%04x node:%2d\n",
+                       sockid, apicid, nid);
        }
 
        /* Setup physical blade to pnode translation from GAM Range Table */
index 27a0228..b816971 100644 (file)
@@ -355,6 +355,7 @@ void load_ucode_amd_ap(void)
        unsigned int cpu = smp_processor_id();
        struct equiv_cpu_entry *eq;
        struct microcode_amd *mc;
+       u8 *cont = container;
        u32 rev, eax;
        u16 eq_id;
 
@@ -371,8 +372,11 @@ void load_ucode_amd_ap(void)
        if (check_current_patch_level(&rev, false))
                return;
 
+       /* Add CONFIG_RANDOMIZE_MEMORY offset. */
+       cont += PAGE_OFFSET - __PAGE_OFFSET_BASE;
+
        eax = cpuid_eax(0x00000001);
-       eq  = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ);
+       eq  = (struct equiv_cpu_entry *)(cont + CONTAINER_HDR_SZ);
 
        eq_id = find_equiv_id(eq, eax);
        if (!eq_id)
@@ -434,6 +438,9 @@ int __init save_microcode_in_initrd_amd(void)
        else
                container = cont_va;
 
+       /* Add CONFIG_RANDOMIZE_MEMORY offset. */
+       container += PAGE_OFFSET - __PAGE_OFFSET_BASE;
+
        eax   = cpuid_eax(0x00000001);
        eax   = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
 
index 680049a..01567aa 100644 (file)
@@ -866,105 +866,17 @@ const void *get_xsave_field_ptr(int xsave_state)
        return get_xsave_addr(&fpu->state.xsave, xsave_state);
 }
 
-
-/*
- * Set xfeatures (aka XSTATE_BV) bit for a feature that we want
- * to take out of its "init state".  This will ensure that an
- * XRSTOR actually restores the state.
- */
-static void fpu__xfeature_set_non_init(struct xregs_state *xsave,
-               int xstate_feature_mask)
-{
-       xsave->header.xfeatures |= xstate_feature_mask;
-}
-
-/*
- * This function is safe to call whether the FPU is in use or not.
- *
- * Note that this only works on the current task.
- *
- * Inputs:
- *     @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
- *     XFEATURE_MASK_SSE, etc...)
- *     @xsave_state_ptr: a pointer to a copy of the state that you would
- *     like written in to the current task's FPU xsave state.  This pointer
- *     must not be located in the current tasks's xsave area.
- * Output:
- *     address of the state in the xsave area or NULL if the state
- *     is not present or is in its 'init state'.
- */
-static void fpu__xfeature_set_state(int xstate_feature_mask,
-               void *xstate_feature_src, size_t len)
-{
-       struct xregs_state *xsave = &current->thread.fpu.state.xsave;
-       struct fpu *fpu = &current->thread.fpu;
-       void *dst;
-
-       if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
-               WARN_ONCE(1, "%s() attempted with no xsave support", __func__);
-               return;
-       }
-
-       /*
-        * Tell the FPU code that we need the FPU state to be in
-        * 'fpu' (not in the registers), and that we need it to
-        * be stable while we write to it.
-        */
-       fpu__current_fpstate_write_begin();
-
-       /*
-        * This method *WILL* *NOT* work for compact-format
-        * buffers.  If the 'xstate_feature_mask' is unset in
-        * xcomp_bv then we may need to move other feature state
-        * "up" in the buffer.
-        */
-       if (xsave->header.xcomp_bv & xstate_feature_mask) {
-               WARN_ON_ONCE(1);
-               goto out;
-       }
-
-       /* find the location in the xsave buffer of the desired state */
-       dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask);
-
-       /*
-        * Make sure that the pointer being passed in did not
-        * come from the xsave buffer itself.
-        */
-       WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself");
-
-       /* put the caller-provided data in the location */
-       memcpy(dst, xstate_feature_src, len);
-
-       /*
-        * Mark the xfeature so that the CPU knows there is state
-        * in the buffer now.
-        */
-       fpu__xfeature_set_non_init(xsave, xstate_feature_mask);
-out:
-       /*
-        * We are done writing to the 'fpu'.  Reenable preeption
-        * and (possibly) move the fpstate back in to the fpregs.
-        */
-       fpu__current_fpstate_write_end();
-}
-
 #define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
 #define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
 
 /*
- * This will go out and modify the XSAVE buffer so that PKRU is
- * set to a particular state for access to 'pkey'.
- *
- * PKRU state does affect kernel access to user memory.  We do
- * not modfiy PKRU *itself* here, only the XSAVE state that will
- * be restored in to PKRU when we return back to userspace.
+ * This will go out and modify the PKRU register to set the access
+ * rights for @pkey to @init_val.
  */
 int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
                unsigned long init_val)
 {
-       struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
-       struct pkru_state *old_pkru_state;
-       struct pkru_state new_pkru_state;
+       u32 old_pkru;
        int pkey_shift = (pkey * PKRU_BITS_PER_PKEY);
        u32 new_pkru_bits = 0;
 
@@ -974,6 +886,15 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
         */
        if (!boot_cpu_has(X86_FEATURE_OSPKE))
                return -EINVAL;
+       /*
+        * For most XSAVE components, this would be an arduous task:
+        * bringing fpstate up to date with fpregs, updating fpstate,
+        * then re-populating fpregs.  But, for components that are
+        * never lazily managed, we can just access the fpregs
+        * directly.  PKRU is never managed lazily, so we can just
+        * manipulate it directly.  Make sure it stays that way.
+        */
+       WARN_ON_ONCE(!use_eager_fpu());
 
        /* Set the bits we need in PKRU:  */
        if (init_val & PKEY_DISABLE_ACCESS)
@@ -984,37 +905,12 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
        /* Shift the bits in to the correct place in PKRU for pkey: */
        new_pkru_bits <<= pkey_shift;
 
-       /* Locate old copy of the state in the xsave buffer: */
-       old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU);
-
-       /*
-        * When state is not in the buffer, it is in the init
-        * state, set it manually.  Otherwise, copy out the old
-        * state.
-        */
-       if (!old_pkru_state)
-               new_pkru_state.pkru = 0;
-       else
-               new_pkru_state.pkru = old_pkru_state->pkru;
-
-       /* Mask off any old bits in place: */
-       new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
-
-       /* Set the newly-requested bits: */
-       new_pkru_state.pkru |= new_pkru_bits;
-
-       /*
-        * We could theoretically live without zeroing pkru.pad.
-        * The current XSAVE feature state definition says that
-        * only bytes 0->3 are used.  But we do not want to
-        * chance leaking kernel stack out to userspace in case a
-        * memcpy() of the whole xsave buffer was done.
-        *
-        * They're in the same cacheline anyway.
-        */
-       new_pkru_state.pad = 0;
+       /* Get old PKRU and mask off any old bits in place: */
+       old_pkru = read_pkru();
+       old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
 
-       fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, sizeof(new_pkru_state));
+       /* Write old part along with new part: */
+       write_pkru(old_pkru | new_pkru_bits);
 
        return 0;
 }
index 2dda0bc..f16c55b 100644 (file)
@@ -25,8 +25,6 @@ static void __init i386_default_early_setup(void)
        /* Initialize 32bit specific setup functions */
        x86_init.resources.reserve_resources = i386_reserve_resources;
        x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
-
-       reserve_bios_regions();
 }
 
 asmlinkage __visible void __init i386_start_kernel(void)
index 99d48e7..54a2372 100644 (file)
@@ -183,7 +183,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
                copy_bootdata(__va(real_mode_data));
 
        x86_early_init_platform_quirks();
-       reserve_bios_regions();
 
        switch (boot_params.hdr.hardware_subarch) {
        case X86_SUBARCH_INTEL_MID:
index ed16e58..c6dfd80 100644 (file)
@@ -1242,7 +1242,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
        memset(&curr_time, 0, sizeof(struct rtc_time));
 
        if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
-               mc146818_set_time(&curr_time);
+               mc146818_get_time(&curr_time);
 
        if (hpet_rtc_flags & RTC_UIE &&
            curr_time.tm_sec != hpet_prev_update_sec) {
index 61521dc..9f669fd 100644 (file)
@@ -102,8 +102,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
        seq_puts(p, "  Rescheduling interrupts\n");
        seq_printf(p, "%*s: ", prec, "CAL");
        for_each_online_cpu(j)
-               seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
-                                       irq_stats(j)->irq_tlb_count);
+               seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
        seq_puts(p, "  Function call interrupts\n");
        seq_printf(p, "%*s: ", prec, "TLB");
        for_each_online_cpu(j)
index 991b779..0fa60f5 100644 (file)
@@ -936,8 +936,6 @@ void __init setup_arch(char **cmdline_p)
 
        x86_init.oem.arch_setup();
 
-       kernel_randomize_memory();
-
        iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
        setup_memory_map();
        parse_setup_data();
@@ -1055,6 +1053,12 @@ void __init setup_arch(char **cmdline_p)
 
        max_possible_pfn = max_pfn;
 
+       /*
+        * Define random base addresses for memory sections after max_pfn is
+        * defined and before each memory section base is used.
+        */
+       kernel_randomize_memory();
+
 #ifdef CONFIG_X86_32
        /* max_low_pfn get updated here */
        find_low_pfn_range();
@@ -1097,6 +1101,8 @@ void __init setup_arch(char **cmdline_p)
                efi_find_mirror();
        }
 
+       reserve_bios_regions();
+
        /*
         * The EFI specification says that boot service code won't be called
         * after ExitBootServices(). This is, in fact, a lie.
@@ -1125,7 +1131,15 @@ void __init setup_arch(char **cmdline_p)
 
        early_trap_pf_init();
 
-       setup_real_mode();
+       /*
+        * Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)
+        * with the current CR4 value.  This may not be necessary, but
+        * auditing all the early-boot CR4 manipulation would be needed to
+        * rule it out.
+        */
+       if (boot_cpu_data.cpuid_level >= 0)
+               /* A CPU has %cr4 if and only if it has CPUID. */
+               mmu_cr4_features = __read_cr4();
 
        memblock_set_current_limit(get_max_mapped());
 
@@ -1174,13 +1188,6 @@ void __init setup_arch(char **cmdline_p)
 
        kasan_init();
 
-       if (boot_cpu_data.cpuid_level >= 0) {
-               /* A CPU has %cr4 if and only if it has CPUID */
-               mmu_cr4_features = __read_cr4();
-               if (trampoline_cr4_features)
-                       *trampoline_cr4_features = mmu_cr4_features;
-       }
-
 #ifdef CONFIG_X86_32
        /* sync back kernel address range */
        clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
index 2a6e84a..4296beb 100644 (file)
@@ -100,10 +100,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
 /* Logical package management. We might want to allocate that dynamically */
 static int *physical_to_logical_pkg __read_mostly;
 static unsigned long *physical_package_map __read_mostly;;
-static unsigned long *logical_package_map  __read_mostly;
 static unsigned int max_physical_pkg_id __read_mostly;
 unsigned int __max_logical_packages __read_mostly;
 EXPORT_SYMBOL(__max_logical_packages);
+static unsigned int logical_packages __read_mostly;
+static bool logical_packages_frozen __read_mostly;
 
 /* Maximum number of SMT threads on any online core */
 int __max_smt_threads __read_mostly;
@@ -277,14 +278,14 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu)
        if (test_and_set_bit(pkg, physical_package_map))
                goto found;
 
-       new = find_first_zero_bit(logical_package_map, __max_logical_packages);
-       if (new >= __max_logical_packages) {
+       if (logical_packages_frozen) {
                physical_to_logical_pkg[pkg] = -1;
-               pr_warn("APIC(%x) Package %u exceeds logical package map\n",
+               pr_warn("APIC(%x) Package %u exceeds logical package max\n",
                        apicid, pkg);
                return -ENOSPC;
        }
-       set_bit(new, logical_package_map);
+
+       new = logical_packages++;
        pr_info("APIC(%x) Converting physical %u to logical package %u\n",
                apicid, pkg, new);
        physical_to_logical_pkg[pkg] = new;
@@ -341,6 +342,7 @@ static void __init smp_init_package_map(void)
        }
 
        __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
+       logical_packages = 0;
 
        /*
         * Possibly larger than what we need as the number of apic ids per
@@ -352,10 +354,6 @@ static void __init smp_init_package_map(void)
        memset(physical_to_logical_pkg, 0xff, size);
        size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
        physical_package_map = kzalloc(size, GFP_KERNEL);
-       size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long);
-       logical_package_map = kzalloc(size, GFP_KERNEL);
-
-       pr_info("Max logical packages: %u\n", __max_logical_packages);
 
        for_each_present_cpu(cpu) {
                unsigned int apicid = apic->cpu_present_to_apicid(cpu);
@@ -369,6 +367,15 @@ static void __init smp_init_package_map(void)
                set_cpu_possible(cpu, false);
                set_cpu_present(cpu, false);
        }
+
+       if (logical_packages > __max_logical_packages) {
+               pr_warn("Detected more packages (%u), then computed by BIOS data (%u).\n",
+                       logical_packages, __max_logical_packages);
+               logical_packages_frozen = true;
+               __max_logical_packages  = logical_packages;
+       }
+
+       pr_info("Max logical packages: %u\n", __max_logical_packages);
 }
 
 void __init smp_store_boot_cpu_info(void)
index 1ef87e8..78b9cb5 100644 (file)
@@ -22,6 +22,7 @@
 #include <asm/nmi.h>
 #include <asm/x86_init.h>
 #include <asm/geode.h>
+#include <asm/apic.h>
 
 unsigned int __read_mostly cpu_khz;    /* TSC clocks / usec, not used here */
 EXPORT_SYMBOL(cpu_khz);
@@ -1249,6 +1250,9 @@ static void tsc_refine_calibration_work(struct work_struct *work)
                (unsigned long)tsc_khz / 1000,
                (unsigned long)tsc_khz % 1000);
 
+       /* Inform the TSC deadline clockevent devices about the recalibration */
+       lapic_update_tsc_freq();
+
 out:
        if (boot_cpu_has(X86_FEATURE_ART))
                art_related_clocksource = &clocksource_tsc;
index 6c1ff31..495c776 100644 (file)
@@ -357,20 +357,22 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
                *cursor &= 0xfe;
        }
        /*
-        * Similar treatment for VEX3 prefix.
-        * TODO: add XOP/EVEX treatment when insn decoder supports them
+        * Similar treatment for VEX3/EVEX prefix.
+        * TODO: add XOP treatment when insn decoder supports it
         */
-       if (insn->vex_prefix.nbytes == 3) {
+       if (insn->vex_prefix.nbytes >= 3) {
                /*
                 * vex2:     c5    rvvvvLpp   (has no b bit)
                 * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
                 * evex:     62    rxbR00mm wvvvv1pp zllBVaaa
-                *   (evex will need setting of both b and x since
-                *   in non-sib encoding evex.x is 4th bit of MODRM.rm)
-                * Setting VEX3.b (setting because it has inverted meaning):
+                * Setting VEX3.b (setting because it has inverted meaning).
+                * Setting EVEX.x since (in non-SIB encoding) EVEX.x
+                * is the 4th bit of MODRM.rm, and needs the same treatment.
+                * For VEX3-encoded insns, VEX3.x value has no effect in
+                * non-SIB encoding, so the change is superfluous but harmless.
                 */
                cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
-               *cursor |= 0x20;
+               *cursor |= 0x60;
        }
 
        /*
@@ -415,12 +417,10 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 
        reg = MODRM_REG(insn);  /* Fetch modrm.reg */
        reg2 = 0xff;            /* Fetch vex.vvvv */
-       if (insn->vex_prefix.nbytes == 2)
-               reg2 = insn->vex_prefix.bytes[1];
-       else if (insn->vex_prefix.nbytes == 3)
+       if (insn->vex_prefix.nbytes)
                reg2 = insn->vex_prefix.bytes[2];
        /*
-        * TODO: add XOP, EXEV vvvv reading.
+        * TODO: add XOP vvvv reading.
         *
         * vex.vvvv field is in bits 6-3, bits are inverted.
         * But in 32-bit mode, high-order bit may be ignored.
index a45d858..5cede40 100644 (file)
@@ -422,6 +422,7 @@ struct nested_vmx {
        struct list_head vmcs02_pool;
        int vmcs02_num;
        u64 vmcs01_tsc_offset;
+       bool change_vmcs01_virtual_x2apic_mode;
        /* L2 must run next, and mustn't decide to exit to L1. */
        bool nested_run_pending;
        /*
@@ -435,6 +436,8 @@ struct nested_vmx {
        bool pi_pending;
        u16 posted_intr_nv;
 
+       unsigned long *msr_bitmap;
+
        struct hrtimer preemption_timer;
        bool preemption_timer_expired;
 
@@ -924,7 +927,6 @@ static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
 static unsigned long *vmx_msr_bitmap_legacy_x2apic;
 static unsigned long *vmx_msr_bitmap_longmode_x2apic;
-static unsigned long *vmx_msr_bitmap_nested;
 static unsigned long *vmx_vmread_bitmap;
 static unsigned long *vmx_vmwrite_bitmap;
 
@@ -2198,6 +2200,12 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
                        new.control) != old.control);
 }
 
+static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
+{
+       vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
+       vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
+}
+
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
@@ -2256,10 +2264,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
        /* Setup TSC multiplier */
        if (kvm_has_tsc_control &&
-           vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) {
-               vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio;
-               vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
-       }
+           vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
+               decache_tsc_multiplier(vmx);
 
        vmx_vcpu_pi_load(vcpu, cpu);
        vmx->host_pkru = read_pkru();
@@ -2508,7 +2514,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
        unsigned long *msr_bitmap;
 
        if (is_guest_mode(vcpu))
-               msr_bitmap = vmx_msr_bitmap_nested;
+               msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
        else if (cpu_has_secondary_exec_ctrls() &&
                 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
                  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
@@ -6363,13 +6369,6 @@ static __init int hardware_setup(void)
        if (!vmx_msr_bitmap_longmode_x2apic)
                goto out4;
 
-       if (nested) {
-               vmx_msr_bitmap_nested =
-                       (unsigned long *)__get_free_page(GFP_KERNEL);
-               if (!vmx_msr_bitmap_nested)
-                       goto out5;
-       }
-
        vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
        if (!vmx_vmread_bitmap)
                goto out6;
@@ -6392,8 +6391,6 @@ static __init int hardware_setup(void)
 
        memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
        memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-       if (nested)
-               memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);
 
        if (setup_vmcs_config(&vmcs_config) < 0) {
                r = -EIO;
@@ -6529,9 +6526,6 @@ out8:
 out7:
        free_page((unsigned long)vmx_vmread_bitmap);
 out6:
-       if (nested)
-               free_page((unsigned long)vmx_msr_bitmap_nested);
-out5:
        free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 out4:
        free_page((unsigned long)vmx_msr_bitmap_longmode);
@@ -6557,8 +6551,6 @@ static __exit void hardware_unsetup(void)
        free_page((unsigned long)vmx_io_bitmap_a);
        free_page((unsigned long)vmx_vmwrite_bitmap);
        free_page((unsigned long)vmx_vmread_bitmap);
-       if (nested)
-               free_page((unsigned long)vmx_msr_bitmap_nested);
 
        free_kvm_area();
 }
@@ -6995,16 +6987,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
                return 1;
        }
 
+       if (cpu_has_vmx_msr_bitmap()) {
+               vmx->nested.msr_bitmap =
+                               (unsigned long *)__get_free_page(GFP_KERNEL);
+               if (!vmx->nested.msr_bitmap)
+                       goto out_msr_bitmap;
+       }
+
        vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
        if (!vmx->nested.cached_vmcs12)
-               return -ENOMEM;
+               goto out_cached_vmcs12;
 
        if (enable_shadow_vmcs) {
                shadow_vmcs = alloc_vmcs();
-               if (!shadow_vmcs) {
-                       kfree(vmx->nested.cached_vmcs12);
-                       return -ENOMEM;
-               }
+               if (!shadow_vmcs)
+                       goto out_shadow_vmcs;
                /* mark vmcs as shadow */
                shadow_vmcs->revision_id |= (1u << 31);
                /* init shadow vmcs */
@@ -7024,6 +7021,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
        skip_emulated_instruction(vcpu);
        nested_vmx_succeed(vcpu);
        return 1;
+
+out_shadow_vmcs:
+       kfree(vmx->nested.cached_vmcs12);
+
+out_cached_vmcs12:
+       free_page((unsigned long)vmx->nested.msr_bitmap);
+
+out_msr_bitmap:
+       return -ENOMEM;
 }
 
 /*
@@ -7098,6 +7104,10 @@ static void free_nested(struct vcpu_vmx *vmx)
        vmx->nested.vmxon = false;
        free_vpid(vmx->nested.vpid02);
        nested_release_vmcs12(vmx);
+       if (vmx->nested.msr_bitmap) {
+               free_page((unsigned long)vmx->nested.msr_bitmap);
+               vmx->nested.msr_bitmap = NULL;
+       }
        if (enable_shadow_vmcs)
                free_vmcs(vmx->nested.current_shadow_vmcs);
        kfree(vmx->nested.cached_vmcs12);
@@ -8419,6 +8429,12 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
 {
        u32 sec_exec_control;
 
+       /* Postpone execution until vmcs01 is the current VMCS. */
+       if (is_guest_mode(vcpu)) {
+               to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true;
+               return;
+       }
+
        /*
         * There is not point to enable virtualize x2apic without enable
         * apicv
@@ -9472,8 +9488,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
 {
        int msr;
        struct page *page;
-       unsigned long *msr_bitmap;
+       unsigned long *msr_bitmap_l1;
+       unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
 
+       /* This shortcut is ok because we support only x2APIC MSRs so far. */
        if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
                return false;
 
@@ -9482,63 +9500,37 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
                WARN_ON(1);
                return false;
        }
-       msr_bitmap = (unsigned long *)kmap(page);
-       if (!msr_bitmap) {
+       msr_bitmap_l1 = (unsigned long *)kmap(page);
+       if (!msr_bitmap_l1) {
                nested_release_page_clean(page);
                WARN_ON(1);
                return false;
        }
 
+       memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
+
        if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
                if (nested_cpu_has_apic_reg_virt(vmcs12))
                        for (msr = 0x800; msr <= 0x8ff; msr++)
                                nested_vmx_disable_intercept_for_msr(
-                                       msr_bitmap,
-                                       vmx_msr_bitmap_nested,
+                                       msr_bitmap_l1, msr_bitmap_l0,
                                        msr, MSR_TYPE_R);
-               /* TPR is allowed */
-               nested_vmx_disable_intercept_for_msr(msr_bitmap,
-                               vmx_msr_bitmap_nested,
+
+               nested_vmx_disable_intercept_for_msr(
+                               msr_bitmap_l1, msr_bitmap_l0,
                                APIC_BASE_MSR + (APIC_TASKPRI >> 4),
                                MSR_TYPE_R | MSR_TYPE_W);
+
                if (nested_cpu_has_vid(vmcs12)) {
-                       /* EOI and self-IPI are allowed */
                        nested_vmx_disable_intercept_for_msr(
-                               msr_bitmap,
-                               vmx_msr_bitmap_nested,
+                               msr_bitmap_l1, msr_bitmap_l0,
                                APIC_BASE_MSR + (APIC_EOI >> 4),
                                MSR_TYPE_W);
                        nested_vmx_disable_intercept_for_msr(
-                               msr_bitmap,
-                               vmx_msr_bitmap_nested,
+                               msr_bitmap_l1, msr_bitmap_l0,
                                APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
                                MSR_TYPE_W);
                }
-       } else {
-               /*
-                * Enable reading intercept of all the x2apic
-                * MSRs. We should not rely on vmcs12 to do any
-                * optimizations here, it may have been modified
-                * by L1.
-                */
-               for (msr = 0x800; msr <= 0x8ff; msr++)
-                       __vmx_enable_intercept_for_msr(
-                               vmx_msr_bitmap_nested,
-                               msr,
-                               MSR_TYPE_R);
-
-               __vmx_enable_intercept_for_msr(
-                               vmx_msr_bitmap_nested,
-                               APIC_BASE_MSR + (APIC_TASKPRI >> 4),
-                               MSR_TYPE_W);
-               __vmx_enable_intercept_for_msr(
-                               vmx_msr_bitmap_nested,
-                               APIC_BASE_MSR + (APIC_EOI >> 4),
-                               MSR_TYPE_W);
-               __vmx_enable_intercept_for_msr(
-                               vmx_msr_bitmap_nested,
-                               APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
-                               MSR_TYPE_W);
        }
        kunmap(page);
        nested_release_page_clean(page);
@@ -9957,10 +9949,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        }
 
        if (cpu_has_vmx_msr_bitmap() &&
-           exec_control & CPU_BASED_USE_MSR_BITMAPS) {
-               nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
-               /* MSR_BITMAP will be set by following vmx_set_efer. */
-       else
+           exec_control & CPU_BASED_USE_MSR_BITMAPS &&
+           nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
+               /* MSR_BITMAP will be set by following vmx_set_efer. */
+       else
                exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
 
        /*
@@ -10011,6 +10003,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                        vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
        else
                vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+       if (kvm_has_tsc_control)
+               decache_tsc_multiplier(vmx);
 
        if (enable_vpid) {
                /*
@@ -10767,6 +10761,14 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
        else
                vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
                              PIN_BASED_VMX_PREEMPTION_TIMER);
+       if (kvm_has_tsc_control)
+               decache_tsc_multiplier(vmx);
+
+       if (vmx->nested.change_vmcs01_virtual_x2apic_mode) {
+               vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
+               vmx_set_virtual_x2apic_mode(vcpu,
+                               vcpu->arch.apic_base & X2APIC_ENABLE);
+       }
 
        /* This is needed for same reason as it was needed in prepare_vmcs02 */
        vmx->host_rsp = 0;
index 02de3d7..8a602a1 100644 (file)
@@ -35,6 +35,7 @@ ENDPROC(__sw_hweight32)
 
 ENTRY(__sw_hweight64)
 #ifdef CONFIG_X86_64
+       pushq   %rdi
        pushq   %rdx
 
        movq    %rdi, %rdx                      # w -> t
@@ -60,6 +61,7 @@ ENTRY(__sw_hweight64)
        shrq    $56, %rax                       # w = w_tmp >> 56
 
        popq    %rdx
+       popq    %rdi
        ret
 #else /* CONFIG_X86_32 */
        /* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
index f7dfeda..121f59c 100644 (file)
@@ -19,7 +19,7 @@
 #include <asm/cpufeature.h>
 #include <asm/setup.h>
 
-#define debug_putstr(v) early_printk(v)
+#define debug_putstr(v) early_printk("%s", v)
 #define has_cpuflag(f) boot_cpu_has(f)
 #define get_boot_seed() kaslr_offset()
 #endif
index ec21796..4473cb4 100644 (file)
@@ -3,15 +3,17 @@
  * included by both the compressed kernel and the regular kernel.
  */
 
-static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
+static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page,
                           unsigned long addr, unsigned long end)
 {
        addr &= PMD_MASK;
        for (; addr < end; addr += PMD_SIZE) {
                pmd_t *pmd = pmd_page + pmd_index(addr);
 
-               if (!pmd_present(*pmd))
-                       set_pmd(pmd, __pmd(addr | pmd_flag));
+               if (pmd_present(*pmd))
+                       continue;
+
+               set_pmd(pmd, __pmd((addr - info->offset) | info->pmd_flag));
        }
 }
 
@@ -30,13 +32,13 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
 
                if (pud_present(*pud)) {
                        pmd = pmd_offset(pud, 0);
-                       ident_pmd_init(info->pmd_flag, pmd, addr, next);
+                       ident_pmd_init(info, pmd, addr, next);
                        continue;
                }
                pmd = (pmd_t *)info->alloc_pgt_page(info->context);
                if (!pmd)
                        return -ENOMEM;
-               ident_pmd_init(info->pmd_flag, pmd, addr, next);
+               ident_pmd_init(info, pmd, addr, next);
                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
        }
 
@@ -44,14 +46,15 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
 }
 
 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
-                             unsigned long addr, unsigned long end)
+                             unsigned long pstart, unsigned long pend)
 {
+       unsigned long addr = pstart + info->offset;
+       unsigned long end = pend + info->offset;
        unsigned long next;
        int result;
-       int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
 
        for (; addr < end; addr = next) {
-               pgd_t *pgd = pgd_page + pgd_index(addr) + off;
+               pgd_t *pgd = pgd_page + pgd_index(addr);
                pud_t *pud;
 
                next = (addr & PGDIR_MASK) + PGDIR_SIZE;
index 6209289..d28a2d7 100644 (file)
@@ -122,8 +122,18 @@ __ref void *alloc_low_pages(unsigned int num)
        return __va(pfn << PAGE_SHIFT);
 }
 
-/* need 3 4k for initial PMD_SIZE,  3 4k for 0-ISA_END_ADDRESS */
-#define INIT_PGT_BUF_SIZE      (6 * PAGE_SIZE)
+/*
+ * By default we need 3 4k pages for the initial PMD_SIZE and 3 4k pages
+ * for 0-ISA_END_ADDRESS. With KASLR memory randomization, depending on
+ * the machine's e820 memory map and the PUD alignment, we may need twice
+ * as many pages.
+ */
+#ifndef CONFIG_RANDOMIZE_MEMORY
+#define INIT_PGD_PAGE_COUNT      6
+#else
+#define INIT_PGD_PAGE_COUNT      12
+#endif
+#define INIT_PGT_BUF_SIZE      (INIT_PGD_PAGE_COUNT * PAGE_SIZE)
 RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);
 void  __init early_alloc_pgt_buf(void)
 {
index 26dccd6..bda8d5e 100644 (file)
@@ -77,7 +77,7 @@ static inline unsigned long get_padding(struct kaslr_memory_region *region)
  */
 static inline bool kaslr_memory_enabled(void)
 {
-       return kaslr_enabled() && !config_enabled(CONFIG_KASAN);
+       return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN);
 }
 
 /* Initialize base and padding for each memory region randomized with KASLR */
@@ -97,7 +97,7 @@ void __init kernel_randomize_memory(void)
         * add padding if needed (especially for memory hotplug support).
         */
        BUG_ON(kaslr_regions[0].base != &page_offset_base);
-       memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT) +
+       memory_tb = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, 1UL << TB_SHIFT) +
                CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;
 
        /* Adapt phyiscal memory region size based on available memory */
index b814ca6..7948be3 100644 (file)
@@ -41,6 +41,7 @@ static DEFINE_RAW_SPINLOCK(list_lock);
  * @node:      list item for parent traversal.
  * @rcu:       RCU callback item for freeing.
  * @irq:       back pointer to parent.
+ * @enabled:   true if driver enabled IRQ
  * @virq:      the virtual IRQ value provided to the requesting driver.
  *
  * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to
@@ -50,6 +51,7 @@ struct vmd_irq {
        struct list_head        node;
        struct rcu_head         rcu;
        struct vmd_irq_list     *irq;
+       bool                    enabled;
        unsigned int            virq;
 };
 
@@ -122,7 +124,9 @@ static void vmd_irq_enable(struct irq_data *data)
        unsigned long flags;
 
        raw_spin_lock_irqsave(&list_lock, flags);
+       WARN_ON(vmdirq->enabled);
        list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list);
+       vmdirq->enabled = true;
        raw_spin_unlock_irqrestore(&list_lock, flags);
 
        data->chip->irq_unmask(data);
@@ -136,8 +140,10 @@ static void vmd_irq_disable(struct irq_data *data)
        data->chip->irq_mask(data);
 
        raw_spin_lock_irqsave(&list_lock, flags);
-       list_del_rcu(&vmdirq->node);
-       INIT_LIST_HEAD_RCU(&vmdirq->node);
+       if (vmdirq->enabled) {
+               list_del_rcu(&vmdirq->node);
+               vmdirq->enabled = false;
+       }
        raw_spin_unlock_irqrestore(&list_lock, flags);
 }
 
index 4480c06..89d1146 100644 (file)
@@ -254,6 +254,7 @@ void __init efi_free_boot_services(void)
        for_each_efi_memory_desc(md) {
                unsigned long long start = md->phys_addr;
                unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
+               size_t rm_size;
 
                if (md->type != EFI_BOOT_SERVICES_CODE &&
                    md->type != EFI_BOOT_SERVICES_DATA)
@@ -263,6 +264,26 @@ void __init efi_free_boot_services(void)
                if (md->attribute & EFI_MEMORY_RUNTIME)
                        continue;
 
+               /*
+                * Nasty quirk: if all sub-1MB memory is used for boot
+                * services, we can get here without having allocated the
+                * real mode trampoline.  It's too late to hand boot services
+                * memory back to the memblock allocator, so instead
+                * try to manually allocate the trampoline if needed.
+                *
+                * I've seen this on a Dell XPS 13 9350 with firmware
+                * 1.4.4 with SGX enabled booting Linux via Fedora 24's
+                * grub2-efi on a hard disk.  (And no, I don't know why
+                * this happened, but Linux should still try to boot rather
+                * than panicking early.)
+                */
+               rm_size = real_mode_size_needed();
+               if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) {
+                       set_real_mode_mem(start, rm_size);
+                       start += rm_size;
+                       size -= rm_size;
+               }
+
                free_bootmem_late(start, size);
        }
 
index 66b2166..23f2f3e 100644 (file)
@@ -187,7 +187,8 @@ EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
 void uv_bios_init(void)
 {
        uv_systab = NULL;
-       if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab) {
+       if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
+           !efi.uv_systab || efi_runtime_disabled()) {
                pr_crit("UV: UVsystab: missing\n");
                return;
        }
@@ -199,12 +200,14 @@ void uv_bios_init(void)
                return;
        }
 
+       /* Starting with UV4 the UV systab size is variable */
        if (uv_systab->revision >= UV_SYSTAB_VERSION_UV4) {
+               int size = uv_systab->size;
+
                iounmap(uv_systab);
-               uv_systab = ioremap(efi.uv_systab, uv_systab->size);
+               uv_systab = ioremap(efi.uv_systab, size);
                if (!uv_systab) {
-                       pr_err("UV: UVsystab: ioremap(%d) failed!\n",
-                               uv_systab->size);
+                       pr_err("UV: UVsystab: ioremap(%d) failed!\n", size);
                        return;
                }
        }
index f0b5f2d..9634557 100644 (file)
@@ -87,7 +87,7 @@ static int set_up_temporary_mappings(void)
        struct x86_mapping_info info = {
                .alloc_pgt_page = alloc_pgt_page,
                .pmd_flag       = __PAGE_KERNEL_LARGE_EXEC,
-               .kernel_mapping = true,
+               .offset         = __PAGE_OFFSET,
        };
        unsigned long mstart, mend;
        pgd_t *pgd;
@@ -113,7 +113,7 @@ static int set_up_temporary_mappings(void)
                        return result;
        }
 
-       temp_level4_pgt = (unsigned long)pgd - __PAGE_OFFSET;
+       temp_level4_pgt = __pa(pgd);
        return 0;
 }
 
index 705e3ff..5db706f 100644 (file)
@@ -1,9 +1,11 @@
 #include <linux/io.h>
+#include <linux/slab.h>
 #include <linux/memblock.h>
 
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 #include <asm/realmode.h>
+#include <asm/tlbflush.h>
 
 struct real_mode_header *real_mode_header;
 u32 *trampoline_cr4_features;
@@ -11,25 +13,37 @@ u32 *trampoline_cr4_features;
 /* Hold the pgd entry used on booting additional CPUs */
 pgd_t trampoline_pgd_entry;
 
+void __init set_real_mode_mem(phys_addr_t mem, size_t size)
+{
+       void *base = __va(mem);
+
+       real_mode_header = (struct real_mode_header *) base;
+       printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
+              base, (unsigned long long)mem, size);
+}
+
 void __init reserve_real_mode(void)
 {
        phys_addr_t mem;
-       unsigned char *base;
-       size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
+       size_t size = real_mode_size_needed();
+
+       if (!size)
+               return;
+
+       WARN_ON(slab_is_available());
 
        /* Has to be under 1M so we can execute real-mode AP code. */
        mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
-       if (!mem)
-               panic("Cannot allocate trampoline\n");
+       if (!mem) {
+               pr_info("No sub-1M memory is available for the trampoline\n");
+               return;
+       }
 
-       base = __va(mem);
        memblock_reserve(mem, size);
-       real_mode_header = (struct real_mode_header *) base;
-       printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
-              base, (unsigned long long)mem, size);
+       set_real_mode_mem(mem, size);
 }
 
-void __init setup_real_mode(void)
+static void __init setup_real_mode(void)
 {
        u16 real_mode_seg;
        const u32 *rel;
@@ -84,7 +98,7 @@ void __init setup_real_mode(void)
 
        trampoline_header->start = (u64) secondary_startup_64;
        trampoline_cr4_features = &trampoline_header->cr4;
-       *trampoline_cr4_features = __read_cr4();
+       *trampoline_cr4_features = mmu_cr4_features;
 
        trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
        trampoline_pgd[0] = trampoline_pgd_entry.pgd;
@@ -100,7 +114,7 @@ void __init setup_real_mode(void)
  * need to mark it executable at do_pre_smp_initcalls() at least,
  * thus run it as a early_initcall().
  */
-static int __init set_real_mode_permissions(void)
+static void __init set_real_mode_permissions(void)
 {
        unsigned char *base = (unsigned char *) real_mode_header;
        size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
@@ -119,7 +133,16 @@ static int __init set_real_mode_permissions(void)
        set_memory_nx((unsigned long) base, size >> PAGE_SHIFT);
        set_memory_ro((unsigned long) base, ro_size >> PAGE_SHIFT);
        set_memory_x((unsigned long) text_start, text_size >> PAGE_SHIFT);
+}
+
+static int __init init_real_mode(void)
+{
+       if (!real_mode_header)
+               panic("Real mode trampoline was not allocated");
+
+       setup_real_mode();
+       set_real_mode_permissions();
 
        return 0;
 }
-early_initcall(set_real_mode_permissions);
+early_initcall(init_real_mode);
index 8ffb089..b86ebb1 100644 (file)
@@ -118,7 +118,7 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
 DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
 
 /* Linux <-> Xen vCPU id mapping */
-DEFINE_PER_CPU(int, xen_vcpu_id) = -1;
+DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
 EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
 
 enum xen_domain_type xen_domain_type = XEN_NATIVE;
index f394775..aa73540 100644 (file)
@@ -667,18 +667,19 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
        bio->bi_iter.bi_sector  = bio_src->bi_iter.bi_sector;
        bio->bi_iter.bi_size    = bio_src->bi_iter.bi_size;
 
-       if (bio_op(bio) == REQ_OP_DISCARD)
-               goto integrity_clone;
-
-       if (bio_op(bio) == REQ_OP_WRITE_SAME) {
+       switch (bio_op(bio)) {
+       case REQ_OP_DISCARD:
+       case REQ_OP_SECURE_ERASE:
+               break;
+       case REQ_OP_WRITE_SAME:
                bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
-               goto integrity_clone;
+               break;
+       default:
+               bio_for_each_segment(bv, bio_src, iter)
+                       bio->bi_io_vec[bio->bi_vcnt++] = bv;
+               break;
        }
 
-       bio_for_each_segment(bv, bio_src, iter)
-               bio->bi_io_vec[bio->bi_vcnt++] = bv;
-
-integrity_clone:
        if (bio_integrity(bio_src)) {
                int ret;
 
@@ -1788,7 +1789,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
         * Discards need a mutable bio_vec to accommodate the payload
         * required by the DSM TRIM and UNMAP commands.
         */
-       if (bio_op(bio) == REQ_OP_DISCARD)
+       if (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE)
                split = bio_clone_bioset(bio, gfp, bs);
        else
                split = bio_clone_fast(bio, gfp, bs);
index 999442e..36c7ac3 100644 (file)
@@ -515,7 +515,9 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
 
 void blk_set_queue_dying(struct request_queue *q)
 {
-       queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
+       spin_lock_irq(q->queue_lock);
+       queue_flag_set(QUEUE_FLAG_DYING, q);
+       spin_unlock_irq(q->queue_lock);
 
        if (q->mq_ops)
                blk_mq_wake_waiters(q);
index 3eec75a..2642e5f 100644 (file)
@@ -94,8 +94,30 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
        bool do_split = true;
        struct bio *new = NULL;
        const unsigned max_sectors = get_max_io_size(q, bio);
+       unsigned bvecs = 0;
 
        bio_for_each_segment(bv, bio, iter) {
+               /*
+                * With arbitrary bio size, the incoming bio may be very
+                * big. We have to split the bio into small bios so that
+                * each holds at most BIO_MAX_PAGES bvecs because
+                * bio_clone() can fail to allocate big bvecs.
+                *
+                * It would be better to apply the limit only to the
+                * request queues in which bio_clone() is involved,
+                * instead of globally. The biggest blocker is the
+                * bio_clone() in bio bounce.
+                *
+                * If a bio is split for this reason, we should still
+                * allow bio merging to continue, but we don't do that
+                * for now in order to keep the change simple.
+                *
+                * TODO: deal with bio bounce's bio_clone() gracefully
+                * and convert the global limit into per-queue limit.
+                */
+               if (bvecs++ >= BIO_MAX_PAGES)
+                       goto split;
+
                /*
                 * If the queue doesn't support SG gaps and adding this
                 * offset would create a gap, disallow it.
@@ -172,12 +194,18 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
        struct bio *split, *res;
        unsigned nsegs;
 
-       if (bio_op(*bio) == REQ_OP_DISCARD)
+       switch (bio_op(*bio)) {
+       case REQ_OP_DISCARD:
+       case REQ_OP_SECURE_ERASE:
                split = blk_bio_discard_split(q, *bio, bs, &nsegs);
-       else if (bio_op(*bio) == REQ_OP_WRITE_SAME)
+               break;
+       case REQ_OP_WRITE_SAME:
                split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
-       else
+               break;
+       default:
                split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
+               break;
+       }
 
        /* physical segments can be figured out during splitting */
        res = split ? split : *bio;
@@ -213,7 +241,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
         * This should probably be returning 0, but blk_add_request_payload()
         * (Christoph!!!!)
         */
-       if (bio_op(bio) == REQ_OP_DISCARD)
+       if (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE)
                return 1;
 
        if (bio_op(bio) == REQ_OP_WRITE_SAME)
@@ -385,7 +413,9 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
        nsegs = 0;
        cluster = blk_queue_cluster(q);
 
-       if (bio_op(bio) == REQ_OP_DISCARD) {
+       switch (bio_op(bio)) {
+       case REQ_OP_DISCARD:
+       case REQ_OP_SECURE_ERASE:
                /*
                 * This is a hack - drivers should be neither modifying the
                 * biovec, nor relying on bi_vcnt - but because of
@@ -393,19 +423,16 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
                 * a payload we need to set up here (thank you Christoph) and
                 * bi_vcnt is really the only way of telling if we need to.
                 */
-
-               if (bio->bi_vcnt)
-                       goto single_segment;
-
-               return 0;
-       }
-
-       if (bio_op(bio) == REQ_OP_WRITE_SAME) {
-single_segment:
+               if (!bio->bi_vcnt)
+                       return 0;
+               /* Fall through */
+       case REQ_OP_WRITE_SAME:
                *sg = sglist;
                bvec = bio_iovec(bio);
                sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
                return 1;
+       default:
+               break;
        }
 
        for_each_bio(bio)
index e931a0e..13f5a6c 100644 (file)
@@ -793,11 +793,12 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
        struct list_head *dptr;
        int queued;
 
-       WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask));
-
        if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
                return;
 
+       WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
+               cpu_online(hctx->next_cpu));
+
        hctx->run++;
 
        /*
@@ -1036,10 +1037,11 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
 EXPORT_SYMBOL(blk_mq_delay_queue);
 
 static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
-                                           struct blk_mq_ctx *ctx,
                                            struct request *rq,
                                            bool at_head)
 {
+       struct blk_mq_ctx *ctx = rq->mq_ctx;
+
        trace_block_rq_insert(hctx->queue, rq);
 
        if (at_head)
@@ -1053,20 +1055,16 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
 {
        struct blk_mq_ctx *ctx = rq->mq_ctx;
 
-       __blk_mq_insert_req_list(hctx, ctx, rq, at_head);
+       __blk_mq_insert_req_list(hctx, rq, at_head);
        blk_mq_hctx_mark_pending(hctx, ctx);
 }
 
 void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
-               bool async)
+                          bool async)
 {
+       struct blk_mq_ctx *ctx = rq->mq_ctx;
        struct request_queue *q = rq->q;
        struct blk_mq_hw_ctx *hctx;
-       struct blk_mq_ctx *ctx = rq->mq_ctx, *current_ctx;
-
-       current_ctx = blk_mq_get_ctx(q);
-       if (!cpu_online(ctx->cpu))
-               rq->mq_ctx = ctx = current_ctx;
 
        hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
@@ -1076,8 +1074,6 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
 
        if (run_queue)
                blk_mq_run_hw_queue(hctx, async);
-
-       blk_mq_put_ctx(current_ctx);
 }
 
 static void blk_mq_insert_requests(struct request_queue *q,
@@ -1088,14 +1084,9 @@ static void blk_mq_insert_requests(struct request_queue *q,
 
 {
        struct blk_mq_hw_ctx *hctx;
-       struct blk_mq_ctx *current_ctx;
 
        trace_block_unplug(q, depth, !from_schedule);
 
-       current_ctx = blk_mq_get_ctx(q);
-
-       if (!cpu_online(ctx->cpu))
-               ctx = current_ctx;
        hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
        /*
@@ -1107,15 +1098,14 @@ static void blk_mq_insert_requests(struct request_queue *q,
                struct request *rq;
 
                rq = list_first_entry(list, struct request, queuelist);
+               BUG_ON(rq->mq_ctx != ctx);
                list_del_init(&rq->queuelist);
-               rq->mq_ctx = ctx;
-               __blk_mq_insert_req_list(hctx, ctx, rq, false);
+               __blk_mq_insert_req_list(hctx, rq, false);
        }
        blk_mq_hctx_mark_pending(hctx, ctx);
        spin_unlock(&ctx->lock);
 
        blk_mq_run_hw_queue(hctx, from_schedule);
-       blk_mq_put_ctx(current_ctx);
 }
 
 static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b)
@@ -1630,16 +1620,17 @@ static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node)
        return 0;
 }
 
+/*
+ * 'cpu' is going away. splice any existing rq_list entries from this
+ * software queue to the hw queue dispatch list, and ensure that it
+ * gets run.
+ */
 static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
 {
-       struct request_queue *q = hctx->queue;
        struct blk_mq_ctx *ctx;
        LIST_HEAD(tmp);
 
-       /*
-        * Move ctx entries to new CPU, if this one is going away.
-        */
-       ctx = __blk_mq_get_ctx(q, cpu);
+       ctx = __blk_mq_get_ctx(hctx->queue, cpu);
 
        spin_lock(&ctx->lock);
        if (!list_empty(&ctx->rq_list)) {
@@ -1651,24 +1642,11 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
        if (list_empty(&tmp))
                return NOTIFY_OK;
 
-       ctx = blk_mq_get_ctx(q);
-       spin_lock(&ctx->lock);
-
-       while (!list_empty(&tmp)) {
-               struct request *rq;
-
-               rq = list_first_entry(&tmp, struct request, queuelist);
-               rq->mq_ctx = ctx;
-               list_move_tail(&rq->queuelist, &ctx->rq_list);
-       }
-
-       hctx = q->mq_ops->map_queue(q, ctx->cpu);
-       blk_mq_hctx_mark_pending(hctx, ctx);
-
-       spin_unlock(&ctx->lock);
+       spin_lock(&hctx->lock);
+       list_splice_tail_init(&tmp, &hctx->dispatch);
+       spin_unlock(&hctx->lock);
 
        blk_mq_run_hw_queue(hctx, true);
-       blk_mq_put_ctx(ctx);
        return NOTIFY_OK;
 }
 
index 7096c22..f7d973a 100644 (file)
@@ -366,7 +366,7 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
        list_for_each_prev(entry, &q->queue_head) {
                struct request *pos = list_entry_rq(entry);
 
-               if ((req_op(rq) == REQ_OP_DISCARD) != (req_op(pos) == REQ_OP_DISCARD))
+               if (req_op(rq) != req_op(pos))
                        break;
                if (rq_data_dir(rq) != rq_data_dir(pos))
                        break;
index a9377be..84d7148 100644 (file)
@@ -439,7 +439,7 @@ config CRYPTO_CRC32C_INTEL
 
 config CRYPT_CRC32C_VPMSUM
        tristate "CRC32c CRC algorithm (powerpc64)"
-       depends on PPC64
+       depends on PPC64 && ALTIVEC
        select CRYPTO_HASH
        select CRC32
        help
index 6226439..7e8ed96 100644 (file)
 #define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
 
 static const u64 keccakf_rndc[24] = {
-       0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
-       0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
-       0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
-       0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
-       0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
-       0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
-       0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
-       0x8000000000008080, 0x0000000080000001, 0x8000000080008008
+       0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL,
+       0x8000000080008000ULL, 0x000000000000808bULL, 0x0000000080000001ULL,
+       0x8000000080008081ULL, 0x8000000000008009ULL, 0x000000000000008aULL,
+       0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000aULL,
+       0x000000008000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL,
+       0x8000000000008003ULL, 0x8000000000008002ULL, 0x8000000000000080ULL,
+       0x000000000000800aULL, 0x800000008000000aULL, 0x8000000080008081ULL,
+       0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL
 };
 
 static const int keccakf_rotc[24] = {
index 8c234dd..80cc7c0 100644 (file)
@@ -1527,11 +1527,12 @@ static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
 {
        struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
        u64 offset = nfit_blk->stat_offset + mmio->size * bw;
+       const u32 STATUS_MASK = 0x80000037;
 
        if (mmio->num_lines)
                offset = to_interleave_offset(offset, mmio);
 
-       return readl(mmio->addr.base + offset);
+       return readl(mmio->addr.base + offset) & STATUS_MASK;
 }
 
 static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
index b71a9c7..e3d8e4c 100644 (file)
@@ -3706,22 +3706,21 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
        if (UFDCS->rawcmd == 1)
                UFDCS->rawcmd = 2;
 
-       if (mode & (FMODE_READ|FMODE_WRITE)) {
-               UDRS->last_checked = 0;
-               clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
-               check_disk_change(bdev);
-               if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
-                       goto out;
-               if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
+       if (!(mode & FMODE_NDELAY)) {
+               if (mode & (FMODE_READ|FMODE_WRITE)) {
+                       UDRS->last_checked = 0;
+                       clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
+                       check_disk_change(bdev);
+                       if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
+                               goto out;
+                       if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
+                               goto out;
+               }
+               res = -EROFS;
+               if ((mode & FMODE_WRITE) &&
+                   !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags))
                        goto out;
        }
-
-       res = -EROFS;
-
-       if ((mode & FMODE_WRITE) &&
-                       !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags))
-               goto out;
-
        mutex_unlock(&open_lock);
        mutex_unlock(&floppy_mutex);
        return 0;
index 1a04af6..6c6519f 100644 (file)
@@ -3950,6 +3950,7 @@ static void rbd_dev_release(struct device *dev)
        bool need_put = !!rbd_dev->opts;
 
        ceph_oid_destroy(&rbd_dev->header_oid);
+       ceph_oloc_destroy(&rbd_dev->header_oloc);
 
        rbd_put_client(rbd_dev->rbd_client);
        rbd_spec_put(rbd_dev->spec);
@@ -5336,15 +5337,6 @@ static ssize_t do_rbd_add(struct bus_type *bus,
        }
        spec->pool_id = (u64)rc;
 
-       /* The ceph file layout needs to fit pool id in 32 bits */
-
-       if (spec->pool_id > (u64)U32_MAX) {
-               rbd_warn(NULL, "pool id too large (%llu > %u)",
-                               (unsigned long long)spec->pool_id, U32_MAX);
-               rc = -EIO;
-               goto err_out_client;
-       }
-
        rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts);
        if (!rbd_dev) {
                rc = -ENOMEM;
index 1523e05..93b1aaa 100644 (file)
@@ -391,22 +391,16 @@ static int init_vq(struct virtio_blk *vblk)
                num_vqs = 1;
 
        vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL);
-       if (!vblk->vqs) {
-               err = -ENOMEM;
-               goto out;
-       }
+       if (!vblk->vqs)
+               return -ENOMEM;
 
        names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL);
-       if (!names)
-               goto err_names;
-
        callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL);
-       if (!callbacks)
-               goto err_callbacks;
-
        vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL);
-       if (!vqs)
-               goto err_vqs;
+       if (!names || !callbacks || !vqs) {
+               err = -ENOMEM;
+               goto out;
+       }
 
        for (i = 0; i < num_vqs; i++) {
                callbacks[i] = virtblk_done;
@@ -417,7 +411,7 @@ static int init_vq(struct virtio_blk *vblk)
        /* Discover virtqueues and write information to configuration.  */
        err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
        if (err)
-               goto err_find_vqs;
+               goto out;
 
        for (i = 0; i < num_vqs; i++) {
                spin_lock_init(&vblk->vqs[i].lock);
@@ -425,16 +419,12 @@ static int init_vq(struct virtio_blk *vblk)
        }
        vblk->num_vqs = num_vqs;
 
- err_find_vqs:
+out:
        kfree(vqs);
- err_vqs:
        kfree(callbacks);
- err_callbacks:
        kfree(names);
- err_names:
        if (err)
                kfree(vblk->vqs);
- out:
        return err;
 }
 
index be4fea6..88ef6d4 100644 (file)
@@ -189,6 +189,8 @@ struct blkfront_info
        struct mutex mutex;
        struct xenbus_device *xbdev;
        struct gendisk *gd;
+       u16 sector_size;
+       unsigned int physical_sector_size;
        int vdevice;
        blkif_vdev_t handle;
        enum blkif_state connected;
@@ -910,9 +912,45 @@ static struct blk_mq_ops blkfront_mq_ops = {
        .map_queue = blk_mq_map_queue,
 };
 
+static void blkif_set_queue_limits(struct blkfront_info *info)
+{
+       struct request_queue *rq = info->rq;
+       struct gendisk *gd = info->gd;
+       unsigned int segments = info->max_indirect_segments ? :
+                               BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+       queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
+
+       if (info->feature_discard) {
+               queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
+               blk_queue_max_discard_sectors(rq, get_capacity(gd));
+               rq->limits.discard_granularity = info->discard_granularity;
+               rq->limits.discard_alignment = info->discard_alignment;
+               if (info->feature_secdiscard)
+                       queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq);
+       }
+
+       /* Hard sector size and max sectors impersonate the equiv. hardware. */
+       blk_queue_logical_block_size(rq, info->sector_size);
+       blk_queue_physical_block_size(rq, info->physical_sector_size);
+       blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512);
+
+       /* Each segment in a request is up to an aligned page in size. */
+       blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
+       blk_queue_max_segment_size(rq, PAGE_SIZE);
+
+       /* Ensure a merged request will fit in a single I/O ring slot. */
+       blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG);
+
+       /* Make sure buffer addresses are sector-aligned. */
+       blk_queue_dma_alignment(rq, 511);
+
+       /* Make sure we don't use bounce buffers. */
+       blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
+}
+
 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
-                               unsigned int physical_sector_size,
-                               unsigned int segments)
+                               unsigned int physical_sector_size)
 {
        struct request_queue *rq;
        struct blkfront_info *info = gd->private_data;
@@ -944,36 +982,11 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
        }
 
        rq->queuedata = info;
-       queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
-
-       if (info->feature_discard) {
-               queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
-               blk_queue_max_discard_sectors(rq, get_capacity(gd));
-               rq->limits.discard_granularity = info->discard_granularity;
-               rq->limits.discard_alignment = info->discard_alignment;
-               if (info->feature_secdiscard)
-                       queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq);
-       }
-
-       /* Hard sector size and max sectors impersonate the equiv. hardware. */
-       blk_queue_logical_block_size(rq, sector_size);
-       blk_queue_physical_block_size(rq, physical_sector_size);
-       blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512);
-
-       /* Each segment in a request is up to an aligned page in size. */
-       blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
-       blk_queue_max_segment_size(rq, PAGE_SIZE);
-
-       /* Ensure a merged request will fit in a single I/O ring slot. */
-       blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG);
-
-       /* Make sure buffer addresses are sector-aligned. */
-       blk_queue_dma_alignment(rq, 511);
-
-       /* Make sure we don't use bounce buffers. */
-       blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
-
-       gd->queue = rq;
+       info->rq = gd->queue = rq;
+       info->gd = gd;
+       info->sector_size = sector_size;
+       info->physical_sector_size = physical_sector_size;
+       blkif_set_queue_limits(info);
 
        return 0;
 }
@@ -1136,16 +1149,11 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
        gd->private_data = info;
        set_capacity(gd, capacity);
 
-       if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size,
-                                info->max_indirect_segments ? :
-                                BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+       if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) {
                del_gendisk(gd);
                goto release;
        }
 
-       info->rq = gd->queue;
-       info->gd = gd;
-
        xlvbd_flush(info);
 
        if (vdisk_info & VDISK_READONLY)
@@ -1315,7 +1323,7 @@ free_shadow:
                        rinfo->ring_ref[i] = GRANT_INVALID_REF;
                }
        }
-       free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
+       free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
        rinfo->ring.sring = NULL;
 
        if (rinfo->irq)
@@ -2007,8 +2015,10 @@ static int blkif_recover(struct blkfront_info *info)
        struct split_bio *split_bio;
 
        blkfront_gather_backend_features(info);
+       /* Reset limits changed by blk_mq_update_nr_hw_queues(). */
+       blkif_set_queue_limits(info);
        segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
-       blk_queue_max_segments(info->rq, segs);
+       blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG);
 
        for (r_index = 0; r_index < info->nr_rings; r_index++) {
                struct blkfront_ring_info *rinfo = &info->rinfo[r_index];
@@ -2432,7 +2442,7 @@ static void blkfront_connect(struct blkfront_info *info)
        if (err) {
                xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
                                 info->xbdev->otherend);
-               return;
+               goto fail;
        }
 
        xenbus_switch_state(info->xbdev, XenbusStateConnected);
@@ -2445,6 +2455,11 @@ static void blkfront_connect(struct blkfront_info *info)
        device_add_disk(&info->xbdev->dev, info->gd);
 
        info->is_ready = 1;
+       return;
+
+fail:
+       blkif_free(info, 0);
+       return;
 }
 
 /**
index 28bce3f..5770054 100644 (file)
@@ -8,6 +8,9 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+
+#define pr_fmt(fmt)    "arm_arch_timer: " fmt
+
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/device.h>
@@ -370,16 +373,33 @@ static bool arch_timer_has_nonsecure_ppi(void)
                arch_timer_ppi[PHYS_NONSECURE_PPI]);
 }
 
+static u32 check_ppi_trigger(int irq)
+{
+       u32 flags = irq_get_trigger_type(irq);
+
+       if (flags != IRQF_TRIGGER_HIGH && flags != IRQF_TRIGGER_LOW) {
+               pr_warn("WARNING: Invalid trigger for IRQ%d, assuming level low\n", irq);
+               pr_warn("WARNING: Please fix your firmware\n");
+               flags = IRQF_TRIGGER_LOW;
+       }
+
+       return flags;
+}
+
 static int arch_timer_starting_cpu(unsigned int cpu)
 {
        struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt);
+       u32 flags;
 
        __arch_timer_setup(ARCH_CP15_TIMER, clk);
 
-       enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], 0);
+       flags = check_ppi_trigger(arch_timer_ppi[arch_timer_uses_ppi]);
+       enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], flags);
 
-       if (arch_timer_has_nonsecure_ppi())
-               enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], 0);
+       if (arch_timer_has_nonsecure_ppi()) {
+               flags = check_ppi_trigger(arch_timer_ppi[PHYS_NONSECURE_PPI]);
+               enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], flags);
+       }
 
        arch_counter_set_user_access();
        if (evtstrm_enable)
index 7e3fd37..92f6e4d 100644 (file)
@@ -66,10 +66,10 @@ static void kona_timer_disable_and_clear(void __iomem *base)
 
 }
 
-static void
+static int
 kona_timer_get_counter(void __iomem *timer_base, uint32_t *msw, uint32_t *lsw)
 {
-       int loop_limit = 4;
+       int loop_limit = 3;
 
        /*
         * Read 64-bit free running counter
@@ -83,18 +83,19 @@ kona_timer_get_counter(void __iomem *timer_base, uint32_t *msw, uint32_t *lsw)
         *      if new hi-word is equal to previously read hi-word then stop.
         */
 
-       while (--loop_limit) {
+       do {
                *msw = readl(timer_base + KONA_GPTIMER_STCHI_OFFSET);
                *lsw = readl(timer_base + KONA_GPTIMER_STCLO_OFFSET);
                if (*msw == readl(timer_base + KONA_GPTIMER_STCHI_OFFSET))
                        break;
-       }
+       } while (--loop_limit);
        if (!loop_limit) {
                pr_err("bcm_kona_timer: getting counter failed.\n");
                pr_err(" Timer will be impacted\n");
+               return -ETIMEDOUT;
        }
 
-       return;
+       return 0;
 }
 
 static int kona_timer_set_next_event(unsigned long clc,
@@ -112,8 +113,11 @@ static int kona_timer_set_next_event(unsigned long clc,
 
        uint32_t lsw, msw;
        uint32_t reg;
+       int ret;
 
-       kona_timer_get_counter(timers.tmr_regs, &msw, &lsw);
+       ret = kona_timer_get_counter(timers.tmr_regs, &msw, &lsw);
+       if (ret)
+               return ret;
 
        /* Load the "next" event tick value */
        writel(lsw + clc, timers.tmr_regs + KONA_GPTIMER_STCM0_OFFSET);
index d91e872..b4b3ab5 100644 (file)
@@ -164,7 +164,7 @@ void __init gic_clocksource_init(unsigned int frequency)
        gic_start_count();
 }
 
-static void __init gic_clocksource_of_init(struct device_node *node)
+static int __init gic_clocksource_of_init(struct device_node *node)
 {
        struct clk *clk;
        int ret;
index 937e10b..3e1cb51 100644 (file)
@@ -21,6 +21,8 @@
 #include <linux/of_irq.h>
 #include <linux/sched_clock.h>
 
+#include <clocksource/pxa.h>
+
 #include <asm/div64.h>
 
 #define OSMR0          0x00    /* OS Timer 0 Match Register */
index 97669ee..c83452c 100644 (file)
@@ -123,12 +123,16 @@ static struct clock_event_device sun4i_clockevent = {
        .set_next_event = sun4i_clkevt_next_event,
 };
 
+static void sun4i_timer_clear_interrupt(void)
+{
+       writel(TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_ST_REG);
+}
 
 static irqreturn_t sun4i_timer_interrupt(int irq, void *dev_id)
 {
        struct clock_event_device *evt = (struct clock_event_device *)dev_id;
 
-       writel(0x1, timer_base + TIMER_IRQ_ST_REG);
+       sun4i_timer_clear_interrupt();
        evt->event_handler(evt);
 
        return IRQ_HANDLED;
@@ -208,6 +212,9 @@ static int __init sun4i_timer_init(struct device_node *node)
        /* Make sure timer is stopped before playing with interrupts */
        sun4i_clkevt_time_stop(0);
 
+       /* clear timer0 interrupt */
+       sun4i_timer_clear_interrupt();
+
        sun4i_clockevent.cpumask = cpu_possible_mask;
        sun4i_clockevent.irq = irq;
 
index 719b478..3c39e6f 100644 (file)
@@ -338,7 +338,6 @@ static int __init armada_xp_timer_init(struct device_node *np)
        struct clk *clk = of_clk_get_by_name(np, "fixed");
        int ret;
 
-       clk = of_clk_get(np, 0);
        if (IS_ERR(clk)) {
                pr_err("Failed to get clock");
                return PTR_ERR(clk);
index a7d9a08..a8e6c7d 100644 (file)
@@ -202,10 +202,10 @@ static int __init pistachio_clksrc_of_init(struct device_node *node)
        rate = clk_get_rate(fast_clk);
 
        /* Disable irq's for clocksource usage */
-       gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 0);
-       gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 1);
-       gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 2);
-       gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 3);
+       gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 0);
+       gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 1);
+       gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 2);
+       gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 3);
 
        /* Enable timer block */
        writel(TIMER_ME_GLOBAL, pcs_gpt.base);
index 1ffac0c..3494bc5 100644 (file)
@@ -261,6 +261,12 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node)
                return PTR_ERR(data->mck);
        }
 
+       ret = clk_prepare_enable(data->mck);
+       if (ret) {
+               pr_err("Unable to enable mck\n");
+               return ret;
+       }
+
        /* Get the interrupts property */
        data->irq = irq_of_parse_and_map(node, 0);
        if (!data->irq) {
index 87796e0..d3ffde8 100644 (file)
@@ -145,11 +145,30 @@ static struct powernv_pstate_info {
 /* Use following macros for conversions between pstate_id and index */
 static inline int idx_to_pstate(unsigned int i)
 {
+       if (unlikely(i >= powernv_pstate_info.nr_pstates)) {
+               pr_warn_once("index %u is out of bound\n", i);
+               return powernv_freqs[powernv_pstate_info.nominal].driver_data;
+       }
+
        return powernv_freqs[i].driver_data;
 }
 
 static inline unsigned int pstate_to_idx(int pstate)
 {
+       int min = powernv_freqs[powernv_pstate_info.min].driver_data;
+       int max = powernv_freqs[powernv_pstate_info.max].driver_data;
+
+       if (min > 0) {
+               if (unlikely((pstate < max) || (pstate > min))) {
+                       pr_warn_once("pstate %d is out of bound\n", pstate);
+                       return powernv_pstate_info.nominal;
+               }
+       } else {
+               if (unlikely((pstate > max) || (pstate < min))) {
+                       pr_warn_once("pstate %d is out of bound\n", pstate);
+                       return powernv_pstate_info.nominal;
+               }
+       }
        /*
         * abs() is deliberately used so that is works with
         * both monotonically increasing and decreasing
@@ -593,7 +612,7 @@ void gpstate_timer_handler(unsigned long data)
        } else {
                gpstate_idx = calc_global_pstate(gpstates->elapsed_time,
                                                 gpstates->highest_lpstate_idx,
-                                                freq_data.pstate_id);
+                                                gpstates->last_lpstate_idx);
        }
 
        /*
index ea8189f..6dc5971 100644 (file)
@@ -441,6 +441,9 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
                               OP_ALG_AAI_CTR_MOD128);
        const bool is_rfc3686 = alg->caam.rfc3686;
 
+       if (!ctx->authsize)
+               return 0;
+
        /* NULL encryption / decryption */
        if (!ctx->enckeylen)
                return aead_null_set_sh_desc(aead);
@@ -614,7 +617,7 @@ skip_enc:
                keys_fit_inline = true;
 
        /* aead_givencrypt shared descriptor */
-       desc = ctx->sh_desc_givenc;
+       desc = ctx->sh_desc_enc;
 
        /* Note: Context registers are saved. */
        init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686);
@@ -645,13 +648,13 @@ copy_iv:
        append_operation(desc, ctx->class2_alg_type |
                         OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
 
-       /* ivsize + cryptlen = seqoutlen - authsize */
-       append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
-
        /* Read and write assoclen bytes */
        append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
        append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 
+       /* ivsize + cryptlen = seqoutlen - authsize */
+       append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
+
        /* Skip assoc data */
        append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
 
@@ -697,7 +700,7 @@ copy_iv:
        ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc,
                                              desc_bytes(desc),
                                              DMA_TO_DEVICE);
-       if (dma_mapping_error(jrdev, ctx->sh_desc_givenc_dma)) {
+       if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) {
                dev_err(jrdev, "unable to map shared descriptor\n");
                return -ENOMEM;
        }
index f1ecc8d..36365b3 100644 (file)
@@ -1898,6 +1898,7 @@ caam_hash_alloc(struct caam_hash_template *template,
                         template->name);
                snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
                         template->driver_name);
+               t_alg->ahash_alg.setkey = NULL;
        }
        alg->cra_module = THIS_MODULE;
        alg->cra_init = caam_hash_cra_init;
index dfb1685..1f01e98 100644 (file)
@@ -116,6 +116,9 @@ static int dax_pmem_probe(struct device *dev)
        if (rc)
                return rc;
 
+       /* adjust the dax_region resource to the start of data */
+       res.start += le64_to_cpu(pfn_sb->dataoff);
+
        nd_region = to_nd_region(dev->parent);
        dax_region = alloc_dax_region(dev, nd_region->id, &res,
                        le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP);
index d0c1dab..dff1a4a 100644 (file)
@@ -251,6 +251,14 @@ config EDAC_SBRIDGE
          Support for error detection and correction the Intel
          Sandy Bridge, Ivy Bridge and Haswell Integrated Memory Controllers.
 
+config EDAC_SKX
+       tristate "Intel Skylake server Integrated MC"
+       depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
+       depends on PCI_MMCONFIG
+       help
+         Support for error detection and correction on the Intel
+         Skylake server Integrated Memory Controllers.
+
 config EDAC_MPC85XX
        tristate "Freescale MPC83xx / MPC85xx"
        depends on EDAC_MM_EDAC && FSL_SOC
index f9e4a3e..9860499 100644 (file)
@@ -31,6 +31,7 @@ obj-$(CONFIG_EDAC_I5400)              += i5400_edac.o
 obj-$(CONFIG_EDAC_I7300)               += i7300_edac.o
 obj-$(CONFIG_EDAC_I7CORE)              += i7core_edac.o
 obj-$(CONFIG_EDAC_SBRIDGE)             += sb_edac.o
+obj-$(CONFIG_EDAC_SKX)                 += skx_edac.o
 obj-$(CONFIG_EDAC_E7XXX)               += e7xxx_edac.o
 obj-$(CONFIG_EDAC_E752X)               += e752x_edac.o
 obj-$(CONFIG_EDAC_I82443BXGX)          += i82443bxgx_edac.o
index 4fb2eb7..ce0067b 100644 (file)
@@ -552,9 +552,9 @@ static const struct pci_id_table pci_dev_descr_haswell_table[] = {
 /* Knight's Landing Support */
 /*
  * KNL's memory channels are swizzled between memory controllers.
- * MC0 is mapped to CH3,5,6 and MC1 is mapped to CH0,1,2
+ * MC0 is mapped to CH3,4,5 and MC1 is mapped to CH0,1,2
  */
-#define knl_channel_remap(channel) ((channel + 3) % 6)
+#define knl_channel_remap(mc, chan) ((mc) ? (chan) : (chan) + 3)
 
 /* Memory controller, TAD tables, error injection - 2-8-0, 2-9-0 (2 of these) */
 #define PCI_DEVICE_ID_INTEL_KNL_IMC_MC       0x7840
@@ -1286,7 +1286,7 @@ static u32 knl_get_mc_route(int entry, u32 reg)
        mc = GET_BITFIELD(reg, entry*3, (entry*3)+2);
        chan = GET_BITFIELD(reg, (entry*2) + 18, (entry*2) + 18 + 1);
 
-       return knl_channel_remap(mc*3 + chan);
+       return knl_channel_remap(mc, chan);
 }
 
 /*
@@ -2997,8 +2997,15 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
                } else {
                        char A = *("A");
 
-                       channel = knl_channel_remap(channel);
+                       /*
+                        * Reported channel is in range 0-2, so we can't map it
+                        * back to mc. To figure out mc we check machine check
+                        * bank register that reported this error.
+                        * bank15 means mc0 and bank16 means mc1.
+                        */
+                       channel = knl_channel_remap(m->bank == 16, channel);
                        channel_mask = 1 << channel;
+
                        snprintf(msg, sizeof(msg),
                                "%s%s err_code:%04x:%04x channel:%d (DIMM_%c)",
                                overflow ? " OVERFLOW" : "",
diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c
new file mode 100644 (file)
index 0000000..0ff4878
--- /dev/null
@@ -0,0 +1,1121 @@
+/*
+ * EDAC driver for Intel(R) Xeon(R) Skylake processors
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/edac.h>
+#include <linux/mmzone.h>
+#include <linux/smp.h>
+#include <linux/bitmap.h>
+#include <linux/math64.h>
+#include <linux/mod_devicetable.h>
+#include <asm/cpu_device_id.h>
+#include <asm/processor.h>
+#include <asm/mce.h>
+
+#include "edac_core.h"
+
+#define SKX_REVISION    " Ver: 1.0 "
+
+/*
+ * Debug macros
+ */
+#define skx_printk(level, fmt, arg...)                 \
+       edac_printk(level, "skx", fmt, ##arg)
+
+#define skx_mc_printk(mci, level, fmt, arg...)         \
+       edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg)
+
+/*
+ * Get a bit field at register value <v>, from bit <lo> to bit <hi>
+ */
+#define GET_BITFIELD(v, lo, hi) \
+       (((v) & GENMASK_ULL((hi), (lo))) >> (lo))
+
+static LIST_HEAD(skx_edac_list);
+
+static u64 skx_tolm, skx_tohm;
+
+#define NUM_IMC                        2       /* memory controllers per socket */
+#define NUM_CHANNELS           3       /* channels per memory controller */
+#define NUM_DIMMS              2       /* Max DIMMS per channel */
+
+#define        MASK26  0x3FFFFFF               /* Mask for 2^26 */
+#define MASK29 0x1FFFFFFF              /* Mask for 2^29 */
+
+/*
+ * Each cpu socket contains some pci devices that provide global
+ * information, and also some that are local to each of the two
+ * memory controllers on the die.
+ */
+struct skx_dev {
+       struct list_head        list;
+       u8                      bus[4];
+       struct pci_dev  *sad_all;
+       struct pci_dev  *util_all;
+       u32     mcroute;
+       struct skx_imc {
+               struct mem_ctl_info *mci;
+               u8      mc;     /* system wide mc# */
+               u8      lmc;    /* socket relative mc# */
+               u8      src_id, node_id;
+               struct skx_channel {
+                       struct pci_dev *cdev;
+                       struct skx_dimm {
+                               u8      close_pg;
+                               u8      bank_xor_enable;
+                               u8      fine_grain_bank;
+                               u8      rowbits;
+                               u8      colbits;
+                       } dimms[NUM_DIMMS];
+               } chan[NUM_CHANNELS];
+       } imc[NUM_IMC];
+};
+static int skx_num_sockets;
+
+struct skx_pvt {
+       struct skx_imc  *imc;
+};
+
+struct decoded_addr {
+       struct skx_dev *dev;
+       u64     addr;
+       int     socket;
+       int     imc;
+       int     channel;
+       u64     chan_addr;
+       int     sktways;
+       int     chanways;
+       int     dimm;
+       int     rank;
+       int     channel_rank;
+       u64     rank_address;
+       int     row;
+       int     column;
+       int     bank_address;
+       int     bank_group;
+};
+
+static struct skx_dev *get_skx_dev(u8 bus, u8 idx)
+{
+       struct skx_dev *d;
+
+       list_for_each_entry(d, &skx_edac_list, list) {
+               if (d->bus[idx] == bus)
+                       return d;
+       }
+
+       return NULL;
+}
+
+enum munittype {
+       CHAN0, CHAN1, CHAN2, SAD_ALL, UTIL_ALL, SAD
+};
+
+struct munit {
+       u16     did;
+       u16     devfn[NUM_IMC];
+       u8      busidx;
+       u8      per_socket;
+       enum munittype mtype;
+};
+
+/*
+ * List of PCI device ids that we need together with some device
+ * number and function numbers to tell which memory controller the
+ * device belongs to.
+ */
+static const struct munit skx_all_munits[] = {
+       { 0x2054, { }, 1, 1, SAD_ALL },
+       { 0x2055, { }, 1, 1, UTIL_ALL },
+       { 0x2040, { PCI_DEVFN(10, 0), PCI_DEVFN(12, 0) }, 2, 2, CHAN0 },
+       { 0x2044, { PCI_DEVFN(10, 4), PCI_DEVFN(12, 4) }, 2, 2, CHAN1 },
+       { 0x2048, { PCI_DEVFN(11, 0), PCI_DEVFN(13, 0) }, 2, 2, CHAN2 },
+       { 0x208e, { }, 1, 0, SAD },
+       { }
+};
+
+/*
+ * We use the per-socket device 0x2016 to count how many sockets are present,
+ * and to determine which PCI buses are associated with each socket. Allocate
+ * and build the full list of all the skx_dev structures that we need here.
+ */
+static int get_all_bus_mappings(void)
+{
+       struct pci_dev *pdev = NULL;
+       struct skx_dev *sock;
+       u32 busmap;
+       int count = 0;
+
+       /* Each found device is fed back in as the start of the next search. */
+       while ((pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2016, pdev)) != NULL) {
+               count++;
+
+               sock = kzalloc(sizeof(*sock), GFP_KERNEL);
+               if (!sock) {
+                       /* we still hold the reference on the current device */
+                       pci_dev_put(pdev);
+                       return -ENOMEM;
+               }
+
+               /* config offset 0xCC packs four bus numbers, one per byte */
+               pci_read_config_dword(pdev, 0xCC, &busmap);
+               sock->bus[0] =  GET_BITFIELD(busmap, 0, 7);
+               sock->bus[1] =  GET_BITFIELD(busmap, 8, 15);
+               sock->bus[2] =  GET_BITFIELD(busmap, 16, 23);
+               sock->bus[3] =  GET_BITFIELD(busmap, 24, 31);
+               edac_dbg(2, "busses: %x, %x, %x, %x\n",
+                        sock->bus[0], sock->bus[1], sock->bus[2], sock->bus[3]);
+
+               list_add_tail(&sock->list, &skx_edac_list);
+               skx_num_sockets++;
+       }
+
+       return count;
+}
+
+/*
+ * Find every PCI device matching @m->did, attach it to the skx_dev that
+ * owns its bus and record it according to @m->mtype.
+ *
+ * Returns the number of devices counted (SAD devices are not counted),
+ * or -ENODEV on any mismatch or failure.
+ */
+static int get_all_munits(const struct munit *m)
+{
+       struct pci_dev *pdev, *prev;
+       struct skx_dev *d;
+       u32 reg;
+       int i = 0, ndev = 0;
+
+       prev = NULL;
+       for (;;) {
+               pdev = pci_get_device(PCI_VENDOR_ID_INTEL, m->did, prev);
+               if (!pdev)
+                       break;
+               ndev++;
+               /*
+                * For entries with one device per memory controller, the
+                * devfn tells us which controller (index i) this one is.
+                */
+               if (m->per_socket == NUM_IMC) {
+                       for (i = 0; i < NUM_IMC; i++)
+                               if (m->devfn[i] == pdev->devfn)
+                                       break;
+                       if (i == NUM_IMC)
+                               goto fail;
+               }
+               d = get_skx_dev(pdev->bus->number, m->busidx);
+               if (!d)
+                       goto fail;
+
+               /* Be sure that the device is enabled */
+               if (unlikely(pci_enable_device(pdev) < 0)) {
+                       skx_printk(KERN_ERR,
+                               "Couldn't enable %04x:%04x\n", PCI_VENDOR_ID_INTEL, m->did);
+                       goto fail;
+               }
+
+               switch (m->mtype) {
+               case CHAN0: case CHAN1: case CHAN2:
+                       /* extra reference for the pointer stored in skx_dev */
+                       pci_dev_get(pdev);
+                       d->imc[i].chan[m->mtype].cdev = pdev;
+                       break;
+               case SAD_ALL:
+                       pci_dev_get(pdev);
+                       d->sad_all = pdev;
+                       break;
+               case UTIL_ALL:
+                       pci_dev_get(pdev);
+                       d->util_all = pdev;
+                       break;
+               case SAD:
+                       /*
+                        * one of these devices per core, including cores
+                        * that don't exist on this SKU. Ignore any that
+                        * read a route table of zero, make sure all the
+                        * non-zero values match.
+                        */
+                       pci_read_config_dword(pdev, 0xB4, &reg);
+                       if (reg != 0) {
+                               if (d->mcroute == 0)
+                                       d->mcroute = reg;
+                               else if (d->mcroute != reg) {
+                                       skx_printk(KERN_ERR,
+                                               "mcroute mismatch\n");
+                                       goto fail;
+                               }
+                       }
+                       /* SAD devices are excluded from the returned count */
+                       ndev--;
+                       break;
+               }
+
+               prev = pdev;
+       }
+
+       return ndev;
+fail:
+       /* drop the reference obtained from pci_get_device() */
+       pci_dev_put(pdev);
+       return -ENODEV;
+}
+
+/*
+ * CPU models this driver binds to; checked with x86_match_cpu() in
+ * skx_init() and exported through MODULE_DEVICE_TABLE.
+ */
+const struct x86_cpu_id skx_cpuids[] = {
+       { X86_VENDOR_INTEL, 6, 0x55, 0, 0 },    /* Skylake */
+       { }
+};
+MODULE_DEVICE_TABLE(x86cpu, skx_cpuids);
+
+/* Return bits 12..14 of config offset 0xF0 of the socket's util_all device. */
+static u8 get_src_id(struct skx_dev *d)
+{
+       u32 cfg;
+       u8 src_id;
+
+       pci_read_config_dword(d->util_all, 0xF0, &cfg);
+       src_id = GET_BITFIELD(cfg, 12, 14);
+
+       return src_id;
+}
+
+/* Return bits 0..2 of config offset 0xF4 of the socket's util_all device. */
+static u8 skx_get_node_id(struct skx_dev *d)
+{
+       u32 cfg;
+       u8 node_id;
+
+       pci_read_config_dword(d->util_all, 0xF4, &cfg);
+       node_id = GET_BITFIELD(cfg, 0, 2);
+
+       return node_id;
+}
+
+/*
+ * Extract bits @lobit..@hibit of @reg and validate them against
+ * [@minval, @maxval]. Returns the field value plus @add, or -EINVAL
+ * (after a debug message naming @name) when the field is out of range.
+ */
+static int get_dimm_attr(u32 reg, int lobit, int hibit, int add, int minval,
+                        int maxval, char *name)
+{
+       u32 val = GET_BITFIELD(reg, lobit, hibit);
+
+       if (val >= minval && val <= maxval)
+               return val + add;
+
+       edac_dbg(2, "bad %s = %d (raw=%x)\n", name, val, reg);
+       return -EINVAL;
+}
+
+/* Bit 15 of the per-DIMM register flags a populated slot. */
+#define IS_DIMM_PRESENT(mtr)           GET_BITFIELD((mtr), 15, 15)
+
+/*
+ * Field decoders: get_dimm_attr(reg, lobit, hibit, add, minval, maxval, name).
+ *
+ * The rank field is used by get_dimm_info() as a power-of-two exponent
+ * (1 << (rows + cols + ranks)), so a single-rank DIMM is encoded as 0.
+ * The minimum must therefore be 0; a minimum of 1 would reject
+ * single-rank DIMMs with -EINVAL and feed a negative shift count into
+ * the size computation.
+ */
+#define numrank(reg) get_dimm_attr((reg), 12, 13, 0, 0, 2, "ranks")
+#define numrow(reg) get_dimm_attr((reg), 2, 4, 12, 1, 6, "rows")
+#define numcol(reg) get_dimm_attr((reg), 0, 1, 10, 0, 2, "cols")
+
+/* Map bits 8..9 of @mtr to the EDAC device width constant. */
+static int get_width(u32 mtr)
+{
+       u32 code = GET_BITFIELD(mtr, 8, 9);
+
+       if (code == 0)
+               return DEV_X4;
+       if (code == 1)
+               return DEV_X8;
+       if (code == 2)
+               return DEV_X16;
+
+       return DEV_UNKNOWN;
+}
+
+/*
+ * Read skx_tolm (offset 0xD0) and the 64-bit skx_tohm (offsets 0xD4 low,
+ * 0xD8 high) from Intel device 0x2034. Returns 0, or -ENODEV if the
+ * device is not found.
+ */
+static int skx_get_hi_lo(void)
+{
+       struct pci_dev *pdev;
+       u64 tohm;
+       u32 val;
+
+       pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2034, NULL);
+       if (!pdev) {
+               edac_dbg(0, "Can't get tolm/tohm\n");
+               return -ENODEV;
+       }
+
+       pci_read_config_dword(pdev, 0xD0, &val);
+       skx_tolm = val;
+
+       pci_read_config_dword(pdev, 0xD4, &val);
+       tohm = val;
+       pci_read_config_dword(pdev, 0xD8, &val);
+       tohm |= (u64)val << 32;
+       skx_tohm = tohm;
+
+       pci_dev_put(pdev);
+       edac_dbg(2, "tolm=%llx tohm=%llx\n", skx_tolm, skx_tohm);
+
+       return 0;
+}
+
+/*
+ * Decode one DIMM slot from its @mtr and the channel's @amap register.
+ * Fills both the EDAC @dimm entry and the driver's own per-DIMM record
+ * in @imc. Returns 1 if a DIMM is present, 0 otherwise.
+ */
+static int get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
+                        struct skx_imc *imc, int chan, int dimmno)
+{
+       struct skx_dimm *sd = &imc->chan[chan].dimms[dimmno];
+       int banks = 16;
+       int ranks, rows, cols, npages;
+       u64 size;
+
+       if (!IS_DIMM_PRESENT(mtr))
+               return 0;
+
+       ranks = numrank(mtr);
+       rows = numrow(mtr);
+       cols = numcol(mtr);
+
+       /*
+        * Compute size in 8-byte (2^3) words, then shift to MiB (2^20)
+        */
+       size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3);
+       npages = MiB_TO_PAGES(size);
+
+       edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
+                imc->mc, chan, dimmno, size, npages,
+                banks, ranks, rows, cols);
+
+       /* driver-private geometry, consumed later by the address decoder */
+       sd->close_pg = GET_BITFIELD(mtr, 0, 0);
+       sd->bank_xor_enable = GET_BITFIELD(mtr, 9, 9);
+       sd->fine_grain_bank = GET_BITFIELD(amap, 0, 0);
+       sd->rowbits = rows;
+       sd->colbits = cols;
+
+       /* what the EDAC core reports for this DIMM */
+       dimm->nr_pages = npages;
+       dimm->grain = 32;
+       dimm->dtype = get_width(mtr);
+       dimm->mtype = MEM_DDR4;
+       dimm->edac_mode = EDAC_SECDED; /* likely better than this */
+       snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
+                imc->src_id, imc->lmc, chan, dimmno);
+
+       return 1;
+}
+
+/* Read config offset 0x87c of @dev into @reg. */
+#define SKX_GET_MTMTR(dev, reg) \
+       pci_read_config_dword((dev), 0x87c, &(reg))
+
+/* True when bit 2 of the register at 0x87c is set on @pdev. */
+static bool skx_check_ecc(struct pci_dev *pdev)
+{
+       u32 mtmtr;
+
+       SKX_GET_MTMTR(pdev, mtmtr);
+       if (GET_BITFIELD(mtmtr, 2, 2))
+               return true;
+
+       return false;
+}
+
+/*
+ * Walk every channel and slot of the memory controller behind @mci and
+ * fill in the DIMM information. Returns -ENODEV if a channel has DIMMs
+ * but the ECC check fails, 0 otherwise.
+ */
+static int skx_get_dimm_config(struct mem_ctl_info *mci)
+{
+       struct skx_pvt *pvt = mci->pvt_info;
+       struct skx_imc *imc = pvt->imc;
+       int chan, slot;
+
+       for (chan = 0; chan < NUM_CHANNELS; chan++) {
+               struct pci_dev *cdev = imc->chan[chan].cdev;
+               int populated = 0;
+               u32 amap;
+
+               pci_read_config_dword(cdev, 0x8C, &amap);
+
+               for (slot = 0; slot < NUM_DIMMS; slot++) {
+                       struct dimm_info *dimm;
+                       u32 mtr;
+
+                       dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+                                            mci->n_layers, chan, slot, 0);
+                       /* one 32-bit register per slot, starting at 0x80 */
+                       pci_read_config_dword(cdev, 0x80 + 4*slot, &mtr);
+                       populated += get_dimm_info(mtr, amap, dimm, imc,
+                                                  chan, slot);
+               }
+
+               if (!populated)
+                       continue;
+
+               /* the ECC bit is always read from channel 0's device */
+               if (!skx_check_ecc(imc->chan[0].cdev)) {
+                       skx_printk(KERN_ERR, "ECC is disabled on imc %d\n", imc->mc);
+                       return -ENODEV;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Undo skx_register_mci() for @imc: remove the controller from the EDAC
+ * core and free it together with its ctl_name. Does nothing when no mci
+ * is attached.
+ */
+static void skx_unregister_mci(struct skx_imc *imc)
+{
+       struct mem_ctl_info *mci = imc->mci;
+
+       if (mci) {
+               edac_dbg(0, "MC%d: mci = %p\n", imc->mc, mci);
+
+               /* Remove MC sysfs nodes */
+               edac_mc_del_mc(mci->pdev);
+
+               edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
+               kfree(mci->ctl_name);
+               edac_mc_free(mci);
+       }
+}
+
+/*
+ * Allocate, populate and register the EDAC memory controller for @imc.
+ *
+ * The controller has two layers: NUM_CHANNELS channels, each with
+ * NUM_DIMMS slots. On success @imc->mci points at the new controller.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails, the error from
+ * skx_get_dimm_config(), or -EINVAL if edac_mc_add_mc() fails. On any
+ * failure everything allocated here is released and @imc->mci is NULL.
+ */
+static int skx_register_mci(struct skx_imc *imc)
+{
+       struct mem_ctl_info *mci;
+       struct edac_mc_layer layers[2];
+       struct pci_dev *pdev = imc->chan[0].cdev;
+       struct skx_pvt *pvt;
+       int rc;
+
+       /* allocate a new MC control structure */
+       layers[0].type = EDAC_MC_LAYER_CHANNEL;
+       layers[0].size = NUM_CHANNELS;
+       layers[0].is_virt_csrow = false;
+       layers[1].type = EDAC_MC_LAYER_SLOT;
+       layers[1].size = NUM_DIMMS;
+       layers[1].is_virt_csrow = true;
+       mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers,
+                           sizeof(struct skx_pvt));
+
+       if (unlikely(!mci))
+               return -ENOMEM;
+
+       edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci);
+
+       /* Associate skx_dev and mci for future usage */
+       imc->mci = mci;
+       pvt = mci->pvt_info;
+       pvt->imc = imc;
+
+       mci->ctl_name = kasprintf(GFP_KERNEL, "Skylake Socket#%d IMC#%d",
+                                 imc->node_id, imc->lmc);
+       /* kasprintf() allocates; do not register a controller without a name */
+       if (!mci->ctl_name) {
+               rc = -ENOMEM;
+               goto fail;
+       }
+       mci->mtype_cap = MEM_FLAG_DDR4;
+       mci->edac_ctl_cap = EDAC_FLAG_NONE;
+       mci->edac_cap = EDAC_FLAG_NONE;
+       mci->mod_name = "skx_edac.c";
+       mci->dev_name = pci_name(imc->chan[0].cdev);
+       mci->mod_ver = SKX_REVISION;
+       mci->ctl_page_to_phys = NULL;
+
+       rc = skx_get_dimm_config(mci);
+       if (rc < 0)
+               goto fail;
+
+       /* record ptr to the generic device */
+       mci->pdev = &pdev->dev;
+
+       /* add this new MC control structure to EDAC's list of MCs */
+       if (unlikely(edac_mc_add_mc(mci))) {
+               edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
+               rc = -EINVAL;
+               goto fail;
+       }
+
+       return 0;
+
+fail:
+       /* kfree(NULL) is a no-op, so this is safe when kasprintf() failed */
+       kfree(mci->ctl_name);
+       edac_mc_free(mci);
+       imc->mci = NULL;
+       return rc;
+}
+
+/* Number of SAD entries scanned by skx_sad_decode(). */
+#define        SKX_MAX_SAD 24
+
+/*
+ * Entry i occupies two consecutive dwords on the sad_all device: the
+ * SAD register at 0x60 + 8*i and the interleave list at 0x64 + 8*i.
+ */
+#define SKX_GET_SAD(d, i, reg) \
+       pci_read_config_dword((d)->sad_all, 0x60 + 8 * (i), &reg)
+#define SKX_GET_ILV(d, i, reg) \
+       pci_read_config_dword((d)->sad_all, 0x64 + 8 * (i), &reg)
+
+/* Field accessors for a SAD register value. */
+#define        SKX_SAD_MOD3MODE(sad)   GET_BITFIELD((sad), 30, 31)
+#define        SKX_SAD_MOD3(sad)       GET_BITFIELD((sad), 27, 27)
+#define SKX_SAD_LIMIT(sad)     (((u64)GET_BITFIELD((sad), 7, 26) << 26) | MASK26)
+#define        SKX_SAD_MOD3ASMOD2(sad) GET_BITFIELD((sad), 5, 6)
+#define        SKX_SAD_ATTR(sad)       GET_BITFIELD((sad), 3, 4)
+#define        SKX_SAD_INTERLEAVE(sad) GET_BITFIELD((sad), 1, 2)
+#define SKX_SAD_ENABLE(sad)    GET_BITFIELD((sad), 0, 0)
+
+/* A 4-bit interleave target: bit 3 clear means remote, bits 0..2 are the id. */
+#define SKX_ILV_REMOTE(tgt)    (((tgt) & 8) == 0)
+#define SKX_ILV_TARGET(tgt)    ((tgt) & 7)
+
+/*
+ * First decode stage: map res->addr to a socket, memory controller and
+ * channel using the SAD entries and the mcroute table.
+ *
+ * Starts on the first socket in skx_edac_list; if the matching entry
+ * points at a remote node, the lookup is restarted once on that node.
+ * On success fills res->dev, res->socket, res->imc and res->channel and
+ * returns true. Returns false if the address is out of range, no entry
+ * matches, the remote node is unknown or the entry is malformed.
+ */
+static bool skx_sad_decode(struct decoded_addr *res)
+{
+       struct skx_dev *d = list_first_entry(&skx_edac_list, typeof(*d), list);
+       u64 addr = res->addr;
+       int i, idx, tgt, lchan, shift;
+       u32 sad, ilv;
+       u64 limit, prev_limit;
+       int remote = 0;
+
+       /* Simple sanity check for I/O space or out of range */
+       if (addr >= skx_tohm || (addr >= skx_tolm && addr < BIT_ULL(32))) {
+               edac_dbg(0, "Address %llx out of range\n", addr);
+               return false;
+       }
+
+restart:
+       /* Each entry covers [end of previous entry + 1, its own limit]. */
+       prev_limit = 0;
+       for (i = 0; i < SKX_MAX_SAD; i++) {
+               SKX_GET_SAD(d, i, sad);
+               limit = SKX_SAD_LIMIT(sad);
+               if (SKX_SAD_ENABLE(sad)) {
+                       if (addr >= prev_limit && addr <= limit)
+                               goto sad_found;
+               }
+               prev_limit = limit + 1;
+       }
+       edac_dbg(0, "No SAD entry for %llx\n", addr);
+       return false;
+
+sad_found:
+       SKX_GET_ILV(d, i, ilv);
+
+       /* The interleave mode selects which 3 address bits index the list. */
+       switch (SKX_SAD_INTERLEAVE(sad)) {
+       case 0:
+               idx = GET_BITFIELD(addr, 6, 8);
+               break;
+       case 1:
+               idx = GET_BITFIELD(addr, 8, 10);
+               break;
+       case 2:
+               idx = GET_BITFIELD(addr, 12, 14);
+               break;
+       case 3:
+               idx = GET_BITFIELD(addr, 30, 32);
+               break;
+       }
+
+       tgt = GET_BITFIELD(ilv, 4 * idx, 4 * idx + 3);
+
+       /* If point to another node, find it and start over */
+       if (SKX_ILV_REMOTE(tgt)) {
+               if (remote) {
+                       edac_dbg(0, "Double remote!\n");
+                       return false;
+               }
+               remote = 1;
+               list_for_each_entry(d, &skx_edac_list, list) {
+                       if (d->imc[0].src_id == SKX_ILV_TARGET(tgt))
+                               goto restart;
+               }
+               edac_dbg(0, "Can't find node %d\n", SKX_ILV_TARGET(tgt));
+               return false;
+       }
+
+       if (SKX_SAD_MOD3(sad) == 0)
+               lchan = SKX_ILV_TARGET(tgt);
+       else {
+               switch (SKX_SAD_MOD3MODE(sad)) {
+               case 0:
+                       shift = 6;
+                       break;
+               case 1:
+                       shift = 8;
+                       break;
+               case 2:
+                       shift = 12;
+                       break;
+               default:
+                       edac_dbg(0, "illegal mod3mode\n");
+                       return false;
+               }
+               switch (SKX_SAD_MOD3ASMOD2(sad)) {
+               case 0:
+                       lchan = (addr >> shift) % 3;
+                       break;
+               case 1:
+                       lchan = (addr >> shift) % 2;
+                       break;
+               case 2:
+                       lchan = (addr >> shift) % 2;
+                       /*
+                        * Logical NOT, not bitwise: maps 0 -> 1 and 1 -> 2.
+                        * Bitwise '~' would set every upper bit, producing a
+                        * negative lchan and negative bit positions below.
+                        */
+                       lchan = (lchan << 1) | !lchan;
+                       break;
+               case 3:
+                       lchan = ((addr >> shift) % 2) << 1;
+                       break;
+               }
+               lchan = (lchan << 1) | (SKX_ILV_TARGET(tgt) & 1);
+       }
+
+       /* mcroute: 3-bit imc per lchan in the low bits, 2-bit channel from bit 18 */
+       res->dev = d;
+       res->socket = d->imc[0].src_id;
+       res->imc = GET_BITFIELD(d->mcroute, lchan * 3, lchan * 3 + 2);
+       res->channel = GET_BITFIELD(d->mcroute, lchan * 2 + 18, lchan * 2 + 19);
+
+       edac_dbg(2, "%llx: socket=%d imc=%d channel=%d\n",
+                res->addr, res->socket, res->imc, res->channel);
+       return true;
+}
+
+/* Number of TAD entries scanned by skx_tad_decode(). */
+#define        SKX_MAX_TAD 8
+
+/*
+ * TAD base and wayness registers are read from channel 0's device of
+ * the memory controller; the channel interleave offset is per channel.
+ */
+#define SKX_GET_TADBASE(d, mc, i, reg)                 \
+       pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x850 + 4 * (i), &reg)
+#define SKX_GET_TADWAYNESS(d, mc, i, reg)              \
+       pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x880 + 4 * (i), &reg)
+#define SKX_GET_TADCHNILVOFFSET(d, mc, ch, i, reg)     \
+       pci_read_config_dword((d)->imc[mc].chan[ch].cdev, 0x90 + 4 * (i), &reg)
+
+/* Field accessors for the TAD registers; addresses are in 64MiB (1 << 26) units. */
+#define        SKX_TAD_BASE(b)         ((u64)GET_BITFIELD((b), 12, 31) << 26)
+#define SKX_TAD_SKT_GRAN(b)    GET_BITFIELD((b), 4, 5)
+#define SKX_TAD_CHN_GRAN(b)    GET_BITFIELD((b), 6, 7)
+#define        SKX_TAD_LIMIT(b)        (((u64)GET_BITFIELD((b), 12, 31) << 26) | MASK26)
+#define        SKX_TAD_OFFSET(b)       ((u64)GET_BITFIELD((b), 4, 23) << 26)
+#define        SKX_TAD_SKTWAYS(b)      (1 << GET_BITFIELD((b), 10, 11))
+#define        SKX_TAD_CHNWAYS(b)      (GET_BITFIELD((b), 8, 9) + 1)
+
+/* which bit used for both socket and channel interleave */
+static int skx_granularity[] = { 6, 8, 12, 30 };
+
+/*
+ * Remove a @ways-way interleave at bit position @shift: the part of
+ * @addr above @shift is divided by @ways, and the bits below @shift are
+ * taken from @lowbits.
+ */
+static u64 skx_do_interleave(u64 addr, int shift, int ways, u64 lowbits)
+{
+       u64 low_mask = (1ull << shift) - 1;
+       u64 upper = (addr >> shift) / ways;
+
+       return (upper << shift) | (lowbits & low_mask);
+}
+
+/*
+ * Second decode stage: translate res->addr into a channel address.
+ *
+ * Finds the TAD entry whose [base, limit] range contains the address,
+ * subtracts the channel's interleave offset and removes socket and
+ * channel interleaving. Fills res->sktways, res->chanways and
+ * res->chan_addr. Returns false when no TAD entry matches.
+ */
+static bool skx_tad_decode(struct decoded_addr *res)
+{
+       int i;
+       u32 base, wayness, chnilvoffset;
+       int skt_interleave_bit, chn_interleave_bit;
+       u64 channel_addr;
+
+       for (i = 0; i < SKX_MAX_TAD; i++) {
+               SKX_GET_TADBASE(res->dev, res->imc, i, base);
+               SKX_GET_TADWAYNESS(res->dev, res->imc, i, wayness);
+               if (SKX_TAD_BASE(base) <= res->addr && res->addr <= SKX_TAD_LIMIT(wayness))
+                       goto tad_found;
+       }
+       edac_dbg(0, "No TAD entry for %llx\n", res->addr);
+       return false;
+
+tad_found:
+       res->sktways = SKX_TAD_SKTWAYS(wayness);
+       res->chanways = SKX_TAD_CHNWAYS(wayness);
+       /* granularity codes index skx_granularity[] to get the bit position */
+       skt_interleave_bit = skx_granularity[SKX_TAD_SKT_GRAN(base)];
+       chn_interleave_bit = skx_granularity[SKX_TAD_CHN_GRAN(base)];
+
+       SKX_GET_TADCHNILVOFFSET(res->dev, res->imc, res->channel, i, chnilvoffset);
+       channel_addr = res->addr - SKX_TAD_OFFSET(chnilvoffset);
+
+       if (res->chanways == 3 && skt_interleave_bit > chn_interleave_bit) {
+               /* Must handle channel first, then socket */
+               channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit,
+                                                res->chanways, channel_addr);
+               channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit,
+                                                res->sktways, channel_addr);
+       } else {
+               /* Handle socket then channel. Preserve low bits from original address */
+               channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit,
+                                                res->sktways, res->addr);
+               channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit,
+                                                res->chanways, res->addr);
+       }
+
+       res->chan_addr = channel_addr;
+
+       edac_dbg(2, "%llx: chan_addr=%llx sktways=%d chanways=%d\n",
+                res->addr, res->chan_addr, res->sktways, res->chanways);
+       return true;
+}
+
+/* Number of RIR entries scanned by skx_rir_decode(). */
+#define SKX_MAX_RIR 4
+
+/*
+ * RIR registers live on the channel's own device. The wayness register
+ * for entry i is at 0x108 + 4*i; the interleave register is at
+ * 0x120 + 16*idx + 4*i. Every macro argument used in arithmetic is
+ * parenthesized so that expression arguments expand correctly.
+ */
+#define SKX_GET_RIRWAYNESS(d, mc, ch, i, reg)          \
+       pci_read_config_dword((d)->imc[mc].chan[ch].cdev,       \
+                             0x108 + 4 * (i), &reg)
+#define SKX_GET_RIRILV(d, mc, ch, idx, i, reg)         \
+       pci_read_config_dword((d)->imc[mc].chan[ch].cdev,       \
+                             0x120 + 16 * (idx) + 4 * (i), &reg)
+
+/* Field accessors for the RIR registers. */
+#define        SKX_RIR_VALID(b) GET_BITFIELD((b), 31, 31)
+#define        SKX_RIR_LIMIT(b) (((u64)GET_BITFIELD((b), 1, 11) << 29) | MASK29)
+#define        SKX_RIR_WAYS(b) (1 << GET_BITFIELD((b), 28, 29))
+#define        SKX_RIR_CHAN_RANK(b) GET_BITFIELD((b), 16, 19)
+#define        SKX_RIR_OFFSET(b) ((u64)(GET_BITFIELD((b), 2, 15) << 26))
+
+/*
+ * Third decode stage: translate res->chan_addr into a DIMM, rank and
+ * rank address.
+ *
+ * Finds the valid RIR entry covering the channel address, removes the
+ * rank interleave and subtracts the entry's offset. Fills
+ * res->rank_address, res->channel_rank, res->dimm and res->rank.
+ * Returns false when no RIR entry matches.
+ */
+static bool skx_rir_decode(struct decoded_addr *res)
+{
+       int i, idx, chan_rank;
+       int shift;
+       u32 rirway, rirlv;
+       u64 rank_addr, prev_limit = 0, limit;
+
+       /* interleave granularity depends on the page policy of DIMM 0 */
+       if (res->dev->imc[res->imc].chan[res->channel].dimms[0].close_pg)
+               shift = 6;
+       else
+               shift = 13;
+
+       for (i = 0; i < SKX_MAX_RIR; i++) {
+               SKX_GET_RIRWAYNESS(res->dev, res->imc, res->channel, i, rirway);
+               limit = SKX_RIR_LIMIT(rirway);
+               if (SKX_RIR_VALID(rirway)) {
+                       if (prev_limit <= res->chan_addr &&
+                           res->chan_addr <= limit)
+                               goto rir_found;
+               }
+               prev_limit = limit;
+       }
+       edac_dbg(0, "No RIR entry for %llx\n", res->addr);
+       return false;
+
+rir_found:
+       /* divide the bits above 'shift' by the ways, keep the low bits */
+       rank_addr = res->chan_addr >> shift;
+       rank_addr /= SKX_RIR_WAYS(rirway);
+       rank_addr <<= shift;
+       rank_addr |= res->chan_addr & GENMASK_ULL(shift - 1, 0);
+
+       /* NOTE(review): this store is overwritten a few lines below */
+       res->rank_address = rank_addr;
+       idx = (res->chan_addr >> shift) % SKX_RIR_WAYS(rirway);
+
+       SKX_GET_RIRILV(res->dev, res->imc, res->channel, idx, i, rirlv);
+       res->rank_address = rank_addr - SKX_RIR_OFFSET(rirlv);
+       chan_rank = SKX_RIR_CHAN_RANK(rirlv);
+       res->channel_rank = chan_rank;
+       /* channel rank is split four ranks per DIMM */
+       res->dimm = chan_rank / 4;
+       res->rank = chan_rank % 4;
+
+       edac_dbg(2, "%llx: dimm=%d rank=%d chan_rank=%d rank_addr=%llx\n",
+                res->addr, res->dimm, res->rank,
+                res->channel_rank, res->rank_address);
+       return true;
+}
+
+/*
+ * Rank-address bit positions that make up the DRAM row and column, for
+ * skx_bits(): entry i is the rank-address bit that becomes result bit i.
+ *
+ * numrow() can report up to 18 row bits (field value 6 + 12), so the row
+ * tables need 18 entries; bit 34 supplies the 18th row bit. With only 17
+ * entries skx_bits() would read past the end of the array.
+ *
+ * NOTE(review): numcol() can report up to 12 column bits while the
+ * column tables hold 10 entries - confirm wider DIMMs cannot occur.
+ */
+static u8 skx_close_row[] = {
+       15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33, 34
+};
+static u8 skx_close_column[] = {
+       3, 4, 5, 14, 19, 23, 24, 25, 26, 27
+};
+static u8 skx_open_row[] = {
+       14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34
+};
+static u8 skx_open_column[] = {
+       3, 4, 5, 6, 7, 8, 9, 10, 11, 12
+};
+static u8 skx_open_fine_column[] = {
+       3, 4, 5, 7, 8, 9, 10, 11, 12, 13
+};
+
+/*
+ * Gather @nbits bits of @addr into a value: result bit n is the bit of
+ * @addr at position @bits[n].
+ */
+static int skx_bits(u64 addr, int nbits, u8 *bits)
+{
+       int value = 0;
+       int n = 0;
+
+       while (n < nbits) {
+               if ((addr >> bits[n]) & 1)
+                       value |= (u64)1 << n;
+               n++;
+       }
+
+       return value;
+}
+
+/*
+ * Build a 2-bit bank value from address bits @b0 (low) and @b1 (high).
+ * When @do_xor is set, each bit is XORed with address bit @x0 / @x1
+ * respectively.
+ */
+static int skx_bank_bits(u64 addr, int b0, int b1, int do_xor, int x0, int x1)
+{
+       int low = GET_BITFIELD(addr, b0, b0);
+       int high = GET_BITFIELD(addr, b1, b1);
+
+       if (do_xor) {
+               low ^= GET_BITFIELD(addr, x0, x0);
+               high ^= GET_BITFIELD(addr, x1, x1);
+       }
+
+       return low | (high << 1);
+}
+
+/*
+ * Final decode stage: split r->rank_address into row, column, bank
+ * address and bank group, using the bit tables that match the DIMM's
+ * page policy (close_pg) and fine_grain_bank setting. Always returns
+ * true.
+ */
+static bool skx_mad_decode(struct decoded_addr *r)
+{
+       struct skx_dimm *dimm = &r->dev->imc[r->imc].chan[r->channel].dimms[r->dimm];
+       /* low bank-group bit used in open-page mode */
+       int bg0 = dimm->fine_grain_bank ? 6 : 13;
+
+       if (dimm->close_pg) {
+               r->row = skx_bits(r->rank_address, dimm->rowbits, skx_close_row);
+               r->column = skx_bits(r->rank_address, dimm->colbits, skx_close_column);
+               r->column |= 0x400; /* C10 is autoprecharge, always set */
+               r->bank_address = skx_bank_bits(r->rank_address, 8, 9, dimm->bank_xor_enable, 22, 28);
+               r->bank_group = skx_bank_bits(r->rank_address, 6, 7, dimm->bank_xor_enable, 20, 21);
+       } else {
+               r->row = skx_bits(r->rank_address, dimm->rowbits, skx_open_row);
+               if (dimm->fine_grain_bank)
+                       r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_fine_column);
+               else
+                       r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_column);
+               r->bank_address = skx_bank_bits(r->rank_address, 18, 19, dimm->bank_xor_enable, 22, 23);
+               r->bank_group = skx_bank_bits(r->rank_address, bg0, 17, dimm->bank_xor_enable, 20, 21);
+       }
+       /* keep only the low rowbits bits of the row */
+       r->row &= (1u << dimm->rowbits) - 1;
+
+       edac_dbg(2, "%llx: row=%x col=%x bank_addr=%d bank_group=%d\n",
+                r->addr, r->row, r->column, r->bank_address,
+                r->bank_group);
+       return true;
+}
+
+/*
+ * Run the four decode stages in order (SAD, TAD, RIR, MAD) on @res,
+ * stopping at the first one that fails. Returns true only if all
+ * stages succeed.
+ */
+static bool skx_decode(struct decoded_addr *res)
+{
+       if (!skx_sad_decode(res))
+               return false;
+       if (!skx_tad_decode(res))
+               return false;
+       if (!skx_rir_decode(res))
+               return false;
+
+       return skx_mad_decode(res);
+}
+
+#ifdef CONFIG_EDAC_DEBUG
+/*
+ * Debug feature. Make /sys/kernel/debug/skx_edac_test/addr.
+ * Write an address to this file to exercise the address decode
+ * logic in this driver.
+ */
+static struct dentry *skx_test;
+static u64 skx_fake_addr;
+
+/*
+ * Write handler for the "addr" file: decode @val as a system address.
+ * The result is only visible through the decoder's debug messages;
+ * @data is unused and the decode outcome is ignored.
+ */
+static int debugfs_u64_set(void *data, u64 val)
+{
+       struct decoded_addr res;
+
+       res.addr = val;
+       skx_decode(&res);
+
+       return 0;
+}
+
+/* Write-only attribute: no getter is supplied. */
+DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
+
+/* Create a debugfs file backed by fops_u64_wo. */
+static struct dentry *mydebugfs_create(const char *name, umode_t mode,
+                                      struct dentry *parent, u64 *value)
+{
+       return debugfs_create_file(name, mode, parent, value, &fops_u64_wo);
+}
+
+/* Create the skx_edac_test directory and its owner-writable "addr" file. */
+static void setup_skx_debug(void)
+{
+       skx_test = debugfs_create_dir("skx_edac_test", NULL);
+       mydebugfs_create("addr", S_IWUSR, skx_test, &skx_fake_addr);
+}
+
+/* Remove the debug directory and everything below it. */
+static void teardown_skx_debug(void)
+{
+       debugfs_remove_recursive(skx_test);
+}
+#else
+/* Without CONFIG_EDAC_DEBUG the debug hooks are empty stubs. */
+static void setup_skx_debug(void)
+{
+}
+
+static void teardown_skx_debug(void)
+{
+}
+#endif /*CONFIG_EDAC_DEBUG*/
+
+/*
+ * Report one decoded memory error to the EDAC core.
+ *
+ * @mci: memory controller the error was decoded to
+ * @m:   machine check record
+ * @res: result of skx_decode() for m->addr
+ *
+ * Classifies the severity from the MCE status bits, builds a
+ * description of the decoded location and passes both to
+ * edac_mc_handle_error().
+ */
+static void skx_mce_output_error(struct mem_ctl_info *mci,
+                                const struct mce *m,
+                                struct decoded_addr *res)
+{
+       enum hw_event_mc_err_type tp_event;
+       char *optype, msg[256];
+       bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
+       bool overflow = GET_BITFIELD(m->status, 62, 62);
+       bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
+       bool recoverable;
+       u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
+       u32 mscod = GET_BITFIELD(m->status, 16, 31);
+       u32 errcode = GET_BITFIELD(m->status, 0, 15);
+       u32 optypenum = GET_BITFIELD(m->status, 4, 6);
+
+       recoverable = GET_BITFIELD(m->status, 56, 56);
+
+       /*
+        * RIPV set means execution can be restarted reliably at the saved
+        * instruction pointer, so such an uncorrected error is not fatal.
+        * Only an uncorrected error without a valid restart IP is fatal.
+        */
+       if (uncorrected_error) {
+               if (ripv)
+                       tp_event = HW_EVENT_ERR_UNCORRECTED;
+               else
+                       tp_event = HW_EVENT_ERR_FATAL;
+       } else {
+               tp_event = HW_EVENT_ERR_CORRECTED;
+       }
+
+       /*
+        * According with Table 15-9 of the Intel Architecture spec vol 3A,
+        * memory errors should fit in this mask:
+        *      000f 0000 1mmm cccc (binary)
+        * where:
+        *      f = Correction Report Filtering Bit. If 1, subsequent errors
+        *          won't be shown
+        *      mmm = error type
+        *      cccc = channel
+        * If the mask doesn't match, report an error to the parsing logic
+        */
+       if (!((errcode & 0xef80) == 0x80)) {
+               optype = "Can't parse: it is not a mem";
+       } else {
+               switch (optypenum) {
+               case 0:
+                       optype = "generic undef request error";
+                       break;
+               case 1:
+                       optype = "memory read error";
+                       break;
+               case 2:
+                       optype = "memory write error";
+                       break;
+               case 3:
+                       optype = "addr/cmd error";
+                       break;
+               case 4:
+                       optype = "memory scrubbing error";
+                       break;
+               default:
+                       optype = "reserved";
+                       break;
+               }
+       }
+
+       snprintf(msg, sizeof(msg),
+                "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
+                overflow ? " OVERFLOW" : "",
+                (uncorrected_error && recoverable) ? " recoverable" : "",
+                mscod, errcode,
+                res->socket, res->imc, res->rank,
+                res->bank_group, res->bank_address, res->row, res->column);
+
+       edac_dbg(0, "%s\n", msg);
+
+       /* Call the helper to output message */
+       edac_mc_handle_error(tp_event, mci, core_err_cnt,
+                            m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
+                            res->channel, res->dimm, -1,
+                            optype, msg);
+}
+
+/*
+ * Notifier callback on the MCE decode chain (registered via skx_mce_dec).
+ *
+ * @data is the struct mce being reported. Records that are not memory
+ * errors with a valid address, or whose address cannot be decoded, are
+ * ignored. Otherwise the raw record is logged and the decoded error is
+ * handed to skx_mce_output_error(). Always returns NOTIFY_DONE.
+ */
+static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
+                              void *data)
+{
+       struct mce *mce = (struct mce *)data;
+       struct decoded_addr res;
+       struct mem_ctl_info *mci;
+       char *type;
+
+       if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+               return NOTIFY_DONE;
+
+       /* ignore unless this is memory related with an address */
+       if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
+               return NOTIFY_DONE;
+
+       res.addr = mce->addr;
+       if (!skx_decode(&res))
+               return NOTIFY_DONE;
+       /* skx_decode() filled res.dev and res.imc */
+       mci = res.dev->imc[res.imc].mci;
+
+       if (mce->mcgstatus & MCG_STATUS_MCIP)
+               type = "Exception";
+       else
+               type = "Event";
+
+       skx_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n");
+
+       skx_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: %Lx "
+                         "Bank %d: %016Lx\n", mce->extcpu, type,
+                         mce->mcgstatus, mce->bank, mce->status);
+       skx_mc_printk(mci, KERN_DEBUG, "TSC %llx ", mce->tsc);
+       skx_mc_printk(mci, KERN_DEBUG, "ADDR %llx ", mce->addr);
+       skx_mc_printk(mci, KERN_DEBUG, "MISC %llx ", mce->misc);
+
+       skx_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:%x TIME %llu SOCKET "
+                         "%u APIC %x\n", mce->cpuvendor, mce->cpuid,
+                         mce->time, mce->socketid, mce->apicid);
+
+       skx_mce_output_error(mci, mce, &res);
+
+       return NOTIFY_DONE;
+}
+
+/* Notifier block hooked into the MCE decode chain by skx_init(). */
+static struct notifier_block skx_mce_dec = {
+       .notifier_call = skx_mce_check_error,
+};
+
+/*
+ * Tear down everything hanging off skx_edac_list: unregister each
+ * memory controller, drop the PCI device references held by the entry
+ * and free it. Also used as the error path of skx_init().
+ */
+static void skx_remove(void)
+{
+       struct skx_dev *sock, *next;
+       int mc, ch;
+
+       edac_dbg(0, "\n");
+
+       list_for_each_entry_safe(sock, next, &skx_edac_list, list) {
+               list_del(&sock->list);
+
+               for (mc = 0; mc < NUM_IMC; mc++) {
+                       struct skx_imc *imc = &sock->imc[mc];
+
+                       skx_unregister_mci(imc);
+                       for (ch = 0; ch < NUM_CHANNELS; ch++)
+                               pci_dev_put(imc->chan[ch].cdev);
+               }
+
+               pci_dev_put(sock->util_all);
+               pci_dev_put(sock->sad_all);
+
+               kfree(sock);
+       }
+}
+
+/*
+ * skx_init:
+ *     make sure we are running on the correct cpu model
+ *     search for all the devices we need
+ *     check which DIMMs are present.
+ *
+ * Returns 0 on success or a negative errno. On failure after the socket
+ * list has been built, skx_remove() releases everything acquired so far.
+ */
+int __init skx_init(void)
+{
+       const struct x86_cpu_id *id;
+       const struct munit *m;
+       int rc = 0, i;
+       u8 mc = 0, src_id, node_id;
+       struct skx_dev *d;
+
+       edac_dbg(2, "\n");
+
+       id = x86_match_cpu(skx_cpuids);
+       if (!id)
+               return -ENODEV;
+
+       rc = skx_get_hi_lo();
+       if (rc)
+               return rc;
+
+       /* builds skx_edac_list and counts sockets */
+       rc = get_all_bus_mappings();
+       if (rc < 0)
+               goto fail;
+       if (rc == 0) {
+               edac_dbg(2, "No memory controllers found\n");
+               return -ENODEV;
+       }
+
+       /* every unit type must show up the expected number of times */
+       for (m = skx_all_munits; m->did; m++) {
+               rc = get_all_munits(m);
+               if (rc < 0)
+                       goto fail;
+               if (rc != m->per_socket * skx_num_sockets) {
+                       edac_dbg(2, "Expected %d, got %d of %x\n",
+                                m->per_socket * skx_num_sockets, rc, m->did);
+                       rc = -ENODEV;
+                       goto fail;
+               }
+       }
+
+       /* register one EDAC memory controller per IMC, numbered globally by mc */
+       list_for_each_entry(d, &skx_edac_list, list) {
+               src_id = get_src_id(d);
+               node_id = skx_get_node_id(d);
+               edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id);
+               for (i = 0; i < NUM_IMC; i++) {
+                       d->imc[i].mc = mc++;
+                       d->imc[i].lmc = i;
+                       d->imc[i].src_id = src_id;
+                       d->imc[i].node_id = node_id;
+                       rc = skx_register_mci(&d->imc[i]);
+                       if (rc < 0)
+                               goto fail;
+               }
+       }
+
+       /* Ensure that the OPSTATE is set correctly for POLL or NMI */
+       opstate_init();
+
+       setup_skx_debug();
+
+       mce_register_decode_chain(&skx_mce_dec);
+
+       return 0;
+fail:
+       skx_remove();
+       return rc;
+}
+
+/*
+ * Module unload: stop every source of decode requests before freeing
+ * the state they use.
+ *
+ * Both the MCE notifier and the debugfs test file call skx_decode(),
+ * which walks skx_edac_list. They must therefore be removed before
+ * skx_remove() frees the list; otherwise a write to the test file could
+ * decode against freed or empty state.
+ */
+static void __exit skx_exit(void)
+{
+       edac_dbg(2, "\n");
+       mce_unregister_decode_chain(&skx_mce_dec);
+       teardown_skx_debug();
+       skx_remove();
+}
+
+/* Module entry and exit points. */
+module_init(skx_init);
+module_exit(skx_exit);
+
+/* Read-only (0444) module parameter selecting the reporting mode. */
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Tony Luck");
+MODULE_DESCRIPTION("MC Driver for Intel Skylake server processors");
index c99c24b..9ae6c11 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/efi.h>
+#include <linux/vmalloc.h>
 
 #define NO_FURTHER_WRITE_ACTION -1
 
@@ -108,14 +109,15 @@ static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info)
        int ret;
        void *cap_hdr_temp;
 
-       cap_hdr_temp = kmap(cap_info->pages[0]);
+       cap_hdr_temp = vmap(cap_info->pages, cap_info->index,
+                       VM_MAP, PAGE_KERNEL);
        if (!cap_hdr_temp) {
-               pr_debug("%s: kmap() failed\n", __func__);
+               pr_debug("%s: vmap() failed\n", __func__);
                return -EFAULT;
        }
 
        ret = efi_capsule_update(cap_hdr_temp, cap_info->pages);
-       kunmap(cap_info->pages[0]);
+       vunmap(cap_hdr_temp);
        if (ret) {
                pr_err("%s: efi_capsule_update() failed\n", __func__);
                return ret;
index 53b9fd2..6eedff4 100644 (file)
@@ -190,9 +190,9 @@ efi_capsule_update_locked(efi_capsule_header_t *capsule,
  * map the capsule described by @capsule with its data in @pages and
  * send it to the firmware via the UpdateCapsule() runtime service.
  *
- * @capsule must be a virtual mapping of the first page in @pages
- * (@pages[0]) in the kernel address space. That is, a
- * capsule_header_t that describes the entire contents of the capsule
+ * @capsule must be a virtual mapping of the complete capsule update in the
+ * kernel address space, as the capsule can be consumed immediately.
+ * A capsule_header_t that describes the entire contents of the capsule
  * must be at the start of the first data page.
  *
  * Even though this function will validate that the firmware supports
index 98dd47a..66a9410 100644 (file)
@@ -50,6 +50,7 @@ config GPIO_DEVRES
 config OF_GPIO
        def_bool y
        depends on OF
+       depends on HAS_IOMEM
 
 config GPIO_ACPI
        def_bool y
@@ -188,7 +189,7 @@ config GPIO_EP93XX
 config GPIO_ETRAXFS
        bool "Axis ETRAX FS General I/O"
        depends on CRIS || COMPILE_TEST
-       depends on OF
+       depends on OF_GPIO
        select GPIO_GENERIC
        select GPIOLIB_IRQCHIP
        help
@@ -214,7 +215,7 @@ config GPIO_GENERIC_PLATFORM
 
 config GPIO_GRGPIO
        tristate "Aeroflex Gaisler GRGPIO support"
-       depends on OF
+       depends on OF_GPIO
        select GPIO_GENERIC
        select IRQ_DOMAIN
        help
@@ -312,7 +313,7 @@ config GPIO_MPC8XXX
 config GPIO_MVEBU
        def_bool y
        depends on PLAT_ORION
-       depends on OF
+       depends on OF_GPIO
        select GENERIC_IRQ_CHIP
 
 config GPIO_MXC
@@ -405,7 +406,7 @@ config GPIO_TEGRA
        bool "NVIDIA Tegra GPIO support"
        default ARCH_TEGRA
        depends on ARCH_TEGRA || COMPILE_TEST
-       depends on OF
+       depends on OF_GPIO
        help
          Say yes here to support GPIO pins on NVIDIA Tegra SoCs.
 
@@ -1099,7 +1100,7 @@ menu "SPI GPIO expanders"
 
 config GPIO_74X164
        tristate "74x164 serial-in/parallel-out 8-bits shift register"
-       depends on OF
+       depends on OF_GPIO
        help
          Driver for 74x164 compatible serial-in/parallel-out 8-outputs
          shift registers. This driver can be used to provide access
index 0880736..946d091 100644 (file)
@@ -192,6 +192,10 @@ int __max730x_probe(struct max7301 *ts)
        ts->chip.parent = dev;
        ts->chip.owner = THIS_MODULE;
 
+       ret = gpiochip_add_data(&ts->chip, ts);
+       if (ret)
+               goto exit_destroy;
+
        /*
         * initialize pullups according to platform data and cache the
         * register values for later use.
@@ -213,10 +217,6 @@ int __max730x_probe(struct max7301 *ts)
                }
        }
 
-       ret = gpiochip_add_data(&ts->chip, ts);
-       if (ret)
-               goto exit_destroy;
-
        return ret;
 
 exit_destroy:
index 8ebc5f1..700c56b 100644 (file)
@@ -426,6 +426,8 @@ struct amdgpu_mman {
 
        /* custom LRU management */
        struct amdgpu_mman_lru                  log2_size[AMDGPU_TTM_LRU_SIZE];
+       /* guard for log2_size array, don't add anything in between */
+       struct amdgpu_mman_lru                  guard;
 };
 
 int amdgpu_copy_buffer(struct amdgpu_ring *ring,
@@ -646,9 +648,9 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
 void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
 int amdgpu_gart_init(struct amdgpu_device *adev);
 void amdgpu_gart_fini(struct amdgpu_device *adev);
-void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset,
+void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
                        int pages);
-int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset,
+int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
                     int pages, struct page **pagelist,
                     dma_addr_t *dma_addr, uint32_t flags);
 
index 9831753..fe872b8 100644 (file)
@@ -321,6 +321,19 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *
                            (le16_to_cpu(path->usConnObjectId) &
                             OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
 
+                       /* Skip TV/CV support */
+                       if ((le16_to_cpu(path->usDeviceTag) ==
+                            ATOM_DEVICE_TV1_SUPPORT) ||
+                           (le16_to_cpu(path->usDeviceTag) ==
+                            ATOM_DEVICE_CV_SUPPORT))
+                               continue;
+
+                       if (con_obj_id >= ARRAY_SIZE(object_connector_convert)) {
+                               DRM_ERROR("invalid con_obj_id %d for device tag 0x%04x\n",
+                                         con_obj_id, le16_to_cpu(path->usDeviceTag));
+                               continue;
+                       }
+
                        connector_type =
                                object_connector_convert[con_obj_id];
                        connector_object_id = con_obj_id;
index 49de926..10b5ddf 100644 (file)
@@ -200,16 +200,7 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
        atpx->is_hybrid = false;
        if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
                printk("ATPX Hybrid Graphics\n");
-#if 1
-               /* This is a temporary hack until the D3 cold support
-                * makes it upstream.  The ATPX power_control method seems
-                * to still work on even if the system should be using
-                * the new standardized hybrid D3 cold ACPI interface.
-                */
-               atpx->functions.power_cntl = true;
-#else
                atpx->functions.power_cntl = false;
-#endif
                atpx->is_hybrid = true;
        }
 
index 921bce2..0feea34 100644 (file)
@@ -221,7 +221,7 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
  * Unbinds the requested pages from the gart page table and
  * replaces them with the dummy page (all asics).
  */
-void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset,
+void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
                        int pages)
 {
        unsigned t;
@@ -268,7 +268,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset,
  * (all asics).
  * Returns 0 for success, -EINVAL for failure.
  */
-int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset,
+int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
                     int pages, struct page **pagelist, dma_addr_t *dma_addr,
                     uint32_t flags)
 {
index ff63b88..5cc7052 100644 (file)
@@ -305,7 +305,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = ddev->dev_private;
        char *table = NULL;
-       int size, i;
+       int size;
 
        if (adev->pp_enabled)
                size = amdgpu_dpm_get_pp_table(adev, &table);
@@ -315,10 +315,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
        if (size >= PAGE_SIZE)
                size = PAGE_SIZE - 1;
 
-       for (i = 0; i < size; i++) {
-               sprintf(buf + i, "%02x", table[i]);
-       }
-       sprintf(buf + i, "\n");
+       memcpy(buf, table, size);
 
        return size;
 }
index b7742e6..716f2af 100644 (file)
@@ -251,8 +251,8 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 
        adev = amdgpu_get_adev(bo->bdev);
        ring = adev->mman.buffer_funcs_ring;
-       old_start = old_mem->start << PAGE_SHIFT;
-       new_start = new_mem->start << PAGE_SHIFT;
+       old_start = (u64)old_mem->start << PAGE_SHIFT;
+       new_start = (u64)new_mem->start << PAGE_SHIFT;
 
        switch (old_mem->mem_type) {
        case TTM_PL_VRAM:
@@ -335,7 +335,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo,
        if (unlikely(r)) {
                goto out_cleanup;
        }
-       r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem);
+       r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem);
 out_cleanup:
        ttm_bo_mem_put(bo, &tmp_mem);
        return r;
@@ -368,7 +368,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo,
        if (unlikely(r)) {
                return r;
        }
-       r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem);
+       r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem);
        if (unlikely(r)) {
                goto out_cleanup;
        }
@@ -950,6 +950,8 @@ static struct list_head *amdgpu_ttm_lru_tail(struct ttm_buffer_object *tbo)
        struct list_head *res = lru->lru[tbo->mem.mem_type];
 
        lru->lru[tbo->mem.mem_type] = &tbo->lru;
+       while ((++lru)->lru[tbo->mem.mem_type] == res)
+               lru->lru[tbo->mem.mem_type] = &tbo->lru;
 
        return res;
 }
@@ -960,6 +962,8 @@ static struct list_head *amdgpu_ttm_swap_lru_tail(struct ttm_buffer_object *tbo)
        struct list_head *res = lru->swap_lru;
 
        lru->swap_lru = &tbo->swap;
+       while ((++lru)->swap_lru == res)
+               lru->swap_lru = &tbo->swap;
 
        return res;
 }
@@ -1011,6 +1015,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
                lru->swap_lru = &adev->mman.bdev.glob->swap_lru;
        }
 
+       for (j = 0; j < TTM_NUM_MEM_TYPES; ++j)
+               adev->mman.guard.lru[j] = NULL;
+       adev->mman.guard.swap_lru = NULL;
+
        adev->mman.initialized = true;
        r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
                                adev->mc.real_vram_size >> PAGE_SHIFT);
index b11f4e8..4aa993d 100644 (file)
@@ -1187,7 +1187,8 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
                r = 0;
        }
 
-error:
        fence_put(fence);
+
+error:
        return r;
 }
index 8e642fc..80120fa 100644 (file)
@@ -1535,7 +1535,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        r = amd_sched_entity_init(&ring->sched, &vm->entity,
                                  rq, amdgpu_sched_jobs);
        if (r)
-               return r;
+               goto err;
 
        vm->page_directory_fence = NULL;
 
@@ -1565,6 +1565,9 @@ error_free_page_directory:
 error_free_sched_entity:
        amd_sched_entity_fini(&ring->sched, &vm->entity);
 
+err:
+       drm_free_large(vm->page_tables);
+
        return r;
 }
 
index e2f0e5d..a5c94b4 100644 (file)
@@ -5779,6 +5779,7 @@ static int ci_dpm_init_microcode(struct amdgpu_device *adev)
                break;
        case CHIP_KAVERI:
        case CHIP_KABINI:
+       case CHIP_MULLINS:
        default: BUG();
        }
 
index bff8668..b818461 100644 (file)
@@ -270,7 +270,8 @@ static const u32 tonga_mgcg_cgcg_init[] =
 
 static const u32 golden_settings_polaris11_a11[] =
 {
-       mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
+       mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
+       mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
@@ -279,7 +280,7 @@ static const u32 golden_settings_polaris11_a11[] =
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
-       mmSQ_CONFIG, 0x07f80000, 0x07180000,
+       mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
@@ -301,8 +302,8 @@ static const u32 polaris11_golden_common_all[] =
 static const u32 golden_settings_polaris10_a11[] =
 {
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
-       mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
-       mmCB_HW_CONTROL_2, 0, 0x0f000000,
+       mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
+       mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
@@ -409,6 +410,7 @@ static const u32 golden_settings_iceland_a11[] =
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
+       mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
@@ -505,8 +507,10 @@ static const u32 cz_golden_settings_a11[] =
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+       mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
+       mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
index d24a82b..0b0f086 100644 (file)
@@ -144,6 +144,7 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
                break;
        case CHIP_KAVERI:
        case CHIP_KABINI:
+       case CHIP_MULLINS:
                return 0;
        default: BUG();
        }
index 717359d..2aee2c6 100644 (file)
@@ -103,6 +103,11 @@ static const u32 stoney_mgcg_cgcg_init[] =
        mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
 };
 
+static const u32 golden_settings_stoney_common[] =
+{
+       mmMC_HUB_RDREQ_UVD, MC_HUB_RDREQ_UVD__PRESCALE_MASK, 0x00000004,
+       mmMC_RD_GRP_OTH, MC_RD_GRP_OTH__UVD_MASK, 0x00600000
+};
 
 static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
 {
@@ -142,6 +147,9 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
                amdgpu_program_register_sequence(adev,
                                                 stoney_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
+               amdgpu_program_register_sequence(adev,
+                                                golden_settings_stoney_common,
+                                                (const u32)ARRAY_SIZE(golden_settings_stoney_common));
                break;
        default:
                break;
index 1351c7e..a64715d 100644 (file)
@@ -714,7 +714,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
                DRM_ERROR("amdgpu: IB test timed out\n");
                r = -ETIMEDOUT;
                goto err1;
-       } else if (r) {
+       } else if (r < 0) {
                DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
                goto err1;
        }
index e621eba..a7d3cb3 100644 (file)
@@ -184,7 +184,7 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
                                                        sizeof(u32)) + inx;
 
        pr_debug("kfd: get kernel queue doorbell\n"
-                        "     doorbell offset   == 0x%08d\n"
+                        "     doorbell offset   == 0x%08X\n"
                         "     kernel address    == 0x%08lX\n",
                *doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx));
 
index ef312bb..963a24d 100644 (file)
@@ -405,7 +405,7 @@ void amd_sched_job_recovery(struct amd_gpu_scheduler *sched)
        spin_lock(&sched->job_list_lock);
        s_job = list_first_entry_or_null(&sched->ring_mirror_list,
                                         struct amd_sched_job, node);
-       if (s_job)
+       if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
                schedule_delayed_work(&s_job->work_tdr, sched->timeout);
 
        list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
index 80446e2..76bcb43 100644 (file)
@@ -185,14 +185,23 @@ int cirrus_driver_load(struct drm_device *dev, unsigned long flags)
                goto out;
        }
 
+       /*
+        * cirrus_modeset_init() is initializing/registering the emulated fbdev
+        * and DRM internals can access/test some of the fields in
+        * mode_config->funcs as part of the fbdev registration process.
+        * Make sure dev->mode_config.funcs is properly set to avoid
+        * dereferencing a NULL pointer.
+        * FIXME: mode_config.funcs assignment should probably be done in
+        * cirrus_modeset_init() (that's a common pattern seen in other DRM
+        * drivers).
+        */
+       dev->mode_config.funcs = &cirrus_mode_funcs;
        r = cirrus_modeset_init(cdev);
        if (r) {
                dev_err(&dev->pdev->dev, "Fatal error during modeset init: %d\n", r);
                goto out;
        }
 
-       dev->mode_config.funcs = (void *)&cirrus_mode_funcs;
-
        return 0;
 out:
        cirrus_driver_unload(dev);
index fa39307..2a3ded4 100644 (file)
@@ -475,7 +475,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
                                        val,
                                        -1,
                                        &replaced);
-               state->color_mgmt_changed = replaced;
+               state->color_mgmt_changed |= replaced;
                return ret;
        } else if (property == config->ctm_property) {
                ret = drm_atomic_replace_property_blob_from_id(crtc,
@@ -483,7 +483,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
                                        val,
                                        sizeof(struct drm_color_ctm),
                                        &replaced);
-               state->color_mgmt_changed = replaced;
+               state->color_mgmt_changed |= replaced;
                return ret;
        } else if (property == config->gamma_lut_property) {
                ret = drm_atomic_replace_property_blob_from_id(crtc,
@@ -491,7 +491,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
                                        val,
                                        -1,
                                        &replaced);
-               state->color_mgmt_changed = replaced;
+               state->color_mgmt_changed |= replaced;
                return ret;
        } else if (crtc->funcs->atomic_set_property)
                return crtc->funcs->atomic_set_property(crtc, state, property, val);
index f1d9f05..ddebe54 100644 (file)
@@ -1121,16 +1121,14 @@ static int drm_connector_register_all(struct drm_device *dev)
        struct drm_connector *connector;
        int ret;
 
-       mutex_lock(&dev->mode_config.mutex);
-
-       drm_for_each_connector(connector, dev) {
+       /* FIXME: taking the mode config mutex ends up in a clash with
+        * fbcon/backlight registration */
+       list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
                ret = drm_connector_register(connector);
                if (ret)
                        goto err;
        }
 
-       mutex_unlock(&dev->mode_config.mutex);
-
        return 0;
 
 err:
@@ -5406,6 +5404,9 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
        struct drm_pending_vblank_event *e = NULL;
        int ret = -EINVAL;
 
+       if (!drm_core_check_feature(dev, DRIVER_MODESET))
+               return -EINVAL;
+
        if (page_flip->flags & ~DRM_MODE_PAGE_FLIP_FLAGS ||
            page_flip->reserved != 0)
                return -EINVAL;
index 7df26d4..637a0aa 100644 (file)
@@ -74,6 +74,8 @@
 #define EDID_QUIRK_FORCE_8BPC                  (1 << 8)
 /* Force 12bpc */
 #define EDID_QUIRK_FORCE_12BPC                 (1 << 9)
+/* Force 6bpc */
+#define EDID_QUIRK_FORCE_6BPC                  (1 << 10)
 
 struct detailed_mode_closure {
        struct drm_connector *connector;
@@ -100,6 +102,9 @@ static struct edid_quirk {
        /* Unknown Acer */
        { "ACR", 2423, EDID_QUIRK_FIRST_DETAILED_PREFERRED },
 
+       /* AEO model 0 reports 8 bpc, but is a 6 bpc panel */
+       { "AEO", 0, EDID_QUIRK_FORCE_6BPC },
+
        /* Belinea 10 15 55 */
        { "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 },
        { "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 },
@@ -3862,6 +3867,20 @@ static void drm_add_display_info(struct edid *edid,
        /* HDMI deep color modes supported? Assign to info, if so */
        drm_assign_hdmi_deep_color_info(edid, info, connector);
 
+       /*
+        * Digital sink with "DFP 1.x compliant TMDS" according to EDID 1.3?
+        *
+        * For such displays, the DFP spec 1.0, section 3.10 "EDID support"
+        * tells us to assume 8 bpc color depth if the EDID doesn't have
+        * extensions which tell otherwise.
+        */
+       if ((info->bpc == 0) && (edid->revision < 4) &&
+           (edid->input & DRM_EDID_DIGITAL_TYPE_DVI)) {
+               info->bpc = 8;
+               DRM_DEBUG("%s: Assigning DFP sink color depth as %d bpc.\n",
+                         connector->name, info->bpc);
+       }
+
        /* Only defined for 1.4 with digital displays */
        if (edid->revision < 4)
                return;
@@ -4082,6 +4101,9 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid)
 
        drm_add_display_info(edid, &connector->display_info, connector);
 
+       if (quirks & EDID_QUIRK_FORCE_6BPC)
+               connector->display_info.bpc = 6;
+
        if (quirks & EDID_QUIRK_FORCE_8BPC)
                connector->display_info.bpc = 8;
 
index ce54e98..0a06f91 100644 (file)
@@ -464,7 +464,7 @@ static bool drm_fb_helper_is_bound(struct drm_fb_helper *fb_helper)
 
        /* Sometimes user space wants everything disabled, so don't steal the
         * display if there's a master. */
-       if (lockless_dereference(dev->master))
+       if (READ_ONCE(dev->master))
                return false;
 
        drm_for_each_crtc(crtc, dev) {
index 87ef341..b382cf5 100644 (file)
@@ -1333,8 +1333,6 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
        if (ret < 0)
                return ret;
 
-       mutex_lock(&gpu->lock);
-
        /*
         * TODO
         *
@@ -1348,16 +1346,18 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
        if (unlikely(event == ~0U)) {
                DRM_ERROR("no free event\n");
                ret = -EBUSY;
-               goto out_unlock;
+               goto out_pm_put;
        }
 
        fence = etnaviv_gpu_fence_alloc(gpu);
        if (!fence) {
                event_free(gpu, event);
                ret = -ENOMEM;
-               goto out_unlock;
+               goto out_pm_put;
        }
 
+       mutex_lock(&gpu->lock);
+
        gpu->event[event].fence = fence;
        submit->fence = fence->seqno;
        gpu->active_fence = submit->fence;
@@ -1395,9 +1395,9 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
        hangcheck_timer_reset(gpu);
        ret = 0;
 
-out_unlock:
        mutex_unlock(&gpu->lock);
 
+out_pm_put:
        etnaviv_gpu_pm_put(gpu);
 
        return ret;
index 21f9390..f68c789 100644 (file)
@@ -882,11 +882,12 @@ struct i915_gem_context {
 
        struct i915_ctx_hang_stats hang_stats;
 
-       /* Unique identifier for this context, used by the hw for tracking */
        unsigned long flags;
 #define CONTEXT_NO_ZEROMAP             BIT(0)
 #define CONTEXT_NO_ERROR_CAPTURE       BIT(1)
-       unsigned hw_id;
+
+       /* Unique identifier for this context, used by the hw for tracking */
+       unsigned int hw_id;
        u32 user_handle;
 
        u32 ggtt_alignment;
@@ -1854,6 +1855,7 @@ struct drm_i915_private {
        enum modeset_restore modeset_restore;
        struct mutex modeset_restore_lock;
        struct drm_atomic_state *modeset_restore_state;
+       struct drm_modeset_acquire_ctx reset_ctx;
 
        struct list_head vm_list; /* Global list of all address spaces */
        struct i915_ggtt ggtt; /* VM representing the global address space */
@@ -1962,6 +1964,13 @@ struct drm_i915_private {
        struct i915_suspend_saved_registers regfile;
        struct vlv_s0ix_state vlv_s0ix_state;
 
+       enum {
+               I915_SKL_SAGV_UNKNOWN = 0,
+               I915_SKL_SAGV_DISABLED,
+               I915_SKL_SAGV_ENABLED,
+               I915_SKL_SAGV_NOT_CONTROLLED
+       } skl_sagv_status;
+
        struct {
                /*
                 * Raw watermark latency values:
@@ -3590,6 +3599,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
 /* belongs in i915_gem_gtt.h */
 static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv)
 {
+       wmb();
        if (INTEL_GEN(dev_priv) < 6)
                intel_gtt_chipset_flush();
 }
index 1168150..a77ce99 100644 (file)
@@ -879,9 +879,12 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
        ret = i915_gem_shmem_pread(dev, obj, args, file);
 
        /* pread for non shmem backed objects */
-       if (ret == -EFAULT || ret == -ENODEV)
+       if (ret == -EFAULT || ret == -ENODEV) {
+               intel_runtime_pm_get(to_i915(dev));
                ret = i915_gem_gtt_pread(dev, obj, args->size,
                                        args->offset, args->data_ptr);
+               intel_runtime_pm_put(to_i915(dev));
+       }
 
 out:
        drm_gem_object_unreference(&obj->base);
@@ -1306,7 +1309,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
                 * textures). Fallback to the shmem path in that case. */
        }
 
-       if (ret == -EFAULT) {
+       if (ret == -EFAULT || ret == -ENOSPC) {
                if (obj->phys_handle)
                        ret = i915_gem_phys_pwrite(obj, args, file);
                else if (i915_gem_object_has_struct_page(obj))
@@ -3169,6 +3172,8 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
        }
 
        intel_ring_init_seqno(engine, engine->last_submitted_seqno);
+
+       engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
 }
 
 void i915_gem_reset(struct drm_device *dev)
@@ -3186,6 +3191,7 @@ void i915_gem_reset(struct drm_device *dev)
 
        for_each_engine(engine, dev_priv)
                i915_gem_reset_engine_cleanup(engine);
+       mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
 
        i915_gem_context_reset(dev);
 
index 1978633..b35e5b6 100644 (file)
@@ -943,8 +943,6 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 {
        const unsigned other_rings = ~intel_engine_flag(req->engine);
        struct i915_vma *vma;
-       uint32_t flush_domains = 0;
-       bool flush_chipset = false;
        int ret;
 
        list_for_each_entry(vma, vmas, exec_list) {
@@ -957,16 +955,11 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
                }
 
                if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
-                       flush_chipset |= i915_gem_clflush_object(obj, false);
-
-               flush_domains |= obj->base.write_domain;
+                       i915_gem_clflush_object(obj, false);
        }
 
-       if (flush_chipset)
-               i915_gem_chipset_flush(req->engine->i915);
-
-       if (flush_domains & I915_GEM_DOMAIN_GTT)
-               wmb();
+       /* Unconditionally flush any chipset caches (for streaming writes). */
+       i915_gem_chipset_flush(req->engine->i915);
 
        /* Unconditionally invalidate gpu caches and ensure that we do flush
         * any residual writes from the previous batch.
index 10f1e32..7a30af7 100644 (file)
@@ -2873,6 +2873,7 @@ void i915_ggtt_cleanup_hw(struct drm_device *dev)
                struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
 
                ppgtt->base.cleanup(&ppgtt->base);
+               kfree(ppgtt);
        }
 
        i915_gem_cleanup_stolen(dev);
index ce14fe0..bf2cad3 100644 (file)
@@ -1536,6 +1536,7 @@ enum skl_disp_power_wells {
 #define BALANCE_LEG_MASK(port)         (7<<(8+3*(port)))
 /* Balance leg disable bits */
 #define BALANCE_LEG_DISABLE_SHIFT      23
+#define BALANCE_LEG_DISABLE(port)      (1 << (23 + (port)))
 
 /*
  * Fence registers
@@ -7144,6 +7145,15 @@ enum {
 
 #define GEN6_PCODE_MAILBOX                     _MMIO(0x138124)
 #define   GEN6_PCODE_READY                     (1<<31)
+#define   GEN6_PCODE_ERROR_MASK                        0xFF
+#define     GEN6_PCODE_SUCCESS                 0x0
+#define     GEN6_PCODE_ILLEGAL_CMD             0x1
+#define     GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE 0x2
+#define     GEN6_PCODE_TIMEOUT                 0x3
+#define     GEN6_PCODE_UNIMPLEMENTED_CMD       0xFF
+#define     GEN7_PCODE_TIMEOUT                 0x2
+#define     GEN7_PCODE_ILLEGAL_DATA            0x3
+#define     GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE 0x10
 #define          GEN6_PCODE_WRITE_RC6VIDS              0x4
 #define          GEN6_PCODE_READ_RC6VIDS               0x5
 #define     GEN6_ENCODE_RC6_VID(mv)            (((mv) - 245) / 5)
@@ -7165,6 +7175,10 @@ enum {
 #define   HSW_PCODE_DE_WRITE_FREQ_REQ          0x17
 #define   DISPLAY_IPS_CONTROL                  0x19
 #define          HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL  0x1A
+#define   GEN9_PCODE_SAGV_CONTROL              0x21
+#define     GEN9_SAGV_DISABLE                  0x0
+#define     GEN9_SAGV_IS_DISABLED              0x1
+#define     GEN9_SAGV_ENABLE                   0x3
 #define GEN6_PCODE_DATA                                _MMIO(0x138128)
 #define   GEN6_PCODE_FREQ_IA_RATIO_SHIFT       8
 #define   GEN6_PCODE_FREQ_RING_RATIO_SHIFT     16
index 6700a7b..d32f586 100644 (file)
@@ -600,6 +600,8 @@ static void i915_audio_component_codec_wake_override(struct device *dev,
        if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv))
                return;
 
+       i915_audio_component_get_power(dev);
+
        /*
         * Enable/disable generating the codec wake signal, overriding the
         * internal logic to generate the codec wake to controller.
@@ -615,6 +617,8 @@ static void i915_audio_component_codec_wake_override(struct device *dev,
                I915_WRITE(HSW_AUD_CHICKENBIT, tmp);
                usleep_range(1000, 1500);
        }
+
+       i915_audio_component_put_power(dev);
 }
 
 /* Get CDCLK in kHz  */
@@ -648,6 +652,7 @@ static int i915_audio_component_sync_audio_rate(struct device *dev,
            !IS_HASWELL(dev_priv))
                return 0;
 
+       i915_audio_component_get_power(dev);
        mutex_lock(&dev_priv->av_mutex);
        /* 1. get the pipe */
        intel_encoder = dev_priv->dig_port_map[port];
@@ -698,6 +703,7 @@ static int i915_audio_component_sync_audio_rate(struct device *dev,
 
  unlock:
        mutex_unlock(&dev_priv->av_mutex);
+       i915_audio_component_put_power(dev);
        return err;
 }
 
index 3edb958..c3b33a1 100644 (file)
  * be moved to FW_FAILED.
  */
 
-#define I915_CSR_KBL "i915/kbl_dmc_ver1.bin"
+#define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin"
 MODULE_FIRMWARE(I915_CSR_KBL);
 #define KBL_CSR_VERSION_REQUIRED       CSR_VERSION(1, 1)
 
-#define I915_CSR_SKL "i915/skl_dmc_ver1.bin"
+#define I915_CSR_SKL "i915/skl_dmc_ver1_26.bin"
 MODULE_FIRMWARE(I915_CSR_SKL);
-#define SKL_CSR_VERSION_REQUIRED       CSR_VERSION(1, 23)
+#define SKL_CSR_VERSION_REQUIRED       CSR_VERSION(1, 26)
 
-#define I915_CSR_BXT "i915/bxt_dmc_ver1.bin"
+#define I915_CSR_BXT "i915/bxt_dmc_ver1_07.bin"
 MODULE_FIRMWARE(I915_CSR_BXT);
 #define BXT_CSR_VERSION_REQUIRED       CSR_VERSION(1, 7)
 
index dd1d6fe..1a7efac 100644 (file)
@@ -145,7 +145,7 @@ static const struct ddi_buf_trans skl_ddi_translations_dp[] = {
 static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = {
        { 0x0000201B, 0x000000A2, 0x0 },
        { 0x00005012, 0x00000088, 0x0 },
-       { 0x80007011, 0x000000CD, 0x0 },
+       { 0x80007011, 0x000000CD, 0x1 },
        { 0x80009010, 0x000000C0, 0x1 },
        { 0x0000201B, 0x0000009D, 0x0 },
        { 0x80005012, 0x000000C0, 0x1 },
@@ -158,7 +158,7 @@ static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = {
 static const struct ddi_buf_trans skl_y_ddi_translations_dp[] = {
        { 0x00000018, 0x000000A2, 0x0 },
        { 0x00005012, 0x00000088, 0x0 },
-       { 0x80007011, 0x000000CD, 0x0 },
+       { 0x80007011, 0x000000CD, 0x3 },
        { 0x80009010, 0x000000C0, 0x3 },
        { 0x00000018, 0x0000009D, 0x0 },
        { 0x80005012, 0x000000C0, 0x3 },
@@ -388,6 +388,40 @@ skl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries)
        }
 }
 
+static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port)
+{
+       int n_hdmi_entries;
+       int hdmi_level;
+       int hdmi_default_entry;
+
+       hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift;
+
+       if (IS_BROXTON(dev_priv))
+               return hdmi_level;
+
+       if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
+               skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries);
+               hdmi_default_entry = 8;
+       } else if (IS_BROADWELL(dev_priv)) {
+               n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
+               hdmi_default_entry = 7;
+       } else if (IS_HASWELL(dev_priv)) {
+               n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi);
+               hdmi_default_entry = 6;
+       } else {
+               WARN(1, "ddi translation table missing\n");
+               n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
+               hdmi_default_entry = 7;
+       }
+
+       /* Choose a good default if VBT is badly populated */
+       if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN ||
+           hdmi_level >= n_hdmi_entries)
+               hdmi_level = hdmi_default_entry;
+
+       return hdmi_level;
+}
+
 /*
  * Starting with Haswell, DDI port buffers must be programmed with correct
  * values in advance. The buffer values are different for FDI and DP modes,
@@ -399,7 +433,7 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
 {
        struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
        u32 iboost_bit = 0;
-       int i, n_hdmi_entries, n_dp_entries, n_edp_entries, hdmi_default_entry,
+       int i, n_hdmi_entries, n_dp_entries, n_edp_entries,
            size;
        int hdmi_level;
        enum port port;
@@ -410,7 +444,7 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
        const struct ddi_buf_trans *ddi_translations;
 
        port = intel_ddi_get_encoder_port(encoder);
-       hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift;
+       hdmi_level = intel_ddi_hdmi_level(dev_priv, port);
 
        if (IS_BROXTON(dev_priv)) {
                if (encoder->type != INTEL_OUTPUT_HDMI)
@@ -430,7 +464,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
                                skl_get_buf_trans_edp(dev_priv, &n_edp_entries);
                ddi_translations_hdmi =
                                skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries);
-               hdmi_default_entry = 8;
                /* If we're boosting the current, set bit 31 of trans1 */
                if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level ||
                    dev_priv->vbt.ddi_port_info[port].dp_boost_level)
@@ -456,7 +489,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
 
                n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
                n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
-               hdmi_default_entry = 7;
        } else if (IS_HASWELL(dev_priv)) {
                ddi_translations_fdi = hsw_ddi_translations_fdi;
                ddi_translations_dp = hsw_ddi_translations_dp;
@@ -464,7 +496,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
                ddi_translations_hdmi = hsw_ddi_translations_hdmi;
                n_dp_entries = n_edp_entries = ARRAY_SIZE(hsw_ddi_translations_dp);
                n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi);
-               hdmi_default_entry = 6;
        } else {
                WARN(1, "ddi translation table missing\n");
                ddi_translations_edp = bdw_ddi_translations_dp;
@@ -474,7 +505,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
                n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp);
                n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
                n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
-               hdmi_default_entry = 7;
        }
 
        switch (encoder->type) {
@@ -505,11 +535,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
        if (encoder->type != INTEL_OUTPUT_HDMI)
                return;
 
-       /* Choose a good default if VBT is badly populated */
-       if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN ||
-           hdmi_level >= n_hdmi_entries)
-               hdmi_level = hdmi_default_entry;
-
        /* Entry 9 is for HDMI: */
        I915_WRITE(DDI_BUF_TRANS_LO(port, i),
                   ddi_translations_hdmi[hdmi_level].trans1 | iboost_bit);
@@ -1379,14 +1404,30 @@ void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc)
                           TRANS_CLK_SEL_DISABLED);
 }
 
-static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv,
-                              u32 level, enum port port, int type)
+static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv,
+                               enum port port, uint8_t iboost)
 {
+       u32 tmp;
+
+       tmp = I915_READ(DISPIO_CR_TX_BMU_CR0);
+       tmp &= ~(BALANCE_LEG_MASK(port) | BALANCE_LEG_DISABLE(port));
+       if (iboost)
+               tmp |= iboost << BALANCE_LEG_SHIFT(port);
+       else
+               tmp |= BALANCE_LEG_DISABLE(port);
+       I915_WRITE(DISPIO_CR_TX_BMU_CR0, tmp);
+}
+
+static void skl_ddi_set_iboost(struct intel_encoder *encoder, u32 level)
+{
+       struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base);
+       struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev);
+       enum port port = intel_dig_port->port;
+       int type = encoder->type;
        const struct ddi_buf_trans *ddi_translations;
        uint8_t iboost;
        uint8_t dp_iboost, hdmi_iboost;
        int n_entries;
-       u32 reg;
 
        /* VBT may override standard boost values */
        dp_iboost = dev_priv->vbt.ddi_port_info[port].dp_boost_level;
@@ -1428,16 +1469,10 @@ static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv,
                return;
        }
 
-       reg = I915_READ(DISPIO_CR_TX_BMU_CR0);
-       reg &= ~BALANCE_LEG_MASK(port);
-       reg &= ~(1 << (BALANCE_LEG_DISABLE_SHIFT + port));
-
-       if (iboost)
-               reg |= iboost << BALANCE_LEG_SHIFT(port);
-       else
-               reg |= 1 << (BALANCE_LEG_DISABLE_SHIFT + port);
+       _skl_ddi_set_iboost(dev_priv, port, iboost);
 
-       I915_WRITE(DISPIO_CR_TX_BMU_CR0, reg);
+       if (port == PORT_A && intel_dig_port->max_lanes == 4)
+               _skl_ddi_set_iboost(dev_priv, PORT_E, iboost);
 }
 
 static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv,
@@ -1568,7 +1603,7 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp)
        level = translate_signal_level(signal_levels);
 
        if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
-               skl_ddi_set_iboost(dev_priv, level, port, encoder->type);
+               skl_ddi_set_iboost(encoder, level);
        else if (IS_BROXTON(dev_priv))
                bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type);
 
@@ -1637,6 +1672,10 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder)
                        intel_dp_stop_link_train(intel_dp);
        } else if (type == INTEL_OUTPUT_HDMI) {
                struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
+               int level = intel_ddi_hdmi_level(dev_priv, port);
+
+               if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
+                       skl_ddi_set_iboost(intel_encoder, level);
 
                intel_hdmi->set_infoframes(encoder,
                                           crtc->config->has_hdmi_sink,
index c457eed..175595f 100644 (file)
@@ -3093,40 +3093,110 @@ static void intel_update_primary_planes(struct drm_device *dev)
 
        for_each_crtc(dev, crtc) {
                struct intel_plane *plane = to_intel_plane(crtc->primary);
-               struct intel_plane_state *plane_state;
-
-               drm_modeset_lock_crtc(crtc, &plane->base);
-               plane_state = to_intel_plane_state(plane->base.state);
+               struct intel_plane_state *plane_state =
+                       to_intel_plane_state(plane->base.state);
 
                if (plane_state->visible)
                        plane->update_plane(&plane->base,
                                            to_intel_crtc_state(crtc->state),
                                            plane_state);
+       }
+}
+
+static int
+__intel_display_resume(struct drm_device *dev,
+                      struct drm_atomic_state *state)
+{
+       struct drm_crtc_state *crtc_state;
+       struct drm_crtc *crtc;
+       int i, ret;
+
+       intel_modeset_setup_hw_state(dev);
+       i915_redisable_vga(dev);
+
+       if (!state)
+               return 0;
 
-               drm_modeset_unlock_crtc(crtc);
+       for_each_crtc_in_state(state, crtc, crtc_state, i) {
+               /*
+                * Force recalculation even if we restore
+                * current state. With fast modeset this may not result
+                * in a modeset when the state is compatible.
+                */
+               crtc_state->mode_changed = true;
        }
+
+       /* ignore any reset values/BIOS leftovers in the WM registers */
+       to_intel_atomic_state(state)->skip_intermediate_wm = true;
+
+       ret = drm_atomic_commit(state);
+
+       WARN_ON(ret == -EDEADLK);
+       return ret;
 }
 
 void intel_prepare_reset(struct drm_i915_private *dev_priv)
 {
+       struct drm_device *dev = &dev_priv->drm;
+       struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx;
+       struct drm_atomic_state *state;
+       int ret;
+
        /* no reset support for gen2 */
        if (IS_GEN2(dev_priv))
                return;
 
-       /* reset doesn't touch the display */
+       /*
+        * Need mode_config.mutex so that we don't
+        * trample ongoing ->detect() and whatnot.
+        */
+       mutex_lock(&dev->mode_config.mutex);
+       drm_modeset_acquire_init(ctx, 0);
+       while (1) {
+               ret = drm_modeset_lock_all_ctx(dev, ctx);
+               if (ret != -EDEADLK)
+                       break;
+
+               drm_modeset_backoff(ctx);
+       }
+
+       /* reset doesn't touch the display, but flips might get nuked anyway, */
        if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv))
                return;
 
-       drm_modeset_lock_all(&dev_priv->drm);
        /*
         * Disabling the crtcs gracefully seems nicer. Also the
         * g33 docs say we should at least disable all the planes.
         */
-       intel_display_suspend(&dev_priv->drm);
+       state = drm_atomic_helper_duplicate_state(dev, ctx);
+       if (IS_ERR(state)) {
+               ret = PTR_ERR(state);
+               state = NULL;
+               DRM_ERROR("Duplicating state failed with %i\n", ret);
+               goto err;
+       }
+
+       ret = drm_atomic_helper_disable_all(dev, ctx);
+       if (ret) {
+               DRM_ERROR("Suspending crtc's failed with %i\n", ret);
+               goto err;
+       }
+
+       dev_priv->modeset_restore_state = state;
+       state->acquire_ctx = ctx;
+       return;
+
+err:
+       drm_atomic_state_free(state);
 }
 
 void intel_finish_reset(struct drm_i915_private *dev_priv)
 {
+       struct drm_device *dev = &dev_priv->drm;
+       struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx;
+       struct drm_atomic_state *state = dev_priv->modeset_restore_state;
+       int ret;
+
        /*
         * Flips in the rings will be nuked by the reset,
         * so complete all pending flips so that user space
@@ -3138,6 +3208,8 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
        if (IS_GEN2(dev_priv))
                return;
 
+       dev_priv->modeset_restore_state = NULL;
+
        /* reset doesn't touch the display */
        if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) {
                /*
@@ -3149,29 +3221,32 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
                 * FIXME: Atomic will make this obsolete since we won't schedule
                 * CS-based flips (which might get lost in gpu resets) any more.
                 */
-               intel_update_primary_planes(&dev_priv->drm);
-               return;
-       }
-
-       /*
-        * The display has been reset as well,
-        * so need a full re-initialization.
-        */
-       intel_runtime_pm_disable_interrupts(dev_priv);
-       intel_runtime_pm_enable_interrupts(dev_priv);
+               intel_update_primary_planes(dev);
+       } else {
+               /*
+                * The display has been reset as well,
+                * so need a full re-initialization.
+                */
+               intel_runtime_pm_disable_interrupts(dev_priv);
+               intel_runtime_pm_enable_interrupts(dev_priv);
 
-       intel_modeset_init_hw(&dev_priv->drm);
+               intel_modeset_init_hw(dev);
 
-       spin_lock_irq(&dev_priv->irq_lock);
-       if (dev_priv->display.hpd_irq_setup)
-               dev_priv->display.hpd_irq_setup(dev_priv);
-       spin_unlock_irq(&dev_priv->irq_lock);
+               spin_lock_irq(&dev_priv->irq_lock);
+               if (dev_priv->display.hpd_irq_setup)
+                       dev_priv->display.hpd_irq_setup(dev_priv);
+               spin_unlock_irq(&dev_priv->irq_lock);
 
-       intel_display_resume(&dev_priv->drm);
+               ret = __intel_display_resume(dev, state);
+               if (ret)
+                       DRM_ERROR("Restoring old state failed with %i\n", ret);
 
-       intel_hpd_init(dev_priv);
+               intel_hpd_init(dev_priv);
+       }
 
-       drm_modeset_unlock_all(&dev_priv->drm);
+       drm_modeset_drop_locks(ctx);
+       drm_modeset_acquire_fini(ctx);
+       mutex_unlock(&dev->mode_config.mutex);
 }
 
 static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
@@ -5691,15 +5766,7 @@ static bool skl_cdclk_pcu_ready(struct drm_i915_private *dev_priv)
 
 static bool skl_cdclk_wait_for_pcu_ready(struct drm_i915_private *dev_priv)
 {
-       unsigned int i;
-
-       for (i = 0; i < 15; i++) {
-               if (skl_cdclk_pcu_ready(dev_priv))
-                       return true;
-               udelay(10);
-       }
-
-       return false;
+       return _wait_for(skl_cdclk_pcu_ready(dev_priv), 3000, 10) == 0;
 }
 
 static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk, int vco)
@@ -12114,21 +12181,11 @@ connected_sink_compute_bpp(struct intel_connector *connector,
                pipe_config->pipe_bpp = connector->base.display_info.bpc*3;
        }
 
-       /* Clamp bpp to default limit on screens without EDID 1.4 */
-       if (connector->base.display_info.bpc == 0) {
-               int type = connector->base.connector_type;
-               int clamp_bpp = 24;
-
-               /* Fall back to 18 bpp when DP sink capability is unknown. */
-               if (type == DRM_MODE_CONNECTOR_DisplayPort ||
-                   type == DRM_MODE_CONNECTOR_eDP)
-                       clamp_bpp = 18;
-
-               if (bpp > clamp_bpp) {
-                       DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of %d\n",
-                                     bpp, clamp_bpp);
-                       pipe_config->pipe_bpp = clamp_bpp;
-               }
+       /* Clamp to 8 bpc (24 bpp) on screens without EDID 1.4 */
+       if (connector->base.display_info.bpc == 0 && bpp > 24) {
+               DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of 24\n",
+                             bpp);
+               pipe_config->pipe_bpp = 24;
        }
 }
 
@@ -13702,6 +13759,13 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
                     intel_state->cdclk_pll_vco != dev_priv->cdclk_pll.vco))
                        dev_priv->display.modeset_commit_cdclk(state);
 
+               /*
+                * SKL workaround: bspec recommends we disable the SAGV when we
+                * have more than one pipe enabled
+                */
+               if (IS_SKYLAKE(dev_priv) && !skl_can_enable_sagv(state))
+                       skl_disable_sagv(dev_priv);
+
                intel_modeset_verify_disabled(dev);
        }
 
@@ -13775,6 +13839,10 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
                intel_modeset_verify_crtc(crtc, old_crtc_state, crtc->state);
        }
 
+       if (IS_SKYLAKE(dev_priv) && intel_state->modeset &&
+           skl_can_enable_sagv(state))
+               skl_enable_sagv(dev_priv);
+
        drm_atomic_helper_commit_hw_done(state);
 
        if (intel_state->modeset)
@@ -16174,9 +16242,10 @@ void intel_display_resume(struct drm_device *dev)
        struct drm_atomic_state *state = dev_priv->modeset_restore_state;
        struct drm_modeset_acquire_ctx ctx;
        int ret;
-       bool setup = false;
 
        dev_priv->modeset_restore_state = NULL;
+       if (state)
+               state->acquire_ctx = &ctx;
 
        /*
         * This is a cludge because with real atomic modeset mode_config.mutex
@@ -16187,43 +16256,17 @@ void intel_display_resume(struct drm_device *dev)
        mutex_lock(&dev->mode_config.mutex);
        drm_modeset_acquire_init(&ctx, 0);
 
-retry:
-       ret = drm_modeset_lock_all_ctx(dev, &ctx);
-
-       if (ret == 0 && !setup) {
-               setup = true;
-
-               intel_modeset_setup_hw_state(dev);
-               i915_redisable_vga(dev);
-       }
-
-       if (ret == 0 && state) {
-               struct drm_crtc_state *crtc_state;
-               struct drm_crtc *crtc;
-               int i;
-
-               state->acquire_ctx = &ctx;
-
-               /* ignore any reset values/BIOS leftovers in the WM registers */
-               to_intel_atomic_state(state)->skip_intermediate_wm = true;
-
-               for_each_crtc_in_state(state, crtc, crtc_state, i) {
-                       /*
-                        * Force recalculation even if we restore
-                        * current state. With fast modeset this may not result
-                        * in a modeset when the state is compatible.
-                        */
-                       crtc_state->mode_changed = true;
-               }
-
-               ret = drm_atomic_commit(state);
-       }
+       while (1) {
+               ret = drm_modeset_lock_all_ctx(dev, &ctx);
+               if (ret != -EDEADLK)
+                       break;
 
-       if (ret == -EDEADLK) {
                drm_modeset_backoff(&ctx);
-               goto retry;
        }
 
+       if (!ret)
+               ret = __intel_display_resume(dev, state);
+
        drm_modeset_drop_locks(&ctx);
        drm_modeset_acquire_fini(&ctx);
        mutex_unlock(&dev->mode_config.mutex);
index cc937a1..ff399b9 100644 (file)
@@ -1716,6 +1716,9 @@ void ilk_wm_get_hw_state(struct drm_device *dev);
 void skl_wm_get_hw_state(struct drm_device *dev);
 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
                          struct skl_ddb_allocation *ddb /* out */);
+bool skl_can_enable_sagv(struct drm_atomic_state *state);
+int skl_enable_sagv(struct drm_i915_private *dev_priv);
+int skl_disable_sagv(struct drm_i915_private *dev_priv);
 uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config);
 bool ilk_disable_lp_wm(struct drm_device *dev);
 int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6);
index 6a7ad3e..3836a1c 100644 (file)
@@ -1230,12 +1230,29 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv)
        if (i915.enable_fbc >= 0)
                return !!i915.enable_fbc;
 
+       if (!HAS_FBC(dev_priv))
+               return 0;
+
        if (IS_BROADWELL(dev_priv))
                return 1;
 
        return 0;
 }
 
+static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv)
+{
+#ifdef CONFIG_INTEL_IOMMU
+       /* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */
+       if (intel_iommu_gfx_mapped &&
+           (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))) {
+               DRM_INFO("Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n");
+               return true;
+       }
+#endif
+
+       return false;
+}
+
 /**
  * intel_fbc_init - Initialize FBC
  * @dev_priv: the i915 device
@@ -1253,6 +1270,9 @@ void intel_fbc_init(struct drm_i915_private *dev_priv)
        fbc->active = false;
        fbc->work.scheduled = false;
 
+       if (need_fbc_vtd_wa(dev_priv))
+               mkwrite_device_info(dev_priv)->has_fbc = false;
+
        i915.enable_fbc = intel_sanitize_fbc_option(dev_priv);
        DRM_DEBUG_KMS("Sanitized enable_fbc value: %d\n", i915.enable_fbc);
 
index 86b00c6..3e3632c 100644 (file)
@@ -782,7 +782,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous
        struct intel_fbdev *ifbdev = dev_priv->fbdev;
        struct fb_info *info;
 
-       if (!ifbdev)
+       if (!ifbdev || !ifbdev->fb)
                return;
 
        info = ifbdev->helper.fbdev;
@@ -827,31 +827,28 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous
 
 void intel_fbdev_output_poll_changed(struct drm_device *dev)
 {
-       struct drm_i915_private *dev_priv = to_i915(dev);
-       if (dev_priv->fbdev)
-               drm_fb_helper_hotplug_event(&dev_priv->fbdev->helper);
+       struct intel_fbdev *ifbdev = to_i915(dev)->fbdev;
+
+       if (ifbdev && ifbdev->fb)
+               drm_fb_helper_hotplug_event(&ifbdev->helper);
 }
 
 void intel_fbdev_restore_mode(struct drm_device *dev)
 {
-       int ret;
-       struct drm_i915_private *dev_priv = to_i915(dev);
-       struct intel_fbdev *ifbdev = dev_priv->fbdev;
-       struct drm_fb_helper *fb_helper;
+       struct intel_fbdev *ifbdev = to_i915(dev)->fbdev;
 
        if (!ifbdev)
                return;
 
        intel_fbdev_sync(ifbdev);
+       if (!ifbdev->fb)
+               return;
 
-       fb_helper = &ifbdev->helper;
-
-       ret = drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper);
-       if (ret) {
+       if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper)) {
                DRM_DEBUG("failed to restore crtc mode\n");
        } else {
-               mutex_lock(&fb_helper->dev->struct_mutex);
+               mutex_lock(&dev->struct_mutex);
                intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT);
-               mutex_unlock(&fb_helper->dev->struct_mutex);
+               mutex_unlock(&dev->struct_mutex);
        }
 }
index f4f3fcc..53e13c1 100644 (file)
@@ -2852,6 +2852,7 @@ bool ilk_disable_lp_wm(struct drm_device *dev)
 
 #define SKL_DDB_SIZE           896     /* in blocks */
 #define BXT_DDB_SIZE           512
+#define SKL_SAGV_BLOCK_TIME    30 /* µs */
 
 /*
  * Return the index of a plane in the SKL DDB and wm result arrays.  Primary
@@ -2875,6 +2876,153 @@ skl_wm_plane_id(const struct intel_plane *plane)
        }
 }
 
+/*
+ * SAGV dynamically adjusts the system agent voltage and clock frequencies
+ * depending on power and performance requirements. The display engine access
+ * to system memory is blocked during the adjustment time. Because of the
+ * blocking time, having this enabled can cause full system hangs and/or pipe
+ * underruns if we don't meet all of the following requirements:
+ *
+ *  - <= 1 pipe enabled
+ *  - All planes can enable watermarks for latencies >= SAGV engine block time
+ *  - We're not using an interlaced display configuration
+ */
+int
+skl_enable_sagv(struct drm_i915_private *dev_priv)
+{
+       int ret;
+
+       if (dev_priv->skl_sagv_status == I915_SKL_SAGV_NOT_CONTROLLED ||
+           dev_priv->skl_sagv_status == I915_SKL_SAGV_ENABLED)
+               return 0;
+
+       DRM_DEBUG_KMS("Enabling the SAGV\n");
+       mutex_lock(&dev_priv->rps.hw_lock);
+
+       ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
+                                     GEN9_SAGV_ENABLE);
+
+       /* We don't need to wait for the SAGV when enabling */
+       mutex_unlock(&dev_priv->rps.hw_lock);
+
+       /*
+        * Some skl systems, pre-release machines in particular,
+        * don't actually have an SAGV.
+        */
+       if (ret == -ENXIO) {
+               DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
+               dev_priv->skl_sagv_status = I915_SKL_SAGV_NOT_CONTROLLED;
+               return 0;
+       } else if (ret < 0) {
+               DRM_ERROR("Failed to enable the SAGV\n");
+               return ret;
+       }
+
+       dev_priv->skl_sagv_status = I915_SKL_SAGV_ENABLED;
+       return 0;
+}
+
+static int
+skl_do_sagv_disable(struct drm_i915_private *dev_priv)
+{
+       int ret;
+       uint32_t temp = GEN9_SAGV_DISABLE;
+
+       ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_SAGV_CONTROL,
+                                    &temp);
+       if (ret)
+               return ret;
+       else
+               return temp & GEN9_SAGV_IS_DISABLED;
+}
+
+int
+skl_disable_sagv(struct drm_i915_private *dev_priv)
+{
+       int ret, result;
+
+       if (dev_priv->skl_sagv_status == I915_SKL_SAGV_NOT_CONTROLLED ||
+           dev_priv->skl_sagv_status == I915_SKL_SAGV_DISABLED)
+               return 0;
+
+       DRM_DEBUG_KMS("Disabling the SAGV\n");
+       mutex_lock(&dev_priv->rps.hw_lock);
+
+       /* bspec says to keep retrying for at least 1 ms */
+       ret = wait_for(result = skl_do_sagv_disable(dev_priv), 1);
+       mutex_unlock(&dev_priv->rps.hw_lock);
+
+       if (ret == -ETIMEDOUT) {
+               DRM_ERROR("Request to disable SAGV timed out\n");
+               return -ETIMEDOUT;
+       }
+
+       /*
+        * Some skl systems, pre-release machines in particular,
+        * don't actually have an SAGV.
+        */
+       if (result == -ENXIO) {
+               DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
+               dev_priv->skl_sagv_status = I915_SKL_SAGV_NOT_CONTROLLED;
+               return 0;
+       } else if (result < 0) {
+               DRM_ERROR("Failed to disable the SAGV\n");
+               return result;
+       }
+
+       dev_priv->skl_sagv_status = I915_SKL_SAGV_DISABLED;
+       return 0;
+}
+
+bool skl_can_enable_sagv(struct drm_atomic_state *state)
+{
+       struct drm_device *dev = state->dev;
+       struct drm_i915_private *dev_priv = to_i915(dev);
+       struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+       struct drm_crtc *crtc;
+       enum pipe pipe;
+       int level, plane;
+
+       /*
+        * SKL workaround: bspec recommends we disable the SAGV when we have
+        * more than one pipe enabled
+        *
+        * If there are no active CRTCs, no additional checks need be performed
+        */
+       if (hweight32(intel_state->active_crtcs) == 0)
+               return true;
+       else if (hweight32(intel_state->active_crtcs) > 1)
+               return false;
+
+       /* Since we're now guaranteed to only have one active CRTC... */
+       pipe = ffs(intel_state->active_crtcs) - 1;
+       crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+
+       if (crtc->state->mode.flags & DRM_MODE_FLAG_INTERLACE)
+               return false;
+
+       for_each_plane(dev_priv, pipe, plane) {
+               /* Skip this plane if it's not enabled */
+               if (intel_state->wm_results.plane[pipe][plane][0] == 0)
+                       continue;
+
+               /* Find the highest enabled wm level for this plane */
+               for (level = ilk_wm_max_level(dev);
+                    intel_state->wm_results.plane[pipe][plane][level] == 0; --level)
+                    { }
+
+               /*
+                * If any of the planes on this pipe don't enable wm levels
+                * that incur memory latencies higher than 30µs we can't enable
+                * the SAGV
+                */
+               if (dev_priv->wm.skl_latency[level] < SKL_SAGV_BLOCK_TIME)
+                       return false;
+       }
+
+       return true;
+}
+
 static void
 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
                                   const struct intel_crtc_state *cstate,
@@ -3107,8 +3255,6 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate)
                total_data_rate += intel_cstate->wm.skl.plane_y_data_rate[id];
        }
 
-       WARN_ON(cstate->plane_mask && total_data_rate == 0);
-
        return total_data_rate;
 }
 
@@ -3344,6 +3490,8 @@ static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
                plane_bytes_per_line *= 4;
                plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
                plane_blocks_per_line /= 4;
+       } else if (tiling == DRM_FORMAT_MOD_NONE) {
+               plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1;
        } else {
                plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
        }
@@ -3910,9 +4058,24 @@ skl_compute_ddb(struct drm_atomic_state *state)
         * pretend that all pipes switched active status so that we'll
         * ensure a full DDB recompute.
         */
-       if (dev_priv->wm.distrust_bios_wm)
+       if (dev_priv->wm.distrust_bios_wm) {
+               ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
+                                      state->acquire_ctx);
+               if (ret)
+                       return ret;
+
                intel_state->active_pipe_changes = ~0;
 
+               /*
+                * We usually only initialize intel_state->active_crtcs if we
+                * we're doing a modeset; make sure this field is always
+                * initialized during the sanitization process that happens
+                * on the first commit too.
+                */
+               if (!intel_state->modeset)
+                       intel_state->active_crtcs = dev_priv->active_crtcs;
+       }
+
        /*
         * If the modeset changes which CRTC's are active, we need to
         * recompute the DDB allocation for *all* active pipes, even
@@ -3941,11 +4104,33 @@ skl_compute_ddb(struct drm_atomic_state *state)
                ret = skl_allocate_pipe_ddb(cstate, ddb);
                if (ret)
                        return ret;
+
+               ret = drm_atomic_add_affected_planes(state, &intel_crtc->base);
+               if (ret)
+                       return ret;
        }
 
        return 0;
 }
 
+static void
+skl_copy_wm_for_pipe(struct skl_wm_values *dst,
+                    struct skl_wm_values *src,
+                    enum pipe pipe)
+{
+       dst->wm_linetime[pipe] = src->wm_linetime[pipe];
+       memcpy(dst->plane[pipe], src->plane[pipe],
+              sizeof(dst->plane[pipe]));
+       memcpy(dst->plane_trans[pipe], src->plane_trans[pipe],
+              sizeof(dst->plane_trans[pipe]));
+
+       dst->ddb.pipe[pipe] = src->ddb.pipe[pipe];
+       memcpy(dst->ddb.y_plane[pipe], src->ddb.y_plane[pipe],
+              sizeof(dst->ddb.y_plane[pipe]));
+       memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe],
+              sizeof(dst->ddb.plane[pipe]));
+}
+
 static int
 skl_compute_wm(struct drm_atomic_state *state)
 {
@@ -4018,8 +4203,10 @@ static void skl_update_wm(struct drm_crtc *crtc)
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct skl_wm_values *results = &dev_priv->wm.skl_results;
+       struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw;
        struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
        struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal;
+       int pipe;
 
        if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0)
                return;
@@ -4031,8 +4218,12 @@ static void skl_update_wm(struct drm_crtc *crtc)
        skl_write_wm_values(dev_priv, results);
        skl_flush_wm_values(dev_priv, results);
 
-       /* store the new configuration */
-       dev_priv->wm.skl_hw = *results;
+       /*
+        * Store the new configuration (but only for the pipes that have
+        * changed; the other values weren't recomputed).
+        */
+       for_each_pipe_masked(dev_priv, pipe, results->dirty_pipes)
+               skl_copy_wm_for_pipe(hw_vals, results, pipe);
 
        mutex_unlock(&dev_priv->wm.wm_mutex);
 }
@@ -4892,7 +5083,8 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
                else
                        gen6_set_rps(dev_priv, dev_priv->rps.idle_freq);
                dev_priv->rps.last_adj = 0;
-               I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
+               I915_WRITE(GEN6_PMINTRMSK,
+                          gen6_sanitize_rps_pm_mask(dev_priv, ~0));
        }
        mutex_unlock(&dev_priv->rps.hw_lock);
 
@@ -6573,9 +6765,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 
 void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
 {
-       if (IS_CHERRYVIEW(dev_priv))
-               return;
-       else if (IS_VALLEYVIEW(dev_priv))
+       if (IS_VALLEYVIEW(dev_priv))
                valleyview_cleanup_gt_powersave(dev_priv);
 
        if (!i915.enable_rc6)
@@ -7657,8 +7847,53 @@ void intel_init_pm(struct drm_device *dev)
        }
 }
 
+static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
+{
+       uint32_t flags =
+               I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
+
+       switch (flags) {        /* masked mailbox status -> 0 or -errno */
+       case GEN6_PCODE_SUCCESS:
+               return 0;
+       case GEN6_PCODE_UNIMPLEMENTED_CMD:
+       case GEN6_PCODE_ILLEGAL_CMD:
+               return -ENXIO;
+       case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+               return -EOVERFLOW;
+       case GEN6_PCODE_TIMEOUT:
+               return -ETIMEDOUT;
+       default:        /* unlisted status: passed to MISSING_CASE(), then 0 */
+               MISSING_CASE(flags);
+               return 0;
+       }
+}
+
+static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
+{
+       uint32_t flags =
+               I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
+
+       switch (flags) {        /* masked mailbox status -> 0 or -errno */
+       case GEN6_PCODE_SUCCESS:
+               return 0;
+       case GEN6_PCODE_ILLEGAL_CMD:
+               return -ENXIO;
+       case GEN7_PCODE_TIMEOUT:
+               return -ETIMEDOUT;
+       case GEN7_PCODE_ILLEGAL_DATA:
+               return -EINVAL;
+       case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+               return -EOVERFLOW;
+       default:        /* unlisted status: passed to MISSING_CASE(), then 0 */
+               MISSING_CASE(flags);
+               return 0;
+       }
+}
+
 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
 {
+       int status;
+
        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
        /* GEN6_PCODE_* are outside of the forcewake domain, we can
@@ -7685,12 +7920,25 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val
        *val = I915_READ_FW(GEN6_PCODE_DATA);
        I915_WRITE_FW(GEN6_PCODE_DATA, 0);
 
+       if (INTEL_GEN(dev_priv) > 6)
+               status = gen7_check_mailbox_status(dev_priv);
+       else
+               status = gen6_check_mailbox_status(dev_priv);
+
+       if (status) {
+               DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed: %d\n",
+                                status);
+               return status;
+       }
+
        return 0;
 }
 
 int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
-                              u32 mbox, u32 val)
+                           u32 mbox, u32 val)
 {
+       int status;
+
        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
        /* GEN6_PCODE_* are outside of the forcewake domain, we can
@@ -7715,6 +7963,17 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
 
        I915_WRITE_FW(GEN6_PCODE_DATA, 0);
 
+       if (INTEL_GEN(dev_priv) > 6)
+               status = gen7_check_mailbox_status(dev_priv);
+       else
+               status = gen6_check_mailbox_status(dev_priv);
+
+       if (status) {
+               DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed: %d\n",
+                                status);
+               return status;
+       }
+
        return 0;
 }
 
index cca7792..1d3161b 100644 (file)
@@ -1178,8 +1178,8 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine)
                I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) |
                                           L3_HIGH_PRIO_CREDITS(2));
 
-       /* WaInsertDummyPushConstPs:bxt */
-       if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
+       /* WaToEnableHwFixForPushConstHWBug:bxt */
+       if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 
@@ -1222,8 +1222,8 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine)
                I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
                           GEN8_LQSC_RO_PERF_DIS);
 
-       /* WaInsertDummyPushConstPs:kbl */
-       if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+       /* WaToEnableHwFixForPushConstHWBug:kbl */
+       if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 
index 23ac804..294de45 100644 (file)
@@ -2,6 +2,9 @@ config DRM_MEDIATEK
        tristate "DRM Support for Mediatek SoCs"
        depends on DRM
        depends on ARCH_MEDIATEK || (ARM && COMPILE_TEST)
+       depends on COMMON_CLK
+       depends on HAVE_ARM_SMCCC
+       depends on OF
        select DRM_GEM_CMA_HELPER
        select DRM_KMS_HELPER
        select DRM_MIPI_DSI
index 528bdef..6190035 100644 (file)
@@ -1151,7 +1151,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
        if (ret)
                goto out;
 
-       ret = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem);
+       ret = ttm_bo_move_ttm(bo, true, intr, no_wait_gpu, new_mem);
 out:
        ttm_bo_mem_put(bo, &tmp_mem);
        return ret;
@@ -1179,7 +1179,7 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
        if (ret)
                return ret;
 
-       ret = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem);
+       ret = ttm_bo_move_ttm(bo, true, intr, no_wait_gpu, &tmp_mem);
        if (ret)
                goto out;
 
index df26570..28c1423 100644 (file)
@@ -73,10 +73,12 @@ static void qxl_fb_image_init(struct qxl_fb_image *qxl_fb_image,
        }
 }
 
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 static struct fb_deferred_io qxl_defio = {
        .delay          = QXL_DIRTY_DELAY,
        .deferred_io    = drm_fb_helper_deferred_io,
 };
+#endif
 
 static struct fb_ops qxlfb_ops = {
        .owner = THIS_MODULE,
@@ -313,8 +315,10 @@ static int qxlfb_create(struct qxl_fbdev *qfbdev,
                goto out_destroy_fbi;
        }
 
+#ifdef CONFIG_DRM_FBDEV_EMULATION
        info->fbdefio = &qxl_defio;
        fb_deferred_io_init(info);
+#endif
 
        qdev->fbdev_info = info;
        qdev->fbdev_qfb = &qfbdev->qfb;
index a97abc8..1dcf390 100644 (file)
@@ -627,7 +627,9 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
                        if (radeon_crtc->ss.refdiv) {
                                radeon_crtc->pll_flags |= RADEON_PLL_USE_REF_DIV;
                                radeon_crtc->pll_reference_div = radeon_crtc->ss.refdiv;
-                               if (rdev->family >= CHIP_RV770)
+                               if (ASIC_IS_AVIVO(rdev) &&
+                                   rdev->family != CHIP_RS780 &&
+                                   rdev->family != CHIP_RS880)
                                        radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
                        }
                }
index 6de3428..ddef0d4 100644 (file)
@@ -198,16 +198,7 @@ static int radeon_atpx_validate(struct radeon_atpx *atpx)
        atpx->is_hybrid = false;
        if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
                printk("ATPX Hybrid Graphics\n");
-#if 1
-               /* This is a temporary hack until the D3 cold support
-                * makes it upstream.  The ATPX power_control method seems
-                * to still work on even if the system should be using
-                * the new standardized hybrid D3 cold ACPI interface.
-                */
-               atpx->functions.power_cntl = true;
-#else
                atpx->functions.power_cntl = false;
-#endif
                atpx->is_hybrid = true;
        }
 
index ffdad81..c2e0a1c 100644 (file)
@@ -263,8 +263,8 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
 
        rdev = radeon_get_rdev(bo->bdev);
        ridx = radeon_copy_ring_index(rdev);
-       old_start = old_mem->start << PAGE_SHIFT;
-       new_start = new_mem->start << PAGE_SHIFT;
+       old_start = (u64)old_mem->start << PAGE_SHIFT;
+       new_start = (u64)new_mem->start << PAGE_SHIFT;
 
        switch (old_mem->mem_type) {
        case TTM_PL_VRAM:
@@ -346,7 +346,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
        if (unlikely(r)) {
                goto out_cleanup;
        }
-       r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem);
+       r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem);
 out_cleanup:
        ttm_bo_mem_put(bo, &tmp_mem);
        return r;
@@ -379,7 +379,7 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
        if (unlikely(r)) {
                return r;
        }
-       r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem);
+       r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem);
        if (unlikely(r)) {
                goto out_cleanup;
        }
index 4de3ff0..e03004f 100644 (file)
@@ -125,6 +125,7 @@ int rcar_du_hdmienc_init(struct rcar_du_device *rcdu,
 
        /* Link drm_bridge to encoder */
        bridge->encoder = encoder;
+       encoder->bridge = bridge;
 
        ret = drm_bridge_attach(rcdu->ddev, bridge);
        if (ret) {
index 3d228ad..3dea121 100644 (file)
@@ -840,6 +840,21 @@ static const struct drm_encoder_funcs tegra_dsi_encoder_funcs = {
        .destroy = tegra_output_encoder_destroy,
 };
 
+static void tegra_dsi_unprepare(struct tegra_dsi *dsi)
+{
+       int err;
+
+       if (dsi->slave) /* the slave, if any, is unprepared before this DSI */
+               tegra_dsi_unprepare(dsi->slave);
+
+       err = tegra_mipi_disable(dsi->mipi);
+       if (err < 0)    /* failure is only logged; teardown continues */
+               dev_err(dsi->dev, "failed to disable MIPI calibration: %d\n",
+                       err);
+
+       pm_runtime_put(dsi->dev);
+}
+
 static void tegra_dsi_encoder_disable(struct drm_encoder *encoder)
 {
        struct tegra_output *output = encoder_to_output(encoder);
@@ -876,7 +891,26 @@ static void tegra_dsi_encoder_disable(struct drm_encoder *encoder)
 
        tegra_dsi_disable(dsi);
 
-       pm_runtime_put(dsi->dev);
+       tegra_dsi_unprepare(dsi);
+}
+
+static void tegra_dsi_prepare(struct tegra_dsi *dsi)
+{
+       int err;
+
+       pm_runtime_get_sync(dsi->dev);  /* return value is not checked */
+
+       err = tegra_mipi_enable(dsi->mipi);
+       if (err < 0)    /* logged only; calibration below is still attempted */
+               dev_err(dsi->dev, "failed to enable MIPI calibration: %d\n",
+                       err);
+
+       err = tegra_dsi_pad_calibrate(dsi);
+       if (err < 0)    /* logged only; the function returns void */
+               dev_err(dsi->dev, "MIPI calibration failed: %d\n", err);
+
+       if (dsi->slave) /* the slave, if any, is prepared after this DSI */
+               tegra_dsi_prepare(dsi->slave);
 }
 
 static void tegra_dsi_encoder_enable(struct drm_encoder *encoder)
@@ -887,13 +921,8 @@ static void tegra_dsi_encoder_enable(struct drm_encoder *encoder)
        struct tegra_dsi *dsi = to_dsi(output);
        struct tegra_dsi_state *state;
        u32 value;
-       int err;
-
-       pm_runtime_get_sync(dsi->dev);
 
-       err = tegra_dsi_pad_calibrate(dsi);
-       if (err < 0)
-               dev_err(dsi->dev, "MIPI calibration failed: %d\n", err);
+       tegra_dsi_prepare(dsi);
 
        state = tegra_dsi_get_state(dsi);
 
index 4054d80..42c074a 100644 (file)
@@ -354,7 +354,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 
        if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
            !(new_man->flags & TTM_MEMTYPE_FLAG_FIXED))
-               ret = ttm_bo_move_ttm(bo, evict, no_wait_gpu, mem);
+               ret = ttm_bo_move_ttm(bo, evict, interruptible, no_wait_gpu,
+                                     mem);
        else if (bdev->driver->move)
                ret = bdev->driver->move(bo, evict, interruptible,
                                         no_wait_gpu, mem);
index 2df602a..f157a9e 100644 (file)
@@ -45,7 +45,7 @@ void ttm_bo_free_old_node(struct ttm_buffer_object *bo)
 }
 
 int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
-                   bool evict,
+                   bool evict, bool interruptible,
                    bool no_wait_gpu, struct ttm_mem_reg *new_mem)
 {
        struct ttm_tt *ttm = bo->ttm;
@@ -53,6 +53,14 @@ int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
        int ret;
 
        if (old_mem->mem_type != TTM_PL_SYSTEM) {
+               ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
+
+               if (unlikely(ret != 0)) {
+                       if (ret != -ERESTARTSYS)
+                               pr_err("Failed to expire sync object before unbinding TTM\n");
+                       return ret;
+               }
+
                ttm_tt_unbind(ttm);
                ttm_bo_free_old_node(bo);
                ttm_flag_masked(&old_mem->placement, TTM_PL_FLAG_SYSTEM,
index d5df555..9688bfa 100644 (file)
@@ -203,6 +203,7 @@ static int udl_fb_open(struct fb_info *info, int user)
 
        ufbdev->fb_count++;
 
+#ifdef CONFIG_DRM_FBDEV_EMULATION
        if (fb_defio && (info->fbdefio == NULL)) {
                /* enable defio at last moment if not disabled by client */
 
@@ -218,6 +219,7 @@ static int udl_fb_open(struct fb_info *info, int user)
                info->fbdefio = fbdefio;
                fb_deferred_io_init(info);
        }
+#endif
 
        pr_notice("open /dev/fb%d user=%d fb_info=%p count=%d\n",
                  info->node, user, info, ufbdev->fb_count);
@@ -235,12 +237,14 @@ static int udl_fb_release(struct fb_info *info, int user)
 
        ufbdev->fb_count--;
 
+#ifdef CONFIG_DRM_FBDEV_EMULATION
        if ((ufbdev->fb_count == 0) && (info->fbdefio)) {
                fb_deferred_io_cleanup(info);
                kfree(info->fbdefio);
                info->fbdefio = NULL;
                info->fbops->fb_mmap = udl_fb_mmap;
        }
+#endif
 
        pr_warn("released /dev/fb%d user=%d count=%d\n",
                info->node, user, ufbdev->fb_count);
index 52a6fd2..e00809d 100644 (file)
@@ -242,20 +242,6 @@ struct tegra_mipi_device *tegra_mipi_request(struct device *device)
        dev->pads = args.args[0];
        dev->device = device;
 
-       mutex_lock(&dev->mipi->lock);
-
-       if (dev->mipi->usage_count++ == 0) {
-               err = tegra_mipi_power_up(dev->mipi);
-               if (err < 0) {
-                       dev_err(dev->mipi->dev,
-                               "failed to power up MIPI bricks: %d\n",
-                               err);
-                       return ERR_PTR(err);
-               }
-       }
-
-       mutex_unlock(&dev->mipi->lock);
-
        return dev;
 
 put:
@@ -270,29 +256,42 @@ EXPORT_SYMBOL(tegra_mipi_request);
 
 void tegra_mipi_free(struct tegra_mipi_device *device)
 {
-       int err;
+       platform_device_put(device->pdev);
+       kfree(device);
+}
+EXPORT_SYMBOL(tegra_mipi_free);
 
-       mutex_lock(&device->mipi->lock);
+int tegra_mipi_enable(struct tegra_mipi_device *dev)
+{
+       int err = 0;
 
-       if (--device->mipi->usage_count == 0) {
-               err = tegra_mipi_power_down(device->mipi);
-               if (err < 0) {
-                       /*
-                        * Not much that can be done here, so an error message
-                        * will have to do.
-                        */
-                       dev_err(device->mipi->dev,
-                               "failed to power down MIPI bricks: %d\n",
-                               err);
-               }
-       }
+       mutex_lock(&dev->mipi->lock);
 
-       mutex_unlock(&device->mipi->lock);
+       if (dev->mipi->usage_count++ == 0)
+               err = tegra_mipi_power_up(dev->mipi);
+
+       mutex_unlock(&dev->mipi->lock);
+
+       return err;
 
-       platform_device_put(device->pdev);
-       kfree(device);
 }
-EXPORT_SYMBOL(tegra_mipi_free);
+EXPORT_SYMBOL(tegra_mipi_enable);
+
+int tegra_mipi_disable(struct tegra_mipi_device *dev)
+{
+       int err = 0;
+
+       mutex_lock(&dev->mipi->lock);
+
+       if (--dev->mipi->usage_count == 0)
+               err = tegra_mipi_power_down(dev->mipi);
+
+       mutex_unlock(&dev->mipi->lock);
+
+       return err;
+
+}
+EXPORT_SYMBOL(tegra_mipi_disable);
 
 static int tegra_mipi_wait(struct tegra_mipi *mipi)
 {
index 730d840..4667012 100644 (file)
@@ -491,7 +491,7 @@ struct it87_sio_data {
 struct it87_data {
        const struct attribute_group *groups[7];
        enum chips type;
-       u16 features;
+       u32 features;
        u8 peci_mask;
        u8 old_peci_mask;
 
@@ -2015,6 +2015,7 @@ static struct attribute *it87_attributes_in[] = {
        &sensor_dev_attr_in10_input.dev_attr.attr,      /* 41 */
        &sensor_dev_attr_in11_input.dev_attr.attr,      /* 41 */
        &sensor_dev_attr_in12_input.dev_attr.attr,      /* 41 */
+       NULL
 };
 
 static const struct attribute_group it87_group_in = {
index f233726..1bb97f6 100644 (file)
@@ -38,6 +38,7 @@
 #define AT91_I2C_TIMEOUT       msecs_to_jiffies(100)   /* transfer timeout */
 #define AT91_I2C_DMA_THRESHOLD 8                       /* enable DMA if transfer size is bigger than this threshold */
 #define AUTOSUSPEND_TIMEOUT            2000
+#define AT91_I2C_MAX_ALT_CMD_DATA_SIZE 256
 
 /* AT91 TWI register definitions */
 #define        AT91_TWI_CR             0x0000  /* Control Register */
@@ -141,6 +142,7 @@ struct at91_twi_dev {
        unsigned twi_cwgr_reg;
        struct at91_twi_pdata *pdata;
        bool use_dma;
+       bool use_alt_cmd;
        bool recv_len_abort;
        u32 fifo_size;
        struct at91_twi_dma dma;
@@ -269,7 +271,7 @@ static void at91_twi_write_next_byte(struct at91_twi_dev *dev)
 
        /* send stop when last byte has been written */
        if (--dev->buf_len == 0)
-               if (!dev->pdata->has_alt_cmd)
+               if (!dev->use_alt_cmd)
                        at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
 
        dev_dbg(dev->dev, "wrote 0x%x, to go %d\n", *dev->buf, dev->buf_len);
@@ -292,7 +294,7 @@ static void at91_twi_write_data_dma_callback(void *data)
         * we just have to enable TXCOMP one.
         */
        at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_TXCOMP);
-       if (!dev->pdata->has_alt_cmd)
+       if (!dev->use_alt_cmd)
                at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
 }
 
@@ -410,7 +412,7 @@ static void at91_twi_read_next_byte(struct at91_twi_dev *dev)
        }
 
        /* send stop if second but last byte has been read */
-       if (!dev->pdata->has_alt_cmd && dev->buf_len == 1)
+       if (!dev->use_alt_cmd && dev->buf_len == 1)
                at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
 
        dev_dbg(dev->dev, "read 0x%x, to go %d\n", *dev->buf, dev->buf_len);
@@ -426,7 +428,7 @@ static void at91_twi_read_data_dma_callback(void *data)
        dma_unmap_single(dev->dev, sg_dma_address(&dev->dma.sg[0]),
                         dev->buf_len, DMA_FROM_DEVICE);
 
-       if (!dev->pdata->has_alt_cmd) {
+       if (!dev->use_alt_cmd) {
                /* The last two bytes have to be read without using dma */
                dev->buf += dev->buf_len - 2;
                dev->buf_len = 2;
@@ -443,7 +445,7 @@ static void at91_twi_read_data_dma(struct at91_twi_dev *dev)
        struct dma_chan *chan_rx = dma->chan_rx;
        size_t buf_len;
 
-       buf_len = (dev->pdata->has_alt_cmd) ? dev->buf_len : dev->buf_len - 2;
+       buf_len = (dev->use_alt_cmd) ? dev->buf_len : dev->buf_len - 2;
        dma->direction = DMA_FROM_DEVICE;
 
        /* Keep in mind that we won't use dma to read the last two bytes */
@@ -651,7 +653,7 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev)
                unsigned start_flags = AT91_TWI_START;
 
                /* if only one byte is to be read, immediately stop transfer */
-               if (!has_alt_cmd && dev->buf_len <= 1 &&
+               if (!dev->use_alt_cmd && dev->buf_len <= 1 &&
                    !(dev->msg->flags & I2C_M_RECV_LEN))
                        start_flags |= AT91_TWI_STOP;
                at91_twi_write(dev, AT91_TWI_CR, start_flags);
@@ -745,7 +747,7 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num)
        int ret;
        unsigned int_addr_flag = 0;
        struct i2c_msg *m_start = msg;
-       bool is_read, use_alt_cmd = false;
+       bool is_read;
 
        dev_dbg(&adap->dev, "at91_xfer: processing %d messages:\n", num);
 
@@ -768,14 +770,16 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num)
                at91_twi_write(dev, AT91_TWI_IADR, internal_address);
        }
 
+       dev->use_alt_cmd = false;
        is_read = (m_start->flags & I2C_M_RD);
        if (dev->pdata->has_alt_cmd) {
-               if (m_start->len > 0) {
+               if (m_start->len > 0 &&
+                   m_start->len < AT91_I2C_MAX_ALT_CMD_DATA_SIZE) {
                        at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_ACMEN);
                        at91_twi_write(dev, AT91_TWI_ACR,
                                       AT91_TWI_ACR_DATAL(m_start->len) |
                                       ((is_read) ? AT91_TWI_ACR_DIR : 0));
-                       use_alt_cmd = true;
+                       dev->use_alt_cmd = true;
                } else {
                        at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_ACMDIS);
                }
@@ -784,7 +788,7 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num)
        at91_twi_write(dev, AT91_TWI_MMR,
                       (m_start->addr << 16) |
                       int_addr_flag |
-                      ((!use_alt_cmd && is_read) ? AT91_TWI_MREAD : 0));
+                      ((!dev->use_alt_cmd && is_read) ? AT91_TWI_MREAD : 0));
 
        dev->buf_len = m_start->len;
        dev->buf = m_start->buf;
index 19c8438..95f7cac 100644 (file)
@@ -158,7 +158,7 @@ static irqreturn_t bcm_iproc_i2c_isr(int irq, void *data)
 
        if (status & BIT(IS_M_START_BUSY_SHIFT)) {
                iproc_i2c->xfer_is_done = 1;
-               complete_all(&iproc_i2c->done);
+               complete(&iproc_i2c->done);
        }
 
        writel(status, iproc_i2c->base + IS_OFFSET);
index ac9f476..f987432 100644 (file)
@@ -229,7 +229,7 @@ static irqreturn_t bcm_kona_i2c_isr(int irq, void *devid)
                       dev->base + TXFCR_OFFSET);
 
        writel(status & ~ISR_RESERVED_MASK, dev->base + ISR_OFFSET);
-       complete_all(&dev->done);
+       complete(&dev->done);
 
        return IRQ_HANDLED;
 }
index 3f5a4d7..385b57b 100644 (file)
@@ -228,7 +228,7 @@ static irqreturn_t brcmstb_i2c_isr(int irq, void *devid)
                return IRQ_NONE;
 
        brcmstb_i2c_enable_disable_irq(dev, INT_DISABLE);
-       complete_all(&dev->done);
+       complete(&dev->done);
 
        dev_dbg(dev->device, "isr handled");
        return IRQ_HANDLED;
index a0d95ff..2d5ff86 100644 (file)
@@ -215,7 +215,7 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[],
        msg->outsize = request_len;
        msg->insize = response_len;
 
-       result = cros_ec_cmd_xfer(bus->ec, msg);
+       result = cros_ec_cmd_xfer_status(bus->ec, msg);
        if (result < 0) {
                dev_err(dev, "Error transferring EC i2c message %d\n", result);
                goto exit;
index 71d3929..76e2898 100644 (file)
@@ -211,7 +211,7 @@ static void meson_i2c_stop(struct meson_i2c *i2c)
                meson_i2c_add_token(i2c, TOKEN_STOP);
        } else {
                i2c->state = STATE_IDLE;
-               complete_all(&i2c->done);
+               complete(&i2c->done);
        }
 }
 
@@ -238,7 +238,7 @@ static irqreturn_t meson_i2c_irq(int irqno, void *dev_id)
                dev_dbg(i2c->dev, "error bit set\n");
                i2c->error = -ENXIO;
                i2c->state = STATE_IDLE;
-               complete_all(&i2c->done);
+               complete(&i2c->done);
                goto out;
        }
 
@@ -269,7 +269,7 @@ static irqreturn_t meson_i2c_irq(int irqno, void *dev_id)
                break;
        case STATE_STOP:
                i2c->state = STATE_IDLE;
-               complete_all(&i2c->done);
+               complete(&i2c->done);
                break;
        case STATE_IDLE:
                break;
index dfa7a4b..ac88a52 100644 (file)
@@ -379,6 +379,7 @@ static int ocores_i2c_of_probe(struct platform_device *pdev,
                        if (!clock_frequency_present) {
                                dev_err(&pdev->dev,
                                        "Missing required parameter 'opencores,ip-clock-frequency'\n");
+                               clk_disable_unprepare(i2c->clk);
                                return -ENODEV;
                        }
                        i2c->ip_clock_khz = clock_frequency / 1000;
@@ -467,20 +468,21 @@ static int ocores_i2c_probe(struct platform_device *pdev)
                default:
                        dev_err(&pdev->dev, "Unsupported I/O width (%d)\n",
                                i2c->reg_io_width);
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto err_clk;
                }
        }
 
        ret = ocores_init(&pdev->dev, i2c);
        if (ret)
-               return ret;
+               goto err_clk;
 
        init_waitqueue_head(&i2c->wait);
        ret = devm_request_irq(&pdev->dev, irq, ocores_isr, 0,
                               pdev->name, i2c);
        if (ret) {
                dev_err(&pdev->dev, "Cannot claim IRQ\n");
-               return ret;
+               goto err_clk;
        }
 
        /* hook up driver to tree */
@@ -494,7 +496,7 @@ static int ocores_i2c_probe(struct platform_device *pdev)
        ret = i2c_add_adapter(&i2c->adap);
        if (ret) {
                dev_err(&pdev->dev, "Failed to add adapter\n");
-               return ret;
+               goto err_clk;
        }
 
        /* add in known devices to the bus */
@@ -504,6 +506,10 @@ static int ocores_i2c_probe(struct platform_device *pdev)
        }
 
        return 0;
+
+err_clk:
+       clk_disable_unprepare(i2c->clk);
+       return ret;
 }
 
 static int ocores_i2c_remove(struct platform_device *pdev)
index 8de073a..215ac87 100644 (file)
@@ -68,7 +68,7 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne
        adap = of_find_i2c_adapter_by_node(priv->chan[new_chan].parent_np);
        if (!adap) {
                ret = -ENODEV;
-               goto err;
+               goto err_with_revert;
        }
 
        p = devm_pinctrl_get_select(adap->dev.parent, priv->bus_name);
@@ -103,6 +103,8 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne
 
  err_with_put:
        i2c_put_adapter(adap);
+ err_with_revert:
+       of_changeset_revert(&priv->chan[new_chan].chgset);
  err:
        dev_err(priv->dev, "failed to setup demux-adapter %d (%d)\n", new_chan, ret);
        return ret;
index e6dfa1b..5f65a78 100644 (file)
@@ -2462,18 +2462,24 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 
        if (addr->dev_addr.bound_dev_if) {
                ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
-               if (!ndev)
-                       return -ENODEV;
+               if (!ndev) {
+                       ret = -ENODEV;
+                       goto err2;
+               }
 
                if (ndev->flags & IFF_LOOPBACK) {
                        dev_put(ndev);
-                       if (!id_priv->id.device->get_netdev)
-                               return -EOPNOTSUPP;
+                       if (!id_priv->id.device->get_netdev) {
+                               ret = -EOPNOTSUPP;
+                               goto err2;
+                       }
 
                        ndev = id_priv->id.device->get_netdev(id_priv->id.device,
                                                              id_priv->id.port_num);
-                       if (!ndev)
-                               return -ENODEV;
+                       if (!ndev) {
+                               ret = -ENODEV;
+                               goto err2;
+                       }
                }
 
                route->path_rec->net = &init_net;
index 3aca7f6..b6a953a 100644 (file)
@@ -1827,8 +1827,12 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
                                (ep->mpa_pkt + sizeof(*mpa));
                        ep->ird = ntohs(mpa_v2_params->ird) &
                                MPA_V2_IRD_ORD_MASK;
+                       ep->ird = min_t(u32, ep->ird,
+                                       cur_max_read_depth(ep->com.dev));
                        ep->ord = ntohs(mpa_v2_params->ord) &
                                MPA_V2_IRD_ORD_MASK;
+                       ep->ord = min_t(u32, ep->ord,
+                                       cur_max_read_depth(ep->com.dev));
                        PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird,
                             ep->ord);
                        if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
@@ -3136,7 +3140,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
                if (conn_param->ord > ep->ird) {
                        if (RELAXED_IRD_NEGOTIATION) {
-                               ep->ord = ep->ird;
+                               conn_param->ord = ep->ird;
                        } else {
                                ep->ird = conn_param->ird;
                                ep->ord = conn_param->ord;
index 812ab72..ac926c9 100644 (file)
@@ -1016,15 +1016,15 @@ int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
 int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 {
        struct c4iw_cq *chp;
-       int ret;
+       int ret = 0;
        unsigned long flag;
 
        chp = to_c4iw_cq(ibcq);
        spin_lock_irqsave(&chp->lock, flag);
-       ret = t4_arm_cq(&chp->cq,
-                       (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
+       t4_arm_cq(&chp->cq,
+                 (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
+       if (flags & IB_CQ_REPORT_MISSED_EVENTS)
+               ret = t4_cq_notempty(&chp->cq);
        spin_unlock_irqrestore(&chp->lock, flag);
-       if (ret && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
-               ret = 0;
        return ret;
 }
index 6126bbe..02173f4 100644 (file)
@@ -634,6 +634,11 @@ static inline int t4_valid_cqe(struct t4_cq *cq, struct t4_cqe *cqe)
        return (CQE_GENBIT(cqe) == cq->gen);
 }
 
+static inline int t4_cq_notempty(struct t4_cq *cq)
+{
+       return cq->sw_in_use || t4_valid_cqe(cq, &cq->queue[cq->cidx]);
+}
+
 static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
 {
        int ret;
index 79575ee..0566393 100644 (file)
@@ -47,7 +47,6 @@
 #include <linux/topology.h>
 #include <linux/cpumask.h>
 #include <linux/module.h>
-#include <linux/cpumask.h>
 
 #include "hfi.h"
 #include "affinity.h"
@@ -682,7 +681,7 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
                           size_t count)
 {
        struct hfi1_affinity_node *entry;
-       struct cpumask mask;
+       cpumask_var_t mask;
        int ret, i;
 
        spin_lock(&node_affinity.lock);
@@ -692,19 +691,24 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
        if (!entry)
                return -EINVAL;
 
-       ret = cpulist_parse(buf, &mask);
+       ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+       if (!ret)
+               return -ENOMEM;
+
+       ret = cpulist_parse(buf, mask);
        if (ret)
-               return ret;
+               goto out;
 
-       if (!cpumask_subset(&mask, cpu_online_mask) || cpumask_empty(&mask)) {
+       if (!cpumask_subset(mask, cpu_online_mask) || cpumask_empty(mask)) {
                dd_dev_warn(dd, "Invalid CPU mask\n");
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out;
        }
 
        mutex_lock(&sdma_affinity_mutex);
        /* reset the SDMA interrupt affinity details */
        init_cpu_mask_set(&entry->def_intr);
-       cpumask_copy(&entry->def_intr.mask, &mask);
+       cpumask_copy(&entry->def_intr.mask, mask);
        /*
         * Reassign the affinity for each SDMA interrupt.
         */
@@ -720,8 +724,9 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
                if (ret)
                        break;
        }
-
        mutex_unlock(&sdma_affinity_mutex);
+out:
+       free_cpumask_var(mask);
        return ret ? ret : strnlen(buf, PAGE_SIZE);
 }
 
index dbab9d9..a49cc88 100644 (file)
@@ -223,28 +223,32 @@ DEBUGFS_SEQ_FILE_OPEN(ctx_stats)
 DEBUGFS_FILE_OPS(ctx_stats);
 
 static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
+       __acquires(RCU)
 {
        struct qp_iter *iter;
        loff_t n = *pos;
 
-       rcu_read_lock();
        iter = qp_iter_init(s->private);
+
+       /* stop calls rcu_read_unlock */
+       rcu_read_lock();
+
        if (!iter)
                return NULL;
 
-       while (n--) {
+       do {
                if (qp_iter_next(iter)) {
                        kfree(iter);
                        return NULL;
                }
-       }
+       } while (n--);
 
        return iter;
 }
 
 static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
                                loff_t *pos)
+       __must_hold(RCU)
 {
        struct qp_iter *iter = iter_ptr;
 
@@ -259,7 +263,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
 }
 
 static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
-__releases(RCU)
+       __releases(RCU)
 {
        rcu_read_unlock();
 }
index 8246dc7..303f105 100644 (file)
@@ -888,14 +888,15 @@ void set_all_slowpath(struct hfi1_devdata *dd)
 }
 
 static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
-                                     struct hfi1_packet packet,
+                                     struct hfi1_packet *packet,
                                      struct hfi1_devdata *dd)
 {
        struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
-       struct hfi1_message_header *hdr = hfi1_get_msgheader(packet.rcd->dd,
-                                                            packet.rhf_addr);
+       struct hfi1_message_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
+                                                            packet->rhf_addr);
+       u8 etype = rhf_rcv_type(packet->rhf);
 
-       if (hdr2sc(hdr, packet.rhf) != 0xf) {
+       if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) {
                int hwstate = read_logical_state(dd);
 
                if (hwstate != LSTATE_ACTIVE) {
@@ -979,7 +980,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
                        /* Auto activate link on non-SC15 packet receive */
                        if (unlikely(rcd->ppd->host_link_state ==
                                     HLS_UP_ARMED) &&
-                           set_armed_to_active(rcd, packet, dd))
+                           set_armed_to_active(rcd, &packet, dd))
                                goto bail;
                        last = process_rcv_packet(&packet, thread);
                }
index 1ecbec1..7e03ccd 100644 (file)
@@ -183,6 +183,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
        if (fd) {
                fd->rec_cpu_num = -1; /* no cpu affinity by default */
                fd->mm = current->mm;
+               atomic_inc(&fd->mm->mm_count);
        }
 
        fp->private_data = fd;
@@ -222,7 +223,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
                ret = assign_ctxt(fp, &uinfo);
                if (ret < 0)
                        return ret;
-               setup_ctxt(fp);
+               ret = setup_ctxt(fp);
                if (ret)
                        return ret;
                ret = user_init(fp);
@@ -779,6 +780,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
        mutex_unlock(&hfi1_mutex);
        hfi1_free_ctxtdata(dd, uctxt);
 done:
+       mmdrop(fdata->mm);
        kobject_put(&dd->kobj);
        kfree(fdata);
        return 0;
index 1000e0f..a021e66 100644 (file)
@@ -1272,9 +1272,26 @@ static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf)
               ((!!(rhf_dc_info(rhf))) << 4);
 }
 
+#define HFI1_JKEY_WIDTH       16
+#define HFI1_JKEY_MASK        (BIT(16) - 1)
+#define HFI1_ADMIN_JKEY_RANGE 32
+
+/*
+ * J_KEYs are split and allocated in the following groups:
+ *   0 - 31    - users with administrator privileges
+ *  32 - 63    - kernel protocols using KDETH packets
+ *  64 - 65535 - all other users using KDETH packets
+ */
 static inline u16 generate_jkey(kuid_t uid)
 {
-       return from_kuid(current_user_ns(), uid) & 0xffff;
+       u16 jkey = from_kuid(current_user_ns(), uid) & HFI1_JKEY_MASK;
+
+       if (capable(CAP_SYS_ADMIN))
+               jkey &= HFI1_ADMIN_JKEY_RANGE - 1;
+       else if (jkey < 64)
+               jkey |= BIT(HFI1_JKEY_WIDTH - 1);
+
+       return jkey;
 }
 
 /*
@@ -1656,7 +1673,6 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
 struct hfi1_devdata *hfi1_init_dd(struct pci_dev *,
                                  const struct pci_device_id *);
 void hfi1_free_devdata(struct hfi1_devdata *);
-void cc_state_reclaim(struct rcu_head *rcu);
 struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
 
 /* LED beaconing functions */
index a358d23..b793545 100644 (file)
@@ -1333,7 +1333,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
                spin_unlock(&ppd->cc_state_lock);
 
                if (cc_state)
-                       call_rcu(&cc_state->rcu, cc_state_reclaim);
+                       kfree_rcu(cc_state, rcu);
        }
 
        free_credit_return(dd);
index 1263abe..39e42c3 100644 (file)
@@ -1819,6 +1819,11 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
        u32 len = OPA_AM_CI_LEN(am) + 1;
        int ret;
 
+       if (dd->pport->port_type != PORT_TYPE_QSFP) {
+               smp->status |= IB_SMP_INVALID_FIELD;
+               return reply((struct ib_mad_hdr *)smp);
+       }
+
 #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
 #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
 #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
@@ -3398,7 +3403,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd)
 
        spin_unlock(&ppd->cc_state_lock);
 
-       call_rcu(&old_cc_state->rcu, cc_state_reclaim);
+       kfree_rcu(old_cc_state, rcu);
 }
 
 static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
@@ -3553,13 +3558,6 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
        return reply((struct ib_mad_hdr *)smp);
 }
 
-void cc_state_reclaim(struct rcu_head *rcu)
-{
-       struct cc_state *cc_state = container_of(rcu, struct cc_state, rcu);
-
-       kfree(cc_state);
-}
-
 static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
                                   u32 *resp_len)
index a5aa351..4e4d831 100644 (file)
@@ -656,10 +656,6 @@ struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev)
 
        iter->dev = dev;
        iter->specials = dev->rdi.ibdev.phys_port_cnt * 2;
-       if (qp_iter_next(iter)) {
-               kfree(iter);
-               return NULL;
-       }
 
        return iter;
 }
index a207717..4e95ad8 100644 (file)
@@ -706,8 +706,8 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len,
                   u8 *data)
 {
        struct hfi1_pportdata *ppd;
-       u32 excess_len = 0;
-       int ret = 0;
+       u32 excess_len = len;
+       int ret = 0, offset = 0;
 
        if (port_num > dd->num_pports || port_num < 1) {
                dd_dev_info(dd, "%s: Invalid port number %d\n",
@@ -740,6 +740,34 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len,
        }
 
        memcpy(data, &ppd->qsfp_info.cache[addr], len);
+
+       if (addr <= QSFP_MONITOR_VAL_END &&
+           (addr + len) >= QSFP_MONITOR_VAL_START) {
+               /* Overlap with the dynamic channel monitor range */
+               if (addr < QSFP_MONITOR_VAL_START) {
+                       if (addr + len <= QSFP_MONITOR_VAL_END)
+                               len = addr + len - QSFP_MONITOR_VAL_START;
+                       else
+                               len = QSFP_MONITOR_RANGE;
+                       offset = QSFP_MONITOR_VAL_START - addr;
+                       addr = QSFP_MONITOR_VAL_START;
+               } else if (addr == QSFP_MONITOR_VAL_START) {
+                       offset = 0;
+                       if (addr + len > QSFP_MONITOR_VAL_END)
+                               len = QSFP_MONITOR_RANGE;
+               } else {
+                       offset = 0;
+                       if (addr + len > QSFP_MONITOR_VAL_END)
+                               len = QSFP_MONITOR_VAL_END - addr + 1;
+               }
+               /* Refresh the values of the dynamic monitors from the cable */
+               ret = one_qsfp_read(ppd, dd->hfi1_id, addr, data + offset, len);
+               if (ret != len) {
+                       ret = -EAGAIN;
+                       goto set_zeroes;
+               }
+       }
+
        return 0;
 
 set_zeroes:
index 69275eb..36cf523 100644 (file)
@@ -74,6 +74,9 @@
 /* Defined fields that Intel requires of qualified cables */
 /* Byte 0 is Identifier, not checked */
 /* Byte 1 is reserved "status MSB" */
+#define QSFP_MONITOR_VAL_START 22
+#define QSFP_MONITOR_VAL_END 81
+#define QSFP_MONITOR_RANGE (QSFP_MONITOR_VAL_END - QSFP_MONITOR_VAL_START + 1)
 #define QSFP_TX_CTRL_BYTE_OFFS 86
 #define QSFP_PWR_CTRL_BYTE_OFFS 93
 #define QSFP_CDR_CTRL_BYTE_OFFS 98
index b738acd..8ec09e4 100644 (file)
@@ -232,7 +232,7 @@ struct i40iw_device {
        struct i40e_client *client;
        struct i40iw_hw hw;
        struct i40iw_cm_core cm_core;
-       unsigned long *mem_resources;
+       u8 *mem_resources;
        unsigned long *allocated_qps;
        unsigned long *allocated_cqs;
        unsigned long *allocated_mrs;
@@ -435,8 +435,8 @@ static inline int i40iw_alloc_resource(struct i40iw_device *iwdev,
        *next = resource_num + 1;
        if (*next == max_resources)
                *next = 0;
-       spin_unlock_irqrestore(&iwdev->resource_lock, flags);
        *req_resource_num = resource_num;
+       spin_unlock_irqrestore(&iwdev->resource_lock, flags);
 
        return 0;
 }
index 5026dc7..7ca0638 100644 (file)
@@ -535,8 +535,8 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node,
                buf += hdr_len;
        }
 
-       if (pd_len)
-               memcpy(buf, pdata->addr, pd_len);
+       if (pdata && pdata->addr)
+               memcpy(buf, pdata->addr, pdata->size);
 
        atomic_set(&sqbuf->refcount, 1);
 
@@ -3346,26 +3346,6 @@ int i40iw_cm_disconn(struct i40iw_qp *iwqp)
        return 0;
 }
 
-/**
- * i40iw_loopback_nop - Send a nop
- * @qp: associated hw qp
- */
-static void i40iw_loopback_nop(struct i40iw_sc_qp *qp)
-{
-       u64 *wqe;
-       u64 header;
-
-       wqe = qp->qp_uk.sq_base->elem;
-       set_64bit_val(wqe, 0, 0);
-       set_64bit_val(wqe, 8, 0);
-       set_64bit_val(wqe, 16, 0);
-
-       header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
-           LS_64(0, I40IWQPSQ_SIGCOMPL) |
-           LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
-       set_64bit_val(wqe, 24, header);
-}
-
 /**
  * i40iw_qp_disconnect - free qp and close cm
  * @iwqp: associate qp for the connection
@@ -3638,7 +3618,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        } else {
                if (iwqp->page)
                        iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
-               i40iw_loopback_nop(&iwqp->sc_qp);
+               dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp, NULL, 0, 0);
        }
 
        if (iwqp->page)
index 6e90813..0cbbe40 100644 (file)
@@ -1558,6 +1558,10 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client)
        enum i40iw_status_code status;
        struct i40iw_handler *hdl;
 
+       hdl = i40iw_find_netdev(ldev->netdev);
+       if (hdl)
+               return 0;
+
        hdl = kzalloc(sizeof(*hdl), GFP_KERNEL);
        if (!hdl)
                return -ENOMEM;
index 0e8db0a..6fd043b 100644 (file)
@@ -673,8 +673,11 @@ enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw,
 {
        if (!mem)
                return I40IW_ERR_PARAM;
+       /*
+        * mem->va points to the parent of mem, so both mem and mem->va
+        * can not be touched once mem->va is freed
+        */
        kfree(mem->va);
-       mem->va = NULL;
        return 0;
 }
 
index 2360338..6329c97 100644 (file)
@@ -794,7 +794,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
        return &iwqp->ibqp;
 error:
        i40iw_free_qp_resources(iwdev, iwqp, qp_num);
-       kfree(mem);
        return ERR_PTR(err_code);
 }
 
@@ -1926,8 +1925,7 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr)
                }
                if (iwpbl->pbl_allocated)
                        i40iw_free_pble(iwdev->pble_rsrc, palloc);
-               kfree(iwpbl->iwmr);
-               iwpbl->iwmr = NULL;
+               kfree(iwmr);
                return 0;
        }
 
index d6fc8a6..006db64 100644 (file)
@@ -576,8 +576,8 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
                checksum == cpu_to_be16(0xffff);
 }
 
-static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
-                          unsigned tail, struct mlx4_cqe *cqe, int is_eth)
+static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
+                           unsigned tail, struct mlx4_cqe *cqe, int is_eth)
 {
        struct mlx4_ib_proxy_sqp_hdr *hdr;
 
@@ -600,8 +600,6 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct
                wc->slid        = be16_to_cpu(hdr->tun.slid_mac_47_32);
                wc->sl          = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
        }
-
-       return 0;
 }
 
 static void mlx4_ib_qp_sw_comp(struct mlx4_ib_qp *qp, int num_entries,
@@ -692,7 +690,7 @@ repoll:
        if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
                     is_send)) {
                pr_warn("Completion for NOP opcode detected!\n");
-               return -EINVAL;
+               return -EAGAIN;
        }
 
        /* Resize CQ in progress */
@@ -723,7 +721,7 @@ repoll:
                if (unlikely(!mqp)) {
                        pr_warn("CQ %06x with entry for unknown QPN %06x\n",
                               cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
-                       return -EINVAL;
+                       return -EAGAIN;
                }
 
                *cur_qp = to_mibqp(mqp);
@@ -741,7 +739,7 @@ repoll:
                if (unlikely(!msrq)) {
                        pr_warn("CQ %06x with entry for unknown SRQN %06x\n",
                                cq->mcq.cqn, srq_num);
-                       return -EINVAL;
+                       return -EAGAIN;
                }
        }
 
@@ -852,9 +850,11 @@ repoll:
                if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
                        if ((*cur_qp)->mlx4_ib_qp_type &
                            (MLX4_IB_QPT_PROXY_SMI_OWNER |
-                            MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
-                               return use_tunnel_data(*cur_qp, cq, wc, tail,
-                                                      cqe, is_eth);
+                            MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
+                               use_tunnel_data(*cur_qp, cq, wc, tail, cqe,
+                                               is_eth);
+                               return 0;
+                       }
                }
 
                wc->slid           = be16_to_cpu(cqe->rlid);
index a84bb76..1b4094b 100644 (file)
@@ -37,7 +37,6 @@
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
-#include <linux/io-mapping.h>
 #if defined(CONFIG_X86)
 #include <asm/pat.h>
 #endif
index 16740dc..67fc0b6 100644 (file)
@@ -1156,18 +1156,18 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
        attr->max_srq =
                (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >>
                OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET;
-       attr->max_send_sge = ((rsp->max_write_send_sge &
+       attr->max_send_sge = ((rsp->max_recv_send_sge &
                               OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >>
                              OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT);
-       attr->max_recv_sge = (rsp->max_write_send_sge &
-                             OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >>
-           OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT;
+       attr->max_recv_sge = (rsp->max_recv_send_sge &
+                             OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_MASK) >>
+           OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT;
        attr->max_srq_sge = (rsp->max_srq_rqe_sge &
                              OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK) >>
            OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET;
-       attr->max_rdma_sge = (rsp->max_write_send_sge &
-                             OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK) >>
-           OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT;
+       attr->max_rdma_sge = (rsp->max_wr_rd_sge &
+                             OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_MASK) >>
+           OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_SHIFT;
        attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp &
                                OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >>
            OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT;
index 0efc966..37df448 100644 (file)
@@ -554,9 +554,9 @@ enum {
        OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK               = 0x18,
        OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT         = 0,
        OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK          = 0xFFFF,
-       OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT        = 16,
-       OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK         = 0xFFFF <<
-                               OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT,
+       OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT = 16,
+       OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_MASK          = 0xFFFF <<
+                               OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT,
 
        OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT       = 0,
        OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK        = 0xFFFF,
@@ -612,6 +612,8 @@ enum {
        OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET         = 0,
        OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK           = 0xFFFF <<
                                OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET,
+       OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_SHIFT           = 0,
+       OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_MASK            = 0xFFFF,
 };
 
 struct ocrdma_mbx_query_config {
@@ -619,7 +621,7 @@ struct ocrdma_mbx_query_config {
        struct ocrdma_mbx_rsp rsp;
        u32 qp_srq_cq_ird_ord;
        u32 max_pd_ca_ack_delay;
-       u32 max_write_send_sge;
+       u32 max_recv_send_sge;
        u32 max_ird_ord_per_qp;
        u32 max_shared_ird_ord;
        u32 max_mr;
@@ -639,6 +641,8 @@ struct ocrdma_mbx_query_config {
        u32 max_wqes_rqes_per_q;
        u32 max_cq_cqes_per_cq;
        u32 max_srq_rqe_sge;
+       u32 max_wr_rd_sge;
+       u32 ird_pgsz_num_pages;
 };
 
 struct ocrdma_fw_ver_rsp {
index b1a3d91..0aa8547 100644 (file)
@@ -125,8 +125,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
                                        IB_DEVICE_SYS_IMAGE_GUID |
                                        IB_DEVICE_LOCAL_DMA_LKEY |
                                        IB_DEVICE_MEM_MGT_EXTENSIONS;
-       attr->max_sge = dev->attr.max_send_sge;
-       attr->max_sge_rd = attr->max_sge;
+       attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge);
+       attr->max_sge_rd = dev->attr.max_rdma_sge;
        attr->max_cq = dev->attr.max_cq;
        attr->max_cqe = dev->attr.max_cqe;
        attr->max_mr = dev->attr.max_mr;
index 5e75b43..5bad8e3 100644 (file)
@@ -189,27 +189,32 @@ static int _ctx_stats_seq_show(struct seq_file *s, void *v)
 DEBUGFS_FILE(ctx_stats)
 
 static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
+       __acquires(RCU)
 {
        struct qib_qp_iter *iter;
        loff_t n = *pos;
 
-       rcu_read_lock();
        iter = qib_qp_iter_init(s->private);
+
+       /* stop calls rcu_read_unlock */
+       rcu_read_lock();
+
        if (!iter)
                return NULL;
 
-       while (n--) {
+       do {
                if (qib_qp_iter_next(iter)) {
                        kfree(iter);
                        return NULL;
                }
-       }
+       } while (n--);
 
        return iter;
 }
 
 static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
                                   loff_t *pos)
+       __must_hold(RCU)
 {
        struct qib_qp_iter *iter = iter_ptr;
 
@@ -224,6 +229,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
 }
 
 static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
+       __releases(RCU)
 {
        rcu_read_unlock();
 }
index fcdf379..c3edc03 100644 (file)
@@ -328,26 +328,12 @@ static ssize_t flash_write(struct file *file, const char __user *buf,
 
        pos = *ppos;
 
-       if (pos != 0) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       if (count != sizeof(struct qib_flash)) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       tmp = kmalloc(count, GFP_KERNEL);
-       if (!tmp) {
-               ret = -ENOMEM;
-               goto bail;
-       }
+       if (pos != 0 || count != sizeof(struct qib_flash))
+               return -EINVAL;
 
-       if (copy_from_user(tmp, buf, count)) {
-               ret = -EFAULT;
-               goto bail_tmp;
-       }
+       tmp = memdup_user(buf, count);
+       if (IS_ERR(tmp))
+               return PTR_ERR(tmp);
 
        dd = private2dd(file);
        if (qib_eeprom_write(dd, pos, tmp, count)) {
@@ -361,8 +347,6 @@ static ssize_t flash_write(struct file *file, const char __user *buf,
 
 bail_tmp:
        kfree(tmp);
-
-bail:
        return ret;
 }
 
index 9cc0aae..f9b8cd2 100644 (file)
@@ -573,10 +573,6 @@ struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev)
                return NULL;
 
        iter->dev = dev;
-       if (qib_qp_iter_next(iter)) {
-               kfree(iter);
-               return NULL;
-       }
 
        return iter;
 }
index c229b9f..0a89a95 100644 (file)
@@ -664,7 +664,8 @@ static int __init usnic_ib_init(void)
                return err;
        }
 
-       if (pci_register_driver(&usnic_ib_pci_driver)) {
+       err = pci_register_driver(&usnic_ib_pci_driver);
+       if (err) {
                usnic_err("Unable to register with PCI\n");
                goto out_umem_fini;
        }
index bdb540f..870b4f2 100644 (file)
@@ -873,7 +873,8 @@ bail_qpn:
        free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
 
 bail_rq_wq:
-       vfree(qp->r_rq.wq);
+       if (!qp->ip)
+               vfree(qp->r_rq.wq);
 
 bail_driver_priv:
        rdi->driver_f.qp_priv_free(rdi, qp);
index ba6be06..7914c14 100644 (file)
@@ -448,7 +448,7 @@ isert_alloc_login_buf(struct isert_conn *isert_conn,
 
        isert_conn->login_rsp_buf = kzalloc(ISER_RX_PAYLOAD_SIZE, GFP_KERNEL);
        if (!isert_conn->login_rsp_buf) {
-               isert_err("Unable to allocate isert_conn->login_rspbuf\n");
+               ret = -ENOMEM;
                goto out_unmap_login_req_buf;
        }
 
index dfa23b0..883bbfe 100644 (file)
@@ -522,6 +522,11 @@ static int srpt_refresh_port(struct srpt_port *sport)
        if (ret)
                goto err_query_port;
 
+       snprintf(sport->port_guid, sizeof(sport->port_guid),
+               "0x%016llx%016llx",
+               be64_to_cpu(sport->gid.global.subnet_prefix),
+               be64_to_cpu(sport->gid.global.interface_id));
+
        if (!sport->mad_agent) {
                memset(&reg_req, 0, sizeof(reg_req));
                reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
@@ -2548,10 +2553,6 @@ static void srpt_add_one(struct ib_device *device)
                               sdev->device->name, i);
                        goto err_ring;
                }
-               snprintf(sport->port_guid, sizeof(sport->port_guid),
-                       "0x%016llx%016llx",
-                       be64_to_cpu(sport->gid.global.subnet_prefix),
-                       be64_to_cpu(sport->gid.global.interface_id));
        }
 
        spin_lock(&srpt_dev_lock);
index 7d61439..0c07e10 100644 (file)
@@ -376,7 +376,7 @@ static int tegra_kbc_start(struct tegra_kbc *kbc)
        /* Reset the KBC controller to clear all previous status.*/
        reset_control_assert(kbc->rst);
        udelay(100);
-       reset_control_assert(kbc->rst);
+       reset_control_deassert(kbc->rst);
        udelay(100);
 
        tegra_kbc_config_pins(kbc);
index faa295e..c83bce8 100644 (file)
@@ -553,7 +553,6 @@ int rmi_read_register_desc(struct rmi_device *d, u16 addr,
                goto free_struct_buff;
 
        reg = find_first_bit(rdesc->presense_map, RMI_REG_DESC_PRESENSE_BITS);
-       map_offset = 0;
        for (i = 0; i < rdesc->num_registers; i++) {
                struct rmi_register_desc_item *item = &rdesc->registers[i];
                int reg_size = struct_buf[offset];
@@ -576,6 +575,8 @@ int rmi_read_register_desc(struct rmi_device *d, u16 addr,
                item->reg = reg;
                item->reg_size = reg_size;
 
+               map_offset = 0;
+
                do {
                        for (b = 0; b < 7; b++) {
                                if (struct_buf[offset] & (0x1 << b))
index b4d3408..405252a 100644 (file)
@@ -1305,6 +1305,7 @@ static int __init i8042_create_aux_port(int idx)
        serio->write            = i8042_aux_write;
        serio->start            = i8042_start;
        serio->stop             = i8042_stop;
+       serio->ps2_cmd_mutex    = &i8042_mutex;
        serio->port_data        = port;
        serio->dev.parent       = &i8042_platform_device->dev;
        if (idx < 0) {
index a61b215..1ce3ecb 100644 (file)
@@ -1473,7 +1473,6 @@ static int ads7846_remove(struct spi_device *spi)
 
        ads784x_hwmon_unregister(spi, ts);
 
-       regulator_disable(ts->reg);
        regulator_put(ts->reg);
 
        if (!ts->get_pendown_state) {
index 7379fe1..b2744a6 100644 (file)
@@ -464,7 +464,7 @@ static int silead_ts_probe(struct i2c_client *client,
                return -ENODEV;
 
        /* Power GPIO pin */
-       data->gpio_power = gpiod_get_optional(dev, "power", GPIOD_OUT_LOW);
+       data->gpio_power = devm_gpiod_get_optional(dev, "power", GPIOD_OUT_LOW);
        if (IS_ERR(data->gpio_power)) {
                if (PTR_ERR(data->gpio_power) != -EPROBE_DEFER)
                        dev_err(dev, "Shutdown GPIO request failed\n");
index ce80117..641e887 100644 (file)
@@ -879,7 +879,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
         * We may have concurrent producers, so we need to be careful
         * not to touch any of the shadow cmdq state.
         */
-       queue_read(cmd, Q_ENT(q, idx), q->ent_dwords);
+       queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
        dev_err(smmu->dev, "skipping command in error state:\n");
        for (i = 0; i < ARRAY_SIZE(cmd); ++i)
                dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
@@ -890,7 +890,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
                return;
        }
 
-       queue_write(cmd, Q_ENT(q, idx), q->ent_dwords);
+       queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
 }
 
 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
@@ -1034,6 +1034,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
                case STRTAB_STE_0_CFG_S2_TRANS:
                        ste_live = true;
                        break;
+               case STRTAB_STE_0_CFG_ABORT:
+                       if (disable_bypass)
+                               break;
                default:
                        BUG(); /* STE corruption */
                }
index 4f49fe2..2db74eb 100644 (file)
@@ -686,8 +686,7 @@ static struct iommu_gather_ops arm_smmu_gather_ops = {
 
 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
 {
-       int flags, ret;
-       u32 fsr, fsynr, resume;
+       u32 fsr, fsynr;
        unsigned long iova;
        struct iommu_domain *domain = dev;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -701,34 +700,15 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
        if (!(fsr & FSR_FAULT))
                return IRQ_NONE;
 
-       if (fsr & FSR_IGN)
-               dev_err_ratelimited(smmu->dev,
-                                   "Unexpected context fault (fsr 0x%x)\n",
-                                   fsr);
-
        fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
-       flags = fsynr & FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ;
-
        iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
-       if (!report_iommu_fault(domain, smmu->dev, iova, flags)) {
-               ret = IRQ_HANDLED;
-               resume = RESUME_RETRY;
-       } else {
-               dev_err_ratelimited(smmu->dev,
-                   "Unhandled context fault: iova=0x%08lx, fsynr=0x%x, cb=%d\n",
-                   iova, fsynr, cfg->cbndx);
-               ret = IRQ_NONE;
-               resume = RESUME_TERMINATE;
-       }
-
-       /* Clear the faulting FSR */
-       writel(fsr, cb_base + ARM_SMMU_CB_FSR);
 
-       /* Retry or terminate any stalled transactions */
-       if (fsr & FSR_SS)
-               writel_relaxed(resume, cb_base + ARM_SMMU_CB_RESUME);
+       dev_err_ratelimited(smmu->dev,
+       "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
+                           fsr, iova, fsynr, cfg->cbndx);
 
-       return ret;
+       writel(fsr, cb_base + ARM_SMMU_CB_FSR);
+       return IRQ_HANDLED;
 }
 
 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
@@ -837,7 +817,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
        }
 
        /* SCTLR */
-       reg = SCTLR_CFCFG | SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP;
+       reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP;
        if (stage1)
                reg |= SCTLR_S1_ASIDPNE;
 #ifdef __BIG_ENDIAN
index 08a1e2f..00c8a08 100644 (file)
@@ -68,7 +68,8 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
        if (!iovad)
                return;
 
-       put_iova_domain(iovad);
+       if (iovad->granule)
+               put_iova_domain(iovad);
        kfree(iovad);
        domain->iova_cookie = NULL;
 }
@@ -151,12 +152,15 @@ int dma_direction_to_prot(enum dma_data_direction dir, bool coherent)
        }
 }
 
-static struct iova *__alloc_iova(struct iova_domain *iovad, size_t size,
+static struct iova *__alloc_iova(struct iommu_domain *domain, size_t size,
                dma_addr_t dma_limit)
 {
+       struct iova_domain *iovad = domain->iova_cookie;
        unsigned long shift = iova_shift(iovad);
        unsigned long length = iova_align(iovad, size) >> shift;
 
+       if (domain->geometry.force_aperture)
+               dma_limit = min(dma_limit, domain->geometry.aperture_end);
        /*
         * Enforce size-alignment to be safe - there could perhaps be an
         * attribute to control this per-device, or at least per-domain...
@@ -314,7 +318,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
        if (!pages)
                return NULL;
 
-       iova = __alloc_iova(iovad, size, dev->coherent_dma_mask);
+       iova = __alloc_iova(domain, size, dev->coherent_dma_mask);
        if (!iova)
                goto out_free_pages;
 
@@ -386,7 +390,7 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
        phys_addr_t phys = page_to_phys(page) + offset;
        size_t iova_off = iova_offset(iovad, phys);
        size_t len = iova_align(iovad, size + iova_off);
-       struct iova *iova = __alloc_iova(iovad, len, dma_get_mask(dev));
+       struct iova *iova = __alloc_iova(domain, len, dma_get_mask(dev));
 
        if (!iova)
                return DMA_ERROR_CODE;
@@ -538,7 +542,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
                prev = s;
        }
 
-       iova = __alloc_iova(iovad, iova_len, dma_get_mask(dev));
+       iova = __alloc_iova(domain, iova_len, dma_get_mask(dev));
        if (!iova)
                goto out_restore_sg;
 
index 8c61399..def8ca1 100644 (file)
@@ -286,12 +286,14 @@ static int arm_v7s_pte_to_prot(arm_v7s_iopte pte, int lvl)
        int prot = IOMMU_READ;
        arm_v7s_iopte attr = pte >> ARM_V7S_ATTR_SHIFT(lvl);
 
-       if (attr & ARM_V7S_PTE_AP_RDONLY)
+       if (!(attr & ARM_V7S_PTE_AP_RDONLY))
                prot |= IOMMU_WRITE;
        if ((attr & (ARM_V7S_TEX_MASK << ARM_V7S_TEX_SHIFT)) == 0)
                prot |= IOMMU_MMIO;
        else if (pte & ARM_V7S_ATTR_C)
                prot |= IOMMU_CACHE;
+       if (pte & ARM_V7S_ATTR_XN(lvl))
+               prot |= IOMMU_NOEXEC;
 
        return prot;
 }
index 9ed0a84..3dab13b 100644 (file)
@@ -55,19 +55,19 @@ struct mtk_iommu_data {
        bool                            enable_4GB;
 };
 
-static int compare_of(struct device *dev, void *data)
+static inline int compare_of(struct device *dev, void *data)
 {
        return dev->of_node == data;
 }
 
-static int mtk_iommu_bind(struct device *dev)
+static inline int mtk_iommu_bind(struct device *dev)
 {
        struct mtk_iommu_data *data = dev_get_drvdata(dev);
 
        return component_bind_all(dev, &data->smi_imu);
 }
 
-static void mtk_iommu_unbind(struct device *dev)
+static inline void mtk_iommu_unbind(struct device *dev)
 {
        struct mtk_iommu_data *data = dev_get_drvdata(dev);
 
index 7ceaba8..36b9c28 100644 (file)
@@ -1545,7 +1545,12 @@ static int its_force_quiescent(void __iomem *base)
        u32 val;
 
        val = readl_relaxed(base + GITS_CTLR);
-       if (val & GITS_CTLR_QUIESCENT)
+       /*
+        * GIC architecture specification requires the ITS to be both
+        * disabled and quiescent for writes to GITS_BASER<n> or
+        * GITS_CBASER to not have UNPREDICTABLE results.
+        */
+       if ((val & GITS_CTLR_QUIESCENT) && !(val & GITS_CTLR_ENABLE))
                return 0;
 
        /* Disable the generation of all interrupts to this ITS */
index 6fc56c3..ede5672 100644 (file)
@@ -667,13 +667,20 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 #endif
 
 #ifdef CONFIG_CPU_PM
+/* Check whether it's single security state view */
+static bool gic_dist_security_disabled(void)
+{
+       return readl_relaxed(gic_data.dist_base + GICD_CTLR) & GICD_CTLR_DS;
+}
+
 static int gic_cpu_pm_notifier(struct notifier_block *self,
                               unsigned long cmd, void *v)
 {
        if (cmd == CPU_PM_EXIT) {
-               gic_enable_redist(true);
+               if (gic_dist_security_disabled())
+                       gic_enable_redist(true);
                gic_cpu_sys_reg_init();
-       } else if (cmd == CPU_PM_ENTER) {
+       } else if (cmd == CPU_PM_ENTER && gic_dist_security_disabled()) {
                gic_write_grpen1(0);
                gic_enable_redist(false);
        }
index c2cab57..390fac5 100644 (file)
@@ -769,6 +769,13 @@ static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
        int cpu;
        unsigned long flags, map = 0;
 
+       if (unlikely(nr_cpu_ids == 1)) {
+               /* Only one CPU? let's do a self-IPI... */
+               writel_relaxed(2 << 24 | irq,
+                              gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
+               return;
+       }
+
        raw_spin_lock_irqsave(&irq_controller_lock, flags);
 
        /* Convert our logical CPU mask into a physical one. */
index c5f33c3..83f4983 100644 (file)
@@ -713,9 +713,6 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq,
        unsigned long flags;
        int i;
 
-       irq_set_chip_and_handler(virq, &gic_level_irq_controller,
-                                handle_level_irq);
-
        spin_lock_irqsave(&gic_lock, flags);
        gic_map_to_pin(intr, gic_cpu_pin);
        gic_map_to_vpe(intr, mips_cm_vp_id(vpe));
@@ -732,6 +729,10 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq,
 {
        if (GIC_HWIRQ_TO_LOCAL(hw) < GIC_NUM_LOCAL_INTRS)
                return gic_local_irq_domain_map(d, virq, hw);
+
+       irq_set_chip_and_handler(virq, &gic_level_irq_controller,
+                                handle_level_irq);
+
        return gic_shared_irq_domain_map(d, virq, hw, 0);
 }
 
@@ -771,11 +772,13 @@ static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq,
                        hwirq = GIC_SHARED_TO_HWIRQ(base_hwirq + i);
 
                        ret = irq_domain_set_hwirq_and_chip(d, virq + i, hwirq,
-                                                           &gic_edge_irq_controller,
+                                                           &gic_level_irq_controller,
                                                            NULL);
                        if (ret)
                                goto error;
 
+                       irq_set_handler(virq + i, handle_level_irq);
+
                        ret = gic_shared_irq_domain_map(d, virq + i, hwirq, cpu);
                        if (ret)
                                goto error;
@@ -890,10 +893,17 @@ void gic_dev_domain_free(struct irq_domain *d, unsigned int virq,
        return;
 }
 
+static void gic_dev_domain_activate(struct irq_domain *domain,
+                                   struct irq_data *d)
+{
+       gic_shared_irq_domain_map(domain, d->irq, d->hwirq, 0);
+}
+
 static struct irq_domain_ops gic_dev_domain_ops = {
        .xlate = gic_dev_domain_xlate,
        .alloc = gic_dev_domain_alloc,
        .free = gic_dev_domain_free,
+       .activate = gic_dev_domain_activate,
 };
 
 static int gic_ipi_domain_xlate(struct irq_domain *d, struct device_node *ctrlr,
index 978eda8..8a3ba56 100644 (file)
@@ -73,7 +73,6 @@ MODULE_DEVICE_TABLE(i2c, ams_id);
 static struct i2c_driver ams_i2c_driver = {
        .driver = {
                .name   = "ams",
-               .owner  = THIS_MODULE,
        },
        .probe          = ams_i2c_probe,
        .remove         = ams_i2c_remove,
index 3024685..96d16fc 100644 (file)
@@ -668,7 +668,6 @@ static struct platform_driver wf_pm112_driver = {
        .remove = wf_pm112_remove,
        .driver = {
                .name = "windfarm",
-               .owner  = THIS_MODULE,
        },
 };
 
index 2f506b9..e88cfb3 100644 (file)
@@ -789,7 +789,6 @@ static struct platform_driver wf_pm72_driver = {
        .remove = wf_pm72_remove,
        .driver = {
                .name = "windfarm",
-               .owner  = THIS_MODULE,
        },
 };
 
index 82fc86a..bdfcb8a 100644 (file)
@@ -682,7 +682,6 @@ static struct platform_driver wf_rm31_driver = {
        .remove = wf_rm31_remove,
        .driver = {
                .name = "windfarm",
-               .owner  = THIS_MODULE,
        },
 };
 
index 95a4ca6..849ad44 100644 (file)
@@ -760,7 +760,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
        if (!d->nr_stripes ||
            d->nr_stripes > INT_MAX ||
            d->nr_stripes > SIZE_MAX / sizeof(atomic_t)) {
-               pr_err("nr_stripes too large");
+               pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)",
+                       (unsigned)d->nr_stripes);
                return -ENOMEM;
        }
 
@@ -1820,7 +1821,7 @@ static int cache_alloc(struct cache *ca)
        free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
 
        if (!init_fifo(&ca->free[RESERVE_BTREE], 8, GFP_KERNEL) ||
-           !init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
+           !init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
            !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) ||
            !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) ||
            !init_fifo(&ca->free_inc,   free << 2, GFP_KERNEL) ||
@@ -1844,7 +1845,7 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
                                struct block_device *bdev, struct cache *ca)
 {
        char name[BDEVNAME_SIZE];
-       const char *err = NULL;
+       const char *err = NULL; /* must be set for any error case */
        int ret = 0;
 
        memcpy(&ca->sb, sb, sizeof(struct cache_sb));
@@ -1861,8 +1862,13 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
                ca->discard = CACHE_DISCARD(&ca->sb);
 
        ret = cache_alloc(ca);
-       if (ret != 0)
+       if (ret != 0) {
+               if (ret == -ENOMEM)
+                       err = "cache_alloc(): -ENOMEM";
+               else
+                       err = "cache_alloc(): unknown error";
                goto err;
+       }
 
        if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) {
                err = "error calling kobject_add";
index 4e9784b..eedba67 100644 (file)
@@ -181,7 +181,7 @@ struct crypt_config {
        u8 key[0];
 };
 
-#define MIN_IOS        16
+#define MIN_IOS        64
 
 static void clone_init(struct dm_crypt_io *, struct bio *);
 static void kcryptd_queue_crypt(struct dm_crypt_io *io);
index 97e446d..6a2e8dd 100644 (file)
@@ -289,15 +289,13 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
                pb->bio_submitted = true;
 
                /*
-                * Map reads as normal only if corrupt_bio_byte set.
+                * Error reads if neither corrupt_bio_byte or drop_writes are set.
+                * Otherwise, flakey_end_io() will decide if the reads should be modified.
                 */
                if (bio_data_dir(bio) == READ) {
-                       /* If flags were specified, only corrupt those that match. */
-                       if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
-                           all_corrupt_bio_flags_match(bio, fc))
-                               goto map_bio;
-                       else
+                       if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags))
                                return -EIO;
+                       goto map_bio;
                }
 
                /*
@@ -334,14 +332,21 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
        struct flakey_c *fc = ti->private;
        struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
 
-       /*
-        * Corrupt successful READs while in down state.
-        */
        if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
-               if (fc->corrupt_bio_byte)
+               if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
+                   all_corrupt_bio_flags_match(bio, fc)) {
+                       /*
+                        * Corrupt successful matching READs while in down state.
+                        */
                        corrupt_bio_data(bio, fc);
-               else
+
+               } else if (!test_bit(DROP_WRITES, &fc->flags)) {
+                       /*
+                        * Error read during the down_interval if drop_writes
+                        * wasn't configured.
+                        */
                        return -EIO;
+               }
        }
 
        return error;
index 4ca2d1d..07fc1ad 100644 (file)
@@ -291,9 +291,10 @@ static void header_from_disk(struct log_header_core *core, struct log_header_dis
        core->nr_regions = le64_to_cpu(disk->nr_regions);
 }
 
-static int rw_header(struct log_c *lc, int rw)
+static int rw_header(struct log_c *lc, int op)
 {
-       lc->io_req.bi_op = rw;
+       lc->io_req.bi_op = op;
+       lc->io_req.bi_op_flags = 0;
 
        return dm_io(&lc->io_req, 1, &lc->header_location, NULL);
 }
@@ -316,7 +317,7 @@ static int read_header(struct log_c *log)
 {
        int r;
 
-       r = rw_header(log, READ);
+       r = rw_header(log, REQ_OP_READ);
        if (r)
                return r;
 
@@ -630,7 +631,7 @@ static int disk_resume(struct dm_dirty_log *log)
        header_to_disk(&lc->header, lc->disk_header);
 
        /* write the new header */
-       r = rw_header(lc, WRITE);
+       r = rw_header(lc, REQ_OP_WRITE);
        if (!r) {
                r = flush_header(lc);
                if (r)
@@ -698,7 +699,7 @@ static int disk_flush(struct dm_dirty_log *log)
                        log_clear_bit(lc, lc->clean_bits, i);
        }
 
-       r = rw_header(lc, WRITE);
+       r = rw_header(lc, REQ_OP_WRITE);
        if (r)
                fail_log_device(lc);
        else {
index 1b9795d..8abde6b 100644 (file)
@@ -191,7 +191,6 @@ struct raid_dev {
 #define RT_FLAG_RS_BITMAP_LOADED       2
 #define RT_FLAG_UPDATE_SBS             3
 #define RT_FLAG_RESHAPE_RS             4
-#define RT_FLAG_KEEP_RS_FROZEN         5
 
 /* Array elements of 64 bit needed for rebuild/failed disk bits */
 #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
@@ -861,6 +860,9 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
 {
        unsigned long min_region_size = rs->ti->len / (1 << 21);
 
+       if (rs_is_raid0(rs))
+               return 0;
+
        if (!region_size) {
                /*
                 * Choose a reasonable default.  All figures in sectors.
@@ -930,6 +932,8 @@ static int validate_raid_redundancy(struct raid_set *rs)
                        rebuild_cnt++;
 
        switch (rs->raid_type->level) {
+       case 0:
+               break;
        case 1:
                if (rebuild_cnt >= rs->md.raid_disks)
                        goto too_many;
@@ -2335,6 +2339,13 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
                case 0:
                        break;
                default:
+                       /*
+                        * We have to keep any raid0 data/metadata device pairs or
+                        * the MD raid0 personality will fail to start the array.
+                        */
+                       if (rs_is_raid0(rs))
+                               continue;
+
                        dev = container_of(rdev, struct raid_dev, rdev);
                        if (dev->meta_dev)
                                dm_put_device(ti, dev->meta_dev);
@@ -2579,7 +2590,6 @@ static int rs_prepare_reshape(struct raid_set *rs)
                } else {
                        /* Process raid1 without delta_disks */
                        mddev->raid_disks = rs->raid_disks;
-                       set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
                        reshape = false;
                }
        } else {
@@ -2590,7 +2600,6 @@ static int rs_prepare_reshape(struct raid_set *rs)
        if (reshape) {
                set_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags);
                set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
-               set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
        } else if (mddev->raid_disks < rs->raid_disks)
                /* Create new superblocks and bitmaps, if any new disks */
                set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
@@ -2902,7 +2911,6 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                        goto bad;
 
                set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
-               set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
                /* Takeover ain't recovery, so disable recovery */
                rs_setup_recovery(rs, MaxSector);
                rs_set_new(rs);
@@ -3386,21 +3394,28 @@ static void raid_postsuspend(struct dm_target *ti)
 {
        struct raid_set *rs = ti->private;
 
-       if (test_and_clear_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags)) {
-               if (!rs->md.suspended)
-                       mddev_suspend(&rs->md);
-               rs->md.ro = 1;
-       }
+       if (!rs->md.suspended)
+               mddev_suspend(&rs->md);
+
+       rs->md.ro = 1;
 }
 
 static void attempt_restore_of_faulty_devices(struct raid_set *rs)
 {
        int i;
-       uint64_t failed_devices, cleared_failed_devices = 0;
+       uint64_t cleared_failed_devices[DISKS_ARRAY_ELEMS];
        unsigned long flags;
+       bool cleared = false;
        struct dm_raid_superblock *sb;
+       struct mddev *mddev = &rs->md;
        struct md_rdev *r;
 
+       /* RAID personalities have to provide hot add/remove methods or we need to bail out. */
+       if (!mddev->pers || !mddev->pers->hot_add_disk || !mddev->pers->hot_remove_disk)
+               return;
+
+       memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));
+
        for (i = 0; i < rs->md.raid_disks; i++) {
                r = &rs->dev[i].rdev;
                if (test_bit(Faulty, &r->flags) && r->sb_page &&
@@ -3420,7 +3435,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
                         * ourselves.
                         */
                        if ((r->raid_disk >= 0) &&
-                           (r->mddev->pers->hot_remove_disk(r->mddev, r) != 0))
+                           (mddev->pers->hot_remove_disk(mddev, r) != 0))
                                /* Failed to revive this device, try next */
                                continue;
 
@@ -3430,22 +3445,30 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
                        clear_bit(Faulty, &r->flags);
                        clear_bit(WriteErrorSeen, &r->flags);
                        clear_bit(In_sync, &r->flags);
-                       if (r->mddev->pers->hot_add_disk(r->mddev, r)) {
+                       if (mddev->pers->hot_add_disk(mddev, r)) {
                                r->raid_disk = -1;
                                r->saved_raid_disk = -1;
                                r->flags = flags;
                        } else {
                                r->recovery_offset = 0;
-                               cleared_failed_devices |= 1 << i;
+                               set_bit(i, (void *) cleared_failed_devices);
+                               cleared = true;
                        }
                }
        }
-       if (cleared_failed_devices) {
+
+       /* If any failed devices could be cleared, update all sbs failed_devices bits */
+       if (cleared) {
+               uint64_t failed_devices[DISKS_ARRAY_ELEMS];
+
                rdev_for_each(r, &rs->md) {
                        sb = page_address(r->sb_page);
-                       failed_devices = le64_to_cpu(sb->failed_devices);
-                       failed_devices &= ~cleared_failed_devices;
-                       sb->failed_devices = cpu_to_le64(failed_devices);
+                       sb_retrieve_failed_devices(sb, failed_devices);
+
+                       for (i = 0; i < DISKS_ARRAY_ELEMS; i++)
+                               failed_devices[i] &= ~cleared_failed_devices[i];
+
+                       sb_update_failed_devices(sb, failed_devices);
                }
        }
 }
@@ -3610,26 +3633,15 @@ static void raid_resume(struct dm_target *ti)
                 * devices are reachable again.
                 */
                attempt_restore_of_faulty_devices(rs);
-       } else {
-               mddev->ro = 0;
-               mddev->in_sync = 0;
+       }
 
-               /*
-                * When passing in flags to the ctr, we expect userspace
-                * to reset them because they made it to the superblocks
-                * and reload the mapping anyway.
-                *
-                * -> only unfreeze recovery in case of a table reload or
-                *    we'll have a bogus recovery/reshape position
-                *    retrieved from the superblock by the ctr because
-                *    the ongoing recovery/reshape will change it after read.
-                */
-               if (!test_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags))
-                       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+       mddev->ro = 0;
+       mddev->in_sync = 0;
 
-               if (mddev->suspended)
-                       mddev_resume(mddev);
-       }
+       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+
+       if (mddev->suspended)
+               mddev_resume(mddev);
 }
 
 static struct target_type raid_target = {
index 4ace1da..6c25213 100644 (file)
@@ -210,14 +210,17 @@ static struct dm_path *rr_select_path(struct path_selector *ps, size_t nr_bytes)
        struct path_info *pi = NULL;
        struct dm_path *current_path = NULL;
 
+       local_irq_save(flags);
        current_path = *this_cpu_ptr(s->current_path);
        if (current_path) {
                percpu_counter_dec(&s->repeat_count);
-               if (percpu_counter_read_positive(&s->repeat_count) > 0)
+               if (percpu_counter_read_positive(&s->repeat_count) > 0) {
+                       local_irq_restore(flags);
                        return current_path;
+               }
        }
 
-       spin_lock_irqsave(&s->lock, flags);
+       spin_lock(&s->lock);
        if (!list_empty(&s->valid_paths)) {
                pi = list_entry(s->valid_paths.next, struct path_info, list);
                list_move_tail(&pi->list, &s->valid_paths);
index 4387ccb..7410c6d 100644 (file)
@@ -69,5 +69,6 @@ OBJCOPYFLAGS :=
 OBJCOPYFLAGS_lkdtm_rodata_objcopy.o := \
                        --set-section-flags .text=alloc,readonly \
                        --rename-section .text=.rodata
-$(obj)/lkdtm_rodata_objcopy.o: $(obj)/lkdtm_rodata.o
+targets += lkdtm_rodata.o lkdtm_rodata_objcopy.o
+$(obj)/lkdtm_rodata_objcopy.o: $(obj)/lkdtm_rodata.o FORCE
        $(call if_changed,objcopy)
index bdee9a0..c466ee2 100644 (file)
@@ -90,8 +90,7 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
         */
        mutex_lock(&afu->contexts_lock);
        idr_preload(GFP_KERNEL);
-       i = idr_alloc(&ctx->afu->contexts_idr, ctx,
-                     ctx->afu->adapter->native->sl_ops->min_pe,
+       i = idr_alloc(&ctx->afu->contexts_idr, ctx, ctx->afu->adapter->min_pe,
                      ctx->afu->num_procs, GFP_NOWAIT);
        idr_preload_end();
        mutex_unlock(&afu->contexts_lock);
index de09053..344a0ff 100644 (file)
@@ -561,7 +561,6 @@ struct cxl_service_layer_ops {
        u64 (*timebase_read)(struct cxl *adapter);
        int capi_mode;
        bool needs_reset_before_disable;
-       int min_pe;
 };
 
 struct cxl_native {
@@ -603,6 +602,7 @@ struct cxl {
        struct bin_attribute cxl_attr;
        int adapter_num;
        int user_irqs;
+       int min_pe;
        u64 ps_size;
        u16 psl_rev;
        u16 base_image;
index 3bcdaee..e606fdc 100644 (file)
@@ -924,7 +924,7 @@ static irqreturn_t native_irq_multiplexed(int irq, void *data)
        return fail_psl_irq(afu, &irq_info);
 }
 
-void native_irq_wait(struct cxl_context *ctx)
+static void native_irq_wait(struct cxl_context *ctx)
 {
        u64 dsisr;
        int timeout = 1000;
index d152e2d..6f0c4ac 100644 (file)
@@ -379,7 +379,7 @@ static int calc_capp_routing(struct pci_dev *dev, u64 *chipid, u64 *capp_unit_id
 
 static int init_implementation_adapter_psl_regs(struct cxl *adapter, struct pci_dev *dev)
 {
-       u64 psl_dsnctl;
+       u64 psl_dsnctl, psl_fircntl;
        u64 chipid;
        u64 capp_unit_id;
        int rc;
@@ -398,8 +398,11 @@ static int init_implementation_adapter_psl_regs(struct cxl *adapter, struct pci_
        cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x20000000200ULL);
        /* snoop write mask */
        cxl_p1_write(adapter, CXL_PSL_SNWRALLOC, 0x00000000FFFFFFFFULL);
-       /* set fir_accum */
-       cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, 0x0800000000000000ULL);
+       /* set fir_cntl to recommended value for production env */
+       psl_fircntl = (0x2ULL << (63-3)); /* ce_report */
+       psl_fircntl |= (0x1ULL << (63-6)); /* FIR_report */
+       psl_fircntl |= 0x1ULL; /* ce_thresh */
+       cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, psl_fircntl);
        /* for debugging with trace arrays */
        cxl_p1_write(adapter, CXL_PSL_TRACE, 0x0000FF7C00000000ULL);
 
@@ -1521,14 +1524,15 @@ static const struct cxl_service_layer_ops xsl_ops = {
        .write_timebase_ctrl = write_timebase_ctrl_xsl,
        .timebase_read = timebase_read_xsl,
        .capi_mode = OPAL_PHB_CAPI_MODE_DMA,
-       .min_pe = 1, /* Workaround for Mellanox CX4 HW bug */
 };
 
 static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev)
 {
        if (dev->vendor == PCI_VENDOR_ID_MELLANOX && dev->device == 0x1013) {
+               /* Mellanox CX-4 */
                dev_info(&adapter->dev, "Device uses an XSL\n");
                adapter->native->sl_ops = &xsl_ops;
+               adapter->min_pe = 1; /* Workaround for CX-4 hardware bug */
        } else {
                dev_info(&adapter->dev, "Device uses a PSL\n");
                adapter->native->sl_ops = &psl_ops;
index dee8def..3519ace 100644 (file)
@@ -221,7 +221,7 @@ int cxl_pci_vphb_add(struct cxl_afu *afu)
        /* Setup the PHB using arch provided callback */
        phb->ops = &cxl_pcie_pci_ops;
        phb->cfg_addr = NULL;
-       phb->cfg_data = 0;
+       phb->cfg_data = NULL;
        phb->private_data = afu;
        phb->controller_ops = cxl_pci_controller_ops;
 
@@ -230,6 +230,11 @@ int cxl_pci_vphb_add(struct cxl_afu *afu)
        if (phb->bus == NULL)
                return -ENXIO;
 
+       /* Set release hook on root bus */
+       pci_set_host_bridge_release(to_pci_host_bridge(phb->bus->bridge),
+                                   pcibios_free_controller_deferred,
+                                   (void *) phb);
+
        /* Claim resources. This might need some rework as well depending
         * whether we are doing probe-only or not, like assigning unassigned
         * resources etc...
@@ -256,7 +261,10 @@ void cxl_pci_vphb_remove(struct cxl_afu *afu)
        afu->phb = NULL;
 
        pci_remove_root_bus(phb->bus);
-       pcibios_free_controller(phb);
+       /*
+        * We don't free phb here - that's handled by
+        * pcibios_free_controller_deferred()
+        */
 }
 
 static bool _cxl_pci_is_vphb_device(struct pci_controller *phb)
index 5a3fd76..5525a20 100644 (file)
@@ -49,7 +49,7 @@ static noinline void do_usercopy_stack(bool to_user, bool bad_frame)
 
        /* This is a pointer to outside our current stack frame. */
        if (bad_frame) {
-               bad_stack = do_usercopy_stack_callee((uintptr_t)bad_stack);
+               bad_stack = do_usercopy_stack_callee((uintptr_t)&bad_stack);
        } else {
                /* Put start address just inside stack. */
                bad_stack = task_stack_page(current) + THREAD_SIZE;
index 48a5dd7..2206d44 100644 (file)
@@ -1726,6 +1726,7 @@ static u8 mmc_blk_prep_packed_list(struct mmc_queue *mq, struct request *req)
                        break;
 
                if (req_op(next) == REQ_OP_DISCARD ||
+                   req_op(next) == REQ_OP_SECURE_ERASE ||
                    req_op(next) == REQ_OP_FLUSH)
                        break;
 
@@ -2150,6 +2151,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
        struct mmc_card *card = md->queue.card;
        struct mmc_host *host = card->host;
        unsigned long flags;
+       bool req_is_special = mmc_req_is_special(req);
 
        if (req && !mq->mqrq_prev->req)
                /* claim host only for the first request */
@@ -2190,8 +2192,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
        }
 
 out:
-       if ((!req && !(mq->flags & MMC_QUEUE_NEW_REQUEST)) ||
-           mmc_req_is_special(req))
+       if ((!req && !(mq->flags & MMC_QUEUE_NEW_REQUEST)) || req_is_special)
                /*
                 * Release host when there are no more requests
                 * and after special request(discard, flush) is done.
index bf14642..7080572 100644 (file)
@@ -33,7 +33,8 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
        /*
         * We only like normal block requests and discards.
         */
-       if (req->cmd_type != REQ_TYPE_FS && req_op(req) != REQ_OP_DISCARD) {
+       if (req->cmd_type != REQ_TYPE_FS && req_op(req) != REQ_OP_DISCARD &&
+           req_op(req) != REQ_OP_SECURE_ERASE) {
                blk_dump_rq_flags(req, "MMC bad request");
                return BLKPREP_KILL;
        }
@@ -64,6 +65,8 @@ static int mmc_queue_thread(void *d)
                spin_unlock_irq(q->queue_lock);
 
                if (req || mq->mqrq_prev->req) {
+                       bool req_is_special = mmc_req_is_special(req);
+
                        set_current_state(TASK_RUNNING);
                        mq->issue_fn(mq, req);
                        cond_resched();
@@ -79,7 +82,7 @@ static int mmc_queue_thread(void *d)
                         * has been finished. Do not assign it to previous
                         * request.
                         */
-                       if (mmc_req_is_special(req))
+                       if (req_is_special)
                                mq->mqrq_cur->req = NULL;
 
                        mq->mqrq_prev->brq.mrq.data = NULL;
index d625311..fee5e12 100644 (file)
@@ -4,7 +4,9 @@
 static inline bool mmc_req_is_special(struct request *req)
 {
        return req &&
-               (req_op(req) == REQ_OP_FLUSH || req_op(req) == REQ_OP_DISCARD);
+               (req_op(req) == REQ_OP_FLUSH ||
+                req_op(req) == REQ_OP_DISCARD ||
+                req_op(req) == REQ_OP_SECURE_ERASE);
 }
 
 struct request;
index 1f276fa..217e8da 100644 (file)
@@ -152,7 +152,7 @@ module_param(lacp_rate, charp, 0);
 MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner; "
                            "0 for slow, 1 for fast");
 module_param(ad_select, charp, 0);
-MODULE_PARM_DESC(ad_select, "803.ad aggregation selection logic; "
+MODULE_PARM_DESC(ad_select, "802.3ad aggregation selection logic; "
                            "0 for stable (default), 1 for bandwidth, "
                            "2 for count");
 module_param(min_links, int, 0);
index 8f12bdd..a0b453e 100644 (file)
  * BCM5325 and BCM5365 share most definitions below
  */
 #define B53_ARLTBL_MAC_VID_ENTRY(n)    (0x10 * (n))
-#define   ARLTBL_MAC_MASK              0xffffffffffff
+#define   ARLTBL_MAC_MASK              0xffffffffffffULL
 #define   ARLTBL_VID_S                 48
 #define   ARLTBL_VID_MASK_25           0xff
 #define   ARLTBL_VID_MASK              0xfff
index 463bed8..dd446e4 100644 (file)
@@ -205,8 +205,8 @@ static inline void name##_writeq(struct bcm_sf2_priv *priv, u64 val,        \
 static inline void intrl2_##which##_mask_clear(struct bcm_sf2_priv *priv, \
                                                u32 mask)               \
 {                                                                      \
-       intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR);     \
        priv->irq##which##_mask &= ~(mask);                             \
+       intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR);     \
 }                                                                      \
 static inline void intrl2_##which##_mask_set(struct bcm_sf2_priv *priv, \
                                                u32 mask)               \
index d36aedd..7106790 100644 (file)
@@ -2656,15 +2656,19 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
                        return ret;
        }
 
+       /* Rate Control: disable ingress rate limiting. */
        if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) ||
            mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) ||
-           mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip) ||
            mv88e6xxx_6320_family(chip)) {
-               /* Rate Control: disable ingress rate limiting. */
                ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
                                           PORT_RATE_CONTROL, 0x0001);
                if (ret)
                        return ret;
+       } else if (mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip)) {
+               ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
+                                          PORT_RATE_CONTROL, 0x0000);
+               if (ret)
+                       return ret;
        }
 
        /* Port Control 1: disable trunking, disable sending
@@ -3187,6 +3191,7 @@ static int mv88e6xxx_set_addr(struct dsa_switch *ds, u8 *addr)
        return err;
 }
 
+#ifdef CONFIG_NET_DSA_HWMON
 static int mv88e6xxx_mdio_page_read(struct dsa_switch *ds, int port, int page,
                                    int reg)
 {
@@ -3212,6 +3217,7 @@ static int mv88e6xxx_mdio_page_write(struct dsa_switch *ds, int port, int page,
 
        return ret;
 }
+#endif
 
 static int mv88e6xxx_port_to_mdio_addr(struct mv88e6xxx_chip *chip, int port)
 {
index 37a0f46..18bb955 100644 (file)
@@ -793,6 +793,8 @@ int xgene_enet_phy_connect(struct net_device *ndev)
                        netdev_err(ndev, "Could not connect to PHY\n");
                        return  -ENODEV;
                }
+#else
+               return -ENODEV;
 #endif
        }
 
index 4bff0f3..b0da969 100644 (file)
@@ -771,8 +771,10 @@ int arc_emac_probe(struct net_device *ndev, int interface)
        priv->dev = dev;
 
        priv->regs = devm_ioremap_resource(dev, &res_regs);
-       if (IS_ERR(priv->regs))
-               return PTR_ERR(priv->regs);
+       if (IS_ERR(priv->regs)) {
+               err = PTR_ERR(priv->regs);
+               goto out_put_node;
+       }
 
        dev_dbg(dev, "Registers base address is 0x%p\n", priv->regs);
 
index 6453148..4eb17da 100644 (file)
@@ -1545,6 +1545,8 @@ static const struct pci_device_id alx_pci_tbl[] = {
          .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
        { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2400),
          .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
+       { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2500),
+         .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
        { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8162),
          .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
        { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8171) },
index 0959e68..1fc2d85 100644 (file)
@@ -38,6 +38,7 @@
 #define ALX_DEV_ID_AR8161                              0x1091
 #define ALX_DEV_ID_E2200                               0xe091
 #define ALX_DEV_ID_E2400                               0xe0a1
+#define ALX_DEV_ID_E2500                               0xe0b1
 #define ALX_DEV_ID_AR8162                              0x1090
 #define ALX_DEV_ID_AR8171                              0x10A1
 #define ALX_DEV_ID_AR8172                              0x10A0
index 9a9745c..625235d 100644 (file)
@@ -159,7 +159,7 @@ static int bgmac_probe(struct bcma_device *core)
 
        if (!bgmac_is_bcm4707_family(core)) {
                mii_bus = bcma_mdio_mii_register(core, bgmac->phyaddr);
-               if (!IS_ERR(mii_bus)) {
+               if (IS_ERR(mii_bus)) {
                        err = PTR_ERR(mii_bus);
                        goto err;
                }
index ff300f7..6592612 100644 (file)
@@ -12552,10 +12552,6 @@ static int tg3_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
                                info->data = TG3_RSS_MAX_NUM_QS;
                }
 
-               /* The first interrupt vector only
-                * handles link interrupts.
-                */
-               info->data -= 1;
                return 0;
 
        default:
@@ -14014,6 +14010,7 @@ static int tg3_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
        }
 
        if ((ec->rx_coalesce_usecs > MAX_RXCOL_TICKS) ||
+           (!ec->rx_coalesce_usecs) ||
            (ec->tx_coalesce_usecs > MAX_TXCOL_TICKS) ||
            (ec->rx_max_coalesced_frames > MAX_RXMAX_FRAMES) ||
            (ec->tx_max_coalesced_frames > MAX_TXMAX_FRAMES) ||
index 36893d8..b6fcf10 100644 (file)
 #define MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII    0x00000004
 #define MACB_CAPS_NO_GIGABIT_HALF              0x00000008
 #define MACB_CAPS_USRIO_DISABLED               0x00000010
+#define MACB_CAPS_JUMBO                                0x00000020
 #define MACB_CAPS_FIFO_MODE                    0x10000000
 #define MACB_CAPS_GIGABIT_MODE_AVAILABLE       0x20000000
 #define MACB_CAPS_SG_DISABLED                  0x40000000
 #define MACB_CAPS_MACB_IS_GEM                  0x80000000
-#define MACB_CAPS_JUMBO                                0x00000010
 
 /* Bit manipulation macros */
 #define MACB_BIT(name)                                 \
index afb10e3..fab35a5 100644 (file)
 #define   NIC_QSET_SQ_0_7_DOOR                 (0x010838)
 #define   NIC_QSET_SQ_0_7_STATUS               (0x010840)
 #define   NIC_QSET_SQ_0_7_DEBUG                        (0x010848)
-#define   NIC_QSET_SQ_0_7_CNM_CHG              (0x010860)
 #define   NIC_QSET_SQ_0_7_STAT_0_1             (0x010900)
 
 #define   NIC_QSET_RBDR_0_1_CFG                        (0x010C00)
index d2d8ef2..ad4fddb 100644 (file)
@@ -382,7 +382,10 @@ static void nicvf_get_regs(struct net_device *dev,
                p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_DOOR, q);
                p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STATUS, q);
                p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_DEBUG, q);
-               p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CNM_CHG, q);
+               /* Padding, was NIC_QSET_SQ_0_7_CNM_CHG, which
+                * produces bus errors when read
+                */
+               p[i++] = 0;
                p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1, q);
                reg_offset = NIC_QSET_SQ_0_7_STAT_0_1 | (1 << 3);
                p[i++] = nicvf_queue_reg_read(nic, reg_offset, q);
index c45de49..c762a8c 100644 (file)
@@ -4335,6 +4335,11 @@ static void cfg_queues(struct adapter *adap)
 #endif
        int ciq_size;
 
+       /* Reduce memory usage in kdump environment, disable all offload.
+        */
+       if (is_kdump_kernel())
+               adap->params.offload = 0;
+
        for_each_port(adap, i)
                n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg);
 #ifdef CONFIG_CHELSIO_T4_DCB
@@ -4365,11 +4370,6 @@ static void cfg_queues(struct adapter *adap)
        if (q10g > netif_get_num_default_rss_queues())
                q10g = netif_get_num_default_rss_queues();
 
-       /* Reduce memory usage in kdump environment, disable all offload.
-        */
-       if (is_kdump_kernel())
-               adap->params.offload = 0;
-
        for_each_port(adap, i) {
                struct port_info *pi = adap2pinfo(adap, i);
 
index 1471e16..f45385f 100644 (file)
@@ -1299,6 +1299,7 @@ static int
 dm9000_open(struct net_device *dev)
 {
        struct board_info *db = netdev_priv(dev);
+       unsigned int irq_flags = irq_get_trigger_type(dev->irq);
 
        if (netif_msg_ifup(db))
                dev_dbg(db->dev, "enabling %s\n", dev->name);
@@ -1306,9 +1307,11 @@ dm9000_open(struct net_device *dev)
        /* If there is no IRQ type specified, tell the user that this is a
         * problem
         */
-       if (irq_get_trigger_type(dev->irq) == IRQF_TRIGGER_NONE)
+       if (irq_flags == IRQF_TRIGGER_NONE)
                dev_warn(db->dev, "WARNING: no IRQ resource flags set.\n");
 
+       irq_flags |= IRQF_SHARED;
+
        /* GPIO0 on pre-activate PHY, Reg 1F is not set by reset */
        iow(db, DM9000_GPR, 0); /* REG_1F bit0 activate phyxcer */
        mdelay(1); /* delay needs by DM9000B */
@@ -1316,8 +1319,7 @@ dm9000_open(struct net_device *dev)
        /* Initialize DM9000 board */
        dm9000_init_dm9000(dev);
 
-       if (request_irq(dev->irq, dm9000_interrupt, IRQF_SHARED,
-                       dev->name, dev))
+       if (request_irq(dev->irq, dm9000_interrupt, irq_flags, dev->name, dev))
                return -EAGAIN;
        /* Now that we have an interrupt handler hooked up we can unmask
         * our interrupts
index d20935d..4b4f5bc 100644 (file)
@@ -2922,17 +2922,25 @@ static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus,
 {
        unsigned int size = lstatus & BD_LENGTH_MASK;
        struct page *page = rxb->page;
+       bool last = !!(lstatus & BD_LFLAG(RXBD_LAST));
 
        /* Remove the FCS from the packet length */
-       if (likely(lstatus & BD_LFLAG(RXBD_LAST)))
+       if (last)
                size -= ETH_FCS_LEN;
 
-       if (likely(first))
+       if (likely(first)) {
                skb_put(skb, size);
-       else
-               skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-                               rxb->page_offset + RXBUF_ALIGNMENT,
-                               size, GFAR_RXB_TRUESIZE);
+       } else {
+               /* the last fragment's length contains the full frame length */
+               if (last)
+                       size -= skb->len;
+
+               /* in case the last fragment consisted only of the FCS */
+               if (size > 0)
+                       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+                                       rxb->page_offset + RXBUF_ALIGNMENT,
+                                       size, GFAR_RXB_TRUESIZE);
+       }
 
        /* try reuse page */
        if (unlikely(page_count(page) != 1))
index 373fd09..6e8a9c8 100644 (file)
@@ -100,7 +100,8 @@ extern const char gfar_driver_version[];
 #define DEFAULT_RX_LFC_THR  16
 #define DEFAULT_LFC_PTVVAL  4
 
-#define GFAR_RXB_SIZE 1536
+/* prevent fragmentation by HW in DSA environments */
+#define GFAR_RXB_SIZE roundup(1536 + 8, 64)
 #define GFAR_SKBFRAG_SIZE (RXBUF_ALIGNMENT + GFAR_RXB_SIZE \
                          + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 #define GFAR_RXB_TRUESIZE 2048
index 1235c7f..1e1eb92 100644 (file)
@@ -17,7 +17,7 @@ static const struct mac_stats_string g_gmac_stats_string[] = {
        {"gmac_rx_octets_total_ok", MAC_STATS_FIELD_OFF(rx_good_bytes)},
        {"gmac_rx_octets_bad", MAC_STATS_FIELD_OFF(rx_bad_bytes)},
        {"gmac_rx_uc_pkts", MAC_STATS_FIELD_OFF(rx_uc_pkts)},
-       {"gamc_rx_mc_pkts", MAC_STATS_FIELD_OFF(rx_mc_pkts)},
+       {"gmac_rx_mc_pkts", MAC_STATS_FIELD_OFF(rx_mc_pkts)},
        {"gmac_rx_bc_pkts", MAC_STATS_FIELD_OFF(rx_bc_pkts)},
        {"gmac_rx_pkts_64octets", MAC_STATS_FIELD_OFF(rx_64bytes)},
        {"gmac_rx_pkts_65to127", MAC_STATS_FIELD_OFF(rx_65to127)},
index ff8b6a4..6ea8722 100644 (file)
@@ -328,9 +328,10 @@ static void hns_ppe_init_hw(struct hns_ppe_cb *ppe_cb)
 static void hns_ppe_uninit_hw(struct hns_ppe_cb *ppe_cb)
 {
        u32 port;
-       struct dsaf_device *dsaf_dev = ppe_cb->ppe_common_cb->dsaf_dev;
 
        if (ppe_cb->ppe_common_cb) {
+               struct dsaf_device *dsaf_dev = ppe_cb->ppe_common_cb->dsaf_dev;
+
                port = ppe_cb->index;
                dsaf_dev->misc_op->ppe_srst(dsaf_dev, port, 0);
        }
index 7fd4d54..6b03c85 100644 (file)
@@ -2032,7 +2032,8 @@ const struct e1000_info e1000_82574_info = {
                                  | FLAG2_DISABLE_ASPM_L0S
                                  | FLAG2_DISABLE_ASPM_L1
                                  | FLAG2_NO_DISABLE_RX
-                                 | FLAG2_DMA_BURST,
+                                 | FLAG2_DMA_BURST
+                                 | FLAG2_CHECK_SYSTIM_OVERFLOW,
        .pba                    = 32,
        .max_hw_frame_size      = DEFAULT_JUMBO,
        .get_variants           = e1000_get_variants_82571,
@@ -2053,7 +2054,8 @@ const struct e1000_info e1000_82583_info = {
                                  | FLAG_HAS_CTRLEXT_ON_LOAD,
        .flags2                 = FLAG2_DISABLE_ASPM_L0S
                                  | FLAG2_DISABLE_ASPM_L1
-                                 | FLAG2_NO_DISABLE_RX,
+                                 | FLAG2_NO_DISABLE_RX
+                                 | FLAG2_CHECK_SYSTIM_OVERFLOW,
        .pba                    = 32,
        .max_hw_frame_size      = DEFAULT_JUMBO,
        .get_variants           = e1000_get_variants_82571,
index ef96cd1..879cca4 100644 (file)
@@ -452,6 +452,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca);
 #define FLAG2_PCIM2PCI_ARBITER_WA         BIT(11)
 #define FLAG2_DFLT_CRC_STRIPPING          BIT(12)
 #define FLAG2_CHECK_RX_HWTSTAMP           BIT(13)
+#define FLAG2_CHECK_SYSTIM_OVERFLOW       BIT(14)
 
 #define E1000_RX_DESC_PS(R, i)     \
        (&(((union e1000_rx_desc_packet_split *)((R).desc))[i]))
index 3e11322..f3aaca7 100644 (file)
@@ -5885,7 +5885,8 @@ const struct e1000_info e1000_pch_lpt_info = {
                                  | FLAG_HAS_JUMBO_FRAMES
                                  | FLAG_APME_IN_WUC,
        .flags2                 = FLAG2_HAS_PHY_STATS
-                                 | FLAG2_HAS_EEE,
+                                 | FLAG2_HAS_EEE
+                                 | FLAG2_CHECK_SYSTIM_OVERFLOW,
        .pba                    = 26,
        .max_hw_frame_size      = 9022,
        .get_variants           = e1000_get_variants_ich8lan,
index 02f4439..7017281 100644 (file)
@@ -4302,6 +4302,42 @@ void e1000e_reinit_locked(struct e1000_adapter *adapter)
        clear_bit(__E1000_RESETTING, &adapter->state);
 }
 
+/**
+ * e1000e_sanitize_systim - sanitize raw cycle counter reads
+ * @hw: pointer to the HW structure
+ * @systim: cycle_t value read, sanitized and returned
+ *
+ * Errata for 82574/82583 possible bad bits read from SYSTIMH/L:
+ * check to see that the time is incrementing at a reasonable
+ * rate and is a multiple of incvalue.
+ **/
+static cycle_t e1000e_sanitize_systim(struct e1000_hw *hw, cycle_t systim)
+{
+       u64 time_delta, rem, temp;
+       cycle_t systim_next;
+       u32 incvalue;
+       int i;
+
+       incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK;
+       for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) {
+               /* latch SYSTIMH on read of SYSTIML */
+               systim_next = (cycle_t)er32(SYSTIML);
+               systim_next |= (cycle_t)er32(SYSTIMH) << 32;
+
+               time_delta = systim_next - systim;
+               temp = time_delta;
+               /* VMWare users have seen incvalue of zero, don't div / 0 */
+               rem = incvalue ? do_div(temp, incvalue) : (time_delta != 0);
+
+               systim = systim_next;
+
+               if ((time_delta < E1000_82574_SYSTIM_EPSILON) && (rem == 0))
+                       break;
+       }
+
+       return systim;
+}
+
 /**
  * e1000e_cyclecounter_read - read raw cycle counter (used by time counter)
  * @cc: cyclecounter structure
@@ -4312,7 +4348,7 @@ static cycle_t e1000e_cyclecounter_read(const struct cyclecounter *cc)
                                                     cc);
        struct e1000_hw *hw = &adapter->hw;
        u32 systimel, systimeh;
-       cycle_t systim, systim_next;
+       cycle_t systim;
        /* SYSTIMH latching upon SYSTIML read does not work well.
         * This means that if SYSTIML overflows after we read it but before
         * we read SYSTIMH, the value of SYSTIMH has been incremented and we
@@ -4335,33 +4371,9 @@ static cycle_t e1000e_cyclecounter_read(const struct cyclecounter *cc)
        systim = (cycle_t)systimel;
        systim |= (cycle_t)systimeh << 32;
 
-       if ((hw->mac.type == e1000_82574) || (hw->mac.type == e1000_82583)) {
-               u64 time_delta, rem, temp;
-               u32 incvalue;
-               int i;
-
-               /* errata for 82574/82583 possible bad bits read from SYSTIMH/L
-                * check to see that the time is incrementing at a reasonable
-                * rate and is a multiple of incvalue
-                */
-               incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK;
-               for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) {
-                       /* latch SYSTIMH on read of SYSTIML */
-                       systim_next = (cycle_t)er32(SYSTIML);
-                       systim_next |= (cycle_t)er32(SYSTIMH) << 32;
-
-                       time_delta = systim_next - systim;
-                       temp = time_delta;
-                       /* VMWare users have seen incvalue of zero, don't div / 0 */
-                       rem = incvalue ? do_div(temp, incvalue) : (time_delta != 0);
-
-                       systim = systim_next;
+       if (adapter->flags2 & FLAG2_CHECK_SYSTIM_OVERFLOW)
+               systim = e1000e_sanitize_systim(hw, systim);
 
-                       if ((time_delta < E1000_82574_SYSTIM_EPSILON) &&
-                           (rem == 0))
-                               break;
-               }
-       }
        return systim;
 }
 
index e1370c5..618f184 100644 (file)
@@ -199,6 +199,7 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi)
 void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi)
 {
        struct i40e_client_instance *cdev;
+       int ret = 0;
 
        if (!vsi)
                return;
@@ -211,7 +212,14 @@ void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi)
                                        "Cannot locate client instance open routine\n");
                                continue;
                        }
-                       cdev->client->ops->open(&cdev->lan_info, cdev->client);
+                       if (!(test_bit(__I40E_CLIENT_INSTANCE_OPENED,
+                                      &cdev->state))) {
+                               ret = cdev->client->ops->open(&cdev->lan_info,
+                                                             cdev->client);
+                               if (!ret)
+                                       set_bit(__I40E_CLIENT_INSTANCE_OPENED,
+                                               &cdev->state);
+                       }
                }
        }
        mutex_unlock(&i40e_client_instance_mutex);
@@ -407,12 +415,14 @@ struct i40e_vsi *i40e_vsi_lookup(struct i40e_pf *pf,
  * i40e_client_add_instance - add a client instance struct to the instance list
  * @pf: pointer to the board struct
  * @client: pointer to a client struct in the client list.
+ * @existing: if there was already an existing instance
  *
- * Returns cdev ptr on success, NULL on failure
+ * Returns cdev ptr on success or if already exists, NULL on failure
  **/
 static
 struct i40e_client_instance *i40e_client_add_instance(struct i40e_pf *pf,
-                                                     struct i40e_client *client)
+                                                    struct i40e_client *client,
+                                                    bool *existing)
 {
        struct i40e_client_instance *cdev;
        struct netdev_hw_addr *mac = NULL;
@@ -421,7 +431,7 @@ struct i40e_client_instance *i40e_client_add_instance(struct i40e_pf *pf,
        mutex_lock(&i40e_client_instance_mutex);
        list_for_each_entry(cdev, &i40e_client_instances, list) {
                if ((cdev->lan_info.pf == pf) && (cdev->client == client)) {
-                       cdev = NULL;
+                       *existing = true;
                        goto out;
                }
        }
@@ -505,6 +515,7 @@ void i40e_client_subtask(struct i40e_pf *pf)
 {
        struct i40e_client_instance *cdev;
        struct i40e_client *client;
+       bool existing = false;
        int ret = 0;
 
        if (!(pf->flags & I40E_FLAG_SERVICE_CLIENT_REQUESTED))
@@ -528,18 +539,25 @@ void i40e_client_subtask(struct i40e_pf *pf)
                        /* check if L2 VSI is up, if not we are not ready */
                        if (test_bit(__I40E_DOWN, &pf->vsi[pf->lan_vsi]->state))
                                continue;
+               } else {
+                       dev_warn(&pf->pdev->dev, "This client %s is being instanciated at probe\n",
+                                client->name);
                }
 
                /* Add the client instance to the instance list */
-               cdev = i40e_client_add_instance(pf, client);
+               cdev = i40e_client_add_instance(pf, client, &existing);
                if (!cdev)
                        continue;
 
-               /* Also up the ref_cnt of no. of instances of this client */
-               atomic_inc(&client->ref_cnt);
-               dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n",
-                        client->name, pf->hw.pf_id,
-                        pf->hw.bus.device, pf->hw.bus.func);
+               if (!existing) {
+                       /* Also up the ref_cnt for no. of instances of this
+                        * client.
+                        */
+                       atomic_inc(&client->ref_cnt);
+                       dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n",
+                                client->name, pf->hw.pf_id,
+                                pf->hw.bus.device, pf->hw.bus.func);
+               }
 
                /* Send an Open request to the client */
                atomic_inc(&cdev->ref_cnt);
@@ -588,7 +606,8 @@ int i40e_lan_add_device(struct i40e_pf *pf)
                 pf->hw.pf_id, pf->hw.bus.device, pf->hw.bus.func);
 
        /* Since in some cases register may have happened before a device gets
-        * added, we can schedule a subtask to go initiate the clients.
+        * added, we can schedule a subtask to go initiate the clients if
+        * they can be launched at probe time.
         */
        pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
        i40e_service_event_schedule(pf);
index 81c99e1..828ed28 100644 (file)
@@ -4554,23 +4554,38 @@ static u8 i40e_get_iscsi_tc_map(struct i40e_pf *pf)
  **/
 static u8 i40e_dcb_get_num_tc(struct i40e_dcbx_config *dcbcfg)
 {
+       int i, tc_unused = 0;
        u8 num_tc = 0;
-       int i;
+       u8 ret = 0;
 
        /* Scan the ETS Config Priority Table to find
         * traffic class enabled for a given priority
-        * and use the traffic class index to get the
-        * number of traffic classes enabled
+        * and create a bitmask of enabled TCs
         */
-       for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
-               if (dcbcfg->etscfg.prioritytable[i] > num_tc)
-                       num_tc = dcbcfg->etscfg.prioritytable[i];
-       }
+       for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)
+               num_tc |= BIT(dcbcfg->etscfg.prioritytable[i]);
 
-       /* Traffic class index starts from zero so
-        * increment to return the actual count
+       /* Now scan the bitmask to check for
+        * contiguous TCs starting with TC0
         */
-       return num_tc + 1;
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+               if (num_tc & BIT(i)) {
+                       if (!tc_unused) {
+                               ret++;
+                       } else {
+                               pr_err("Non-contiguous TC - Disabling DCB\n");
+                               return 1;
+                       }
+               } else {
+                       tc_unused = 1;
+               }
+       }
+
+       /* There is always at least TC0 */
+       if (!ret)
+               ret = 1;
+
+       return ret;
 }
 
 /**
@@ -5416,7 +5431,6 @@ int i40e_open(struct net_device *netdev)
        wr32(&pf->hw, I40E_GLLAN_TSOMSK_L, be32_to_cpu(TCP_FLAG_CWR) >> 16);
 
        udp_tunnel_get_rx_info(netdev);
-       i40e_notify_client_of_netdev_open(vsi);
 
        return 0;
 }
index e61b647..336c103 100644 (file)
@@ -744,7 +744,8 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
                }
        }
 
-       shhwtstamps.hwtstamp = ktime_sub_ns(shhwtstamps.hwtstamp, adjust);
+       shhwtstamps.hwtstamp =
+               ktime_add_ns(shhwtstamps.hwtstamp, adjust);
 
        skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps);
        dev_kfree_skb_any(adapter->ptp_tx_skb);
@@ -767,13 +768,32 @@ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector,
                         struct sk_buff *skb)
 {
        __le64 *regval = (__le64 *)va;
+       struct igb_adapter *adapter = q_vector->adapter;
+       int adjust = 0;
 
        /* The timestamp is recorded in little endian format.
         * DWORD: 0        1        2        3
         * Field: Reserved Reserved SYSTIML  SYSTIMH
         */
-       igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb),
+       igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb),
                                   le64_to_cpu(regval[1]));
+
+       /* adjust timestamp for the RX latency based on link speed */
+       if (adapter->hw.mac.type == e1000_i210) {
+               switch (adapter->link_speed) {
+               case SPEED_10:
+                       adjust = IGB_I210_RX_LATENCY_10;
+                       break;
+               case SPEED_100:
+                       adjust = IGB_I210_RX_LATENCY_100;
+                       break;
+               case SPEED_1000:
+                       adjust = IGB_I210_RX_LATENCY_1000;
+                       break;
+               }
+       }
+       skb_hwtstamps(skb)->hwtstamp =
+               ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
 }
 
 /**
@@ -825,7 +845,7 @@ void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector,
                }
        }
        skb_hwtstamps(skb)->hwtstamp =
-               ktime_add_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
+               ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
 
        /* Update the last_rx_timestamp timer in order to enable watchdog check
         * for error case of latched timestamp on a dropped packet.
index b4217f3..c47b605 100644 (file)
@@ -2958,8 +2958,10 @@ s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
        }
 
        /* was that the last pool using this rar? */
-       if (mpsar_lo == 0 && mpsar_hi == 0 && rar != 0)
+       if (mpsar_lo == 0 && mpsar_hi == 0 &&
+           rar != 0 && rar != hw->mac.san_mac_rar_index)
                hw->mac.ops.clear_rar(hw, rar);
+
        return 0;
 }
 
index 5418c69..b4f0374 100644 (file)
@@ -4100,6 +4100,8 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter)
        struct ixgbe_hw *hw = &adapter->hw;
        u32 vlnctrl, i;
 
+       vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+
        switch (hw->mac.type) {
        case ixgbe_mac_82599EB:
        case ixgbe_mac_X540:
@@ -4112,8 +4114,7 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter)
                /* fall through */
        case ixgbe_mac_82598EB:
                /* legacy case, we can just disable VLAN filtering */
-               vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
-               vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN);
+               vlnctrl &= ~IXGBE_VLNCTRL_VFE;
                IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
                return;
        }
@@ -4125,6 +4126,10 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter)
        /* Set flag so we don't redo unnecessary work */
        adapter->flags2 |= IXGBE_FLAG2_VLAN_PROMISC;
 
+       /* For VMDq and SR-IOV we must leave VLAN filtering enabled */
+       vlnctrl |= IXGBE_VLNCTRL_VFE;
+       IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+
        /* Add PF to all active pools */
        for (i = IXGBE_VLVF_ENTRIES; --i;) {
                u32 reg_offset = IXGBE_VLVFB(i * 2 + VMDQ_P(0) / 32);
@@ -4191,6 +4196,11 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter)
        struct ixgbe_hw *hw = &adapter->hw;
        u32 vlnctrl, i;
 
+       /* Set VLAN filtering to enabled */
+       vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+       vlnctrl |= IXGBE_VLNCTRL_VFE;
+       IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+
        switch (hw->mac.type) {
        case ixgbe_mac_82599EB:
        case ixgbe_mac_X540:
@@ -4202,10 +4212,6 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter)
                        break;
                /* fall through */
        case ixgbe_mac_82598EB:
-               vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
-               vlnctrl &= ~IXGBE_VLNCTRL_CFIEN;
-               vlnctrl |= IXGBE_VLNCTRL_VFE;
-               IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
                return;
        }
 
@@ -8390,12 +8396,14 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter,
                            struct tcf_exts *exts, u64 *action, u8 *queue)
 {
        const struct tc_action *a;
+       LIST_HEAD(actions);
        int err;
 
        if (tc_no_actions(exts))
                return -EINVAL;
 
-       tc_for_each_action(a, exts) {
+       tcf_exts_to_list(exts, &actions);
+       list_for_each_entry(a, &actions, list) {
 
                /* Drop action */
                if (is_tcf_gact_shot(a)) {
@@ -9517,6 +9525,7 @@ skip_sriov:
 
        /* copy netdev features into list of user selectable features */
        netdev->hw_features |= netdev->features |
+                              NETIF_F_HW_VLAN_CTAG_FILTER |
                               NETIF_F_HW_VLAN_CTAG_RX |
                               NETIF_F_HW_VLAN_CTAG_TX |
                               NETIF_F_RXALL |
index b57ae3a..f160954 100644 (file)
@@ -245,12 +245,16 @@ static int mtk_phy_connect(struct mtk_mac *mac)
        case PHY_INTERFACE_MODE_MII:
                ge_mode = 1;
                break;
-       case PHY_INTERFACE_MODE_RMII:
+       case PHY_INTERFACE_MODE_REVMII:
                ge_mode = 2;
                break;
+       case PHY_INTERFACE_MODE_RMII:
+               if (!mac->id)
+                       goto err_phy;
+               ge_mode = 3;
+               break;
        default:
-               dev_err(eth->dev, "invalid phy_mode\n");
-               return -1;
+               goto err_phy;
        }
 
        /* put the gmac into the right mode */
@@ -263,13 +267,25 @@ static int mtk_phy_connect(struct mtk_mac *mac)
        mac->phy_dev->autoneg = AUTONEG_ENABLE;
        mac->phy_dev->speed = 0;
        mac->phy_dev->duplex = 0;
+
+       if (of_phy_is_fixed_link(mac->of_node))
+               mac->phy_dev->supported |=
+               SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+
        mac->phy_dev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause |
                                   SUPPORTED_Asym_Pause;
        mac->phy_dev->advertising = mac->phy_dev->supported |
                                    ADVERTISED_Autoneg;
        phy_start_aneg(mac->phy_dev);
 
+       of_node_put(np);
+
        return 0;
+
+err_phy:
+       of_node_put(np);
+       dev_err(eth->dev, "invalid phy_mode\n");
+       return -EINVAL;
 }
 
 static int mtk_mdio_init(struct mtk_eth *eth)
@@ -542,15 +558,15 @@ static inline struct mtk_tx_buf *mtk_desc_to_tx_buf(struct mtk_tx_ring *ring,
        return &ring->buf[idx];
 }
 
-static void mtk_tx_unmap(struct device *dev, struct mtk_tx_buf *tx_buf)
+static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf)
 {
        if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
-               dma_unmap_single(dev,
+               dma_unmap_single(eth->dev,
                                 dma_unmap_addr(tx_buf, dma_addr0),
                                 dma_unmap_len(tx_buf, dma_len0),
                                 DMA_TO_DEVICE);
        } else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) {
-               dma_unmap_page(dev,
+               dma_unmap_page(eth->dev,
                               dma_unmap_addr(tx_buf, dma_addr0),
                               dma_unmap_len(tx_buf, dma_len0),
                               DMA_TO_DEVICE);
@@ -595,9 +611,9 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
        if (skb_vlan_tag_present(skb))
                txd4 |= TX_DMA_INS_VLAN | skb_vlan_tag_get(skb);
 
-       mapped_addr = dma_map_single(&dev->dev, skb->data,
+       mapped_addr = dma_map_single(eth->dev, skb->data,
                                     skb_headlen(skb), DMA_TO_DEVICE);
-       if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
+       if (unlikely(dma_mapping_error(eth->dev, mapped_addr)))
                return -ENOMEM;
 
        WRITE_ONCE(itxd->txd1, mapped_addr);
@@ -623,10 +639,10 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 
                        n_desc++;
                        frag_map_size = min(frag_size, MTK_TX_DMA_BUF_LEN);
-                       mapped_addr = skb_frag_dma_map(&dev->dev, frag, offset,
+                       mapped_addr = skb_frag_dma_map(eth->dev, frag, offset,
                                                       frag_map_size,
                                                       DMA_TO_DEVICE);
-                       if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
+                       if (unlikely(dma_mapping_error(eth->dev, mapped_addr)))
                                goto err_dma;
 
                        if (i == nr_frags - 1 &&
@@ -679,7 +695,7 @@ err_dma:
                tx_buf = mtk_desc_to_tx_buf(ring, itxd);
 
                /* unmap dma */
-               mtk_tx_unmap(&dev->dev, tx_buf);
+               mtk_tx_unmap(eth, tx_buf);
 
                itxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
                itxd = mtk_qdma_phys_to_virt(ring, itxd->txd2);
@@ -836,11 +852,11 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
                        netdev->stats.rx_dropped++;
                        goto release_desc;
                }
-               dma_addr = dma_map_single(&eth->netdev[mac]->dev,
+               dma_addr = dma_map_single(eth->dev,
                                          new_data + NET_SKB_PAD,
                                          ring->buf_size,
                                          DMA_FROM_DEVICE);
-               if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) {
+               if (unlikely(dma_mapping_error(eth->dev, dma_addr))) {
                        skb_free_frag(new_data);
                        netdev->stats.rx_dropped++;
                        goto release_desc;
@@ -855,7 +871,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
                }
                skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
 
-               dma_unmap_single(&netdev->dev, trxd.rxd1,
+               dma_unmap_single(eth->dev, trxd.rxd1,
                                 ring->buf_size, DMA_FROM_DEVICE);
                pktlen = RX_DMA_GET_PLEN0(trxd.rxd2);
                skb->dev = netdev;
@@ -937,7 +953,7 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
                        done[mac]++;
                        budget--;
                }
-               mtk_tx_unmap(eth->dev, tx_buf);
+               mtk_tx_unmap(eth, tx_buf);
 
                ring->last_free = desc;
                atomic_inc(&ring->free_count);
@@ -1092,7 +1108,7 @@ static void mtk_tx_clean(struct mtk_eth *eth)
 
        if (ring->buf) {
                for (i = 0; i < MTK_DMA_SIZE; i++)
-                       mtk_tx_unmap(eth->dev, &ring->buf[i]);
+                       mtk_tx_unmap(eth, &ring->buf[i]);
                kfree(ring->buf);
                ring->buf = NULL;
        }
@@ -1751,6 +1767,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
                goto free_netdev;
        }
        spin_lock_init(&mac->hw_stats->stats_lock);
+       u64_stats_init(&mac->hw_stats->syncp);
        mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET;
 
        SET_NETDEV_DEV(eth->netdev[id], eth->dev);
index d6e2a1c..c2ec01a 100644 (file)
@@ -143,13 +143,14 @@ static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
        return cmd->cmd_buf + (idx << cmd->log_stride);
 }
 
-static u8 xor8_buf(void *buf, int len)
+static u8 xor8_buf(void *buf, size_t offset, int len)
 {
        u8 *ptr = buf;
        u8 sum = 0;
        int i;
+       int end = len + offset;
 
-       for (i = 0; i < len; i++)
+       for (i = offset; i < end; i++)
                sum ^= ptr[i];
 
        return sum;
@@ -157,41 +158,49 @@ static u8 xor8_buf(void *buf, int len)
 
 static int verify_block_sig(struct mlx5_cmd_prot_block *block)
 {
-       if (xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 1) != 0xff)
+       size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0);
+       int xor_len = sizeof(*block) - sizeof(block->data) - 1;
+
+       if (xor8_buf(block, rsvd0_off, xor_len) != 0xff)
                return -EINVAL;
 
-       if (xor8_buf(block, sizeof(*block)) != 0xff)
+       if (xor8_buf(block, 0, sizeof(*block)) != 0xff)
                return -EINVAL;
 
        return 0;
 }
 
-static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token,
-                          int csum)
+static void calc_block_sig(struct mlx5_cmd_prot_block *block)
 {
-       block->token = token;
-       if (csum) {
-               block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) -
-                                           sizeof(block->data) - 2);
-               block->sig = ~xor8_buf(block, sizeof(*block) - 1);
-       }
+       int ctrl_xor_len = sizeof(*block) - sizeof(block->data) - 2;
+       size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0);
+
+       block->ctrl_sig = ~xor8_buf(block, rsvd0_off, ctrl_xor_len);
+       block->sig = ~xor8_buf(block, 0, sizeof(*block) - 1);
 }
 
-static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token, int csum)
+static void calc_chain_sig(struct mlx5_cmd_msg *msg)
 {
        struct mlx5_cmd_mailbox *next = msg->next;
-
-       while (next) {
-               calc_block_sig(next->buf, token, csum);
+       int size = msg->len;
+       int blen = size - min_t(int, sizeof(msg->first.data), size);
+       int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1)
+               / MLX5_CMD_DATA_BLOCK_SIZE;
+       int i = 0;
+
+       for (i = 0; i < n && next; i++)  {
+               calc_block_sig(next->buf);
                next = next->next;
        }
 }
 
 static void set_signature(struct mlx5_cmd_work_ent *ent, int csum)
 {
-       ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay));
-       calc_chain_sig(ent->in, ent->token, csum);
-       calc_chain_sig(ent->out, ent->token, csum);
+       ent->lay->sig = ~xor8_buf(ent->lay, 0,  sizeof(*ent->lay));
+       if (csum) {
+               calc_chain_sig(ent->in);
+               calc_chain_sig(ent->out);
+       }
 }
 
 static void poll_timeout(struct mlx5_cmd_work_ent *ent)
@@ -222,12 +231,17 @@ static int verify_signature(struct mlx5_cmd_work_ent *ent)
        struct mlx5_cmd_mailbox *next = ent->out->next;
        int err;
        u8 sig;
+       int size = ent->out->len;
+       int blen = size - min_t(int, sizeof(ent->out->first.data), size);
+       int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1)
+               / MLX5_CMD_DATA_BLOCK_SIZE;
+       int i = 0;
 
-       sig = xor8_buf(ent->lay, sizeof(*ent->lay));
+       sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay));
        if (sig != 0xff)
                return -EINVAL;
 
-       while (next) {
+       for (i = 0; i < n && next; i++) {
                err = verify_block_sig(next->buf);
                if (err)
                        return err;
@@ -656,7 +670,6 @@ static void cmd_work_handler(struct work_struct *work)
                spin_unlock_irqrestore(&cmd->alloc_lock, flags);
        }
 
-       ent->token = alloc_token(cmd);
        cmd->ent_arr[ent->idx] = ent;
        lay = get_inst(cmd, ent->idx);
        ent->lay = lay;
@@ -766,7 +779,8 @@ static u8 *get_status_ptr(struct mlx5_outbox_hdr *out)
 static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
                           struct mlx5_cmd_msg *out, void *uout, int uout_size,
                           mlx5_cmd_cbk_t callback,
-                          void *context, int page_queue, u8 *status)
+                          void *context, int page_queue, u8 *status,
+                          u8 token)
 {
        struct mlx5_cmd *cmd = &dev->cmd;
        struct mlx5_cmd_work_ent *ent;
@@ -783,6 +797,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
        if (IS_ERR(ent))
                return PTR_ERR(ent);
 
+       ent->token = token;
+
        if (!callback)
                init_completion(&ent->done);
 
@@ -854,7 +870,8 @@ static const struct file_operations fops = {
        .write  = dbg_write,
 };
 
-static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size)
+static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size,
+                           u8 token)
 {
        struct mlx5_cmd_prot_block *block;
        struct mlx5_cmd_mailbox *next;
@@ -880,6 +897,7 @@ static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size)
                memcpy(block->data, from, copy);
                from += copy;
                size -= copy;
+               block->token = token;
                next = next->next;
        }
 
@@ -949,7 +967,8 @@ static void free_cmd_box(struct mlx5_core_dev *dev,
 }
 
 static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
-                                              gfp_t flags, int size)
+                                              gfp_t flags, int size,
+                                              u8 token)
 {
        struct mlx5_cmd_mailbox *tmp, *head = NULL;
        struct mlx5_cmd_prot_block *block;
@@ -978,6 +997,7 @@ static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
                tmp->next = head;
                block->next = cpu_to_be64(tmp->next ? tmp->next->dma : 0);
                block->block_num = cpu_to_be32(n - i - 1);
+               block->token = token;
                head = tmp;
        }
        msg->next = head;
@@ -1352,7 +1372,7 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
        }
 
        if (IS_ERR(msg))
-               msg = mlx5_alloc_cmd_msg(dev, gfp, in_size);
+               msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0);
 
        return msg;
 }
@@ -1377,6 +1397,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
        int err;
        u8 status = 0;
        u32 drv_synd;
+       u8 token;
 
        if (pci_channel_offline(dev->pdev) ||
            dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
@@ -1395,20 +1416,22 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
                return err;
        }
 
-       err = mlx5_copy_to_msg(inb, in, in_size);
+       token = alloc_token(&dev->cmd);
+
+       err = mlx5_copy_to_msg(inb, in, in_size, token);
        if (err) {
                mlx5_core_warn(dev, "err %d\n", err);
                goto out_in;
        }
 
-       outb = mlx5_alloc_cmd_msg(dev, gfp, out_size);
+       outb = mlx5_alloc_cmd_msg(dev, gfp, out_size, token);
        if (IS_ERR(outb)) {
                err = PTR_ERR(outb);
                goto out_in;
        }
 
        err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
-                             pages_queue, &status);
+                             pages_queue, &status, token);
        if (err)
                goto out_out;
 
@@ -1476,7 +1499,7 @@ static int create_msg_cache(struct mlx5_core_dev *dev)
        INIT_LIST_HEAD(&cmd->cache.med.head);
 
        for (i = 0; i < NUM_LONG_LISTS; i++) {
-               msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE);
+               msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE, 0);
                if (IS_ERR(msg)) {
                        err = PTR_ERR(msg);
                        goto ex_err;
@@ -1486,7 +1509,7 @@ static int create_msg_cache(struct mlx5_core_dev *dev)
        }
 
        for (i = 0; i < NUM_MED_LISTS; i++) {
-               msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE);
+               msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE, 0);
                if (IS_ERR(msg)) {
                        err = PTR_ERR(msg);
                        goto ex_err;
index 1b495ef..bf722aa 100644 (file)
 #define MLX5_MPWRQ_PAGES_PER_WQE               BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
 #define MLX5_MPWRQ_STRIDES_PER_PAGE            (MLX5_MPWRQ_NUM_STRIDES >> \
                                                 MLX5_MPWRQ_WQE_PAGE_ORDER)
-#define MLX5_CHANNEL_MAX_NUM_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8) * \
-                                  BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW))
+
+#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
+#define MLX5E_REQUIRED_MTTS(rqs, wqes)\
+       (rqs * wqes * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
+#define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) <= U16_MAX)
+
 #define MLX5_UMR_ALIGN                         (2048)
 #define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD      (128)
 
@@ -219,9 +223,8 @@ struct mlx5e_tstamp {
 };
 
 enum {
-       MLX5E_RQ_STATE_POST_WQES_ENABLE,
+       MLX5E_RQ_STATE_FLUSH,
        MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS,
-       MLX5E_RQ_STATE_FLUSH_TIMEOUT,
        MLX5E_RQ_STATE_AM,
 };
 
@@ -304,6 +307,7 @@ struct mlx5e_rq {
 
        unsigned long          state;
        int                    ix;
+       u32                    mpwqe_mtt_offset;
 
        struct mlx5e_rx_am     am; /* Adaptive Moderation */
 
@@ -365,9 +369,8 @@ struct mlx5e_sq_dma {
 };
 
 enum {
-       MLX5E_SQ_STATE_WAKE_TXQ_ENABLE,
+       MLX5E_SQ_STATE_FLUSH,
        MLX5E_SQ_STATE_BF_ENABLE,
-       MLX5E_SQ_STATE_TX_TIMEOUT,
 };
 
 struct mlx5e_ico_wqe_info {
@@ -698,7 +701,6 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget);
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
 void mlx5e_free_tx_descs(struct mlx5e_sq *sq);
-void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
 
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
@@ -814,11 +816,6 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
                     MLX5E_MAX_NUM_CHANNELS);
 }
 
-static inline int mlx5e_get_mtt_octw(int npages)
-{
-       return ALIGN(npages, 8) / 2;
-}
-
 extern const struct ethtool_ops mlx5e_ethtool_ops;
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops;
index 673043c..9cce153 100644 (file)
@@ -139,7 +139,7 @@ int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev)
        struct mlx5e_tir *tir;
        void *in;
        int inlen;
-       int err;
+       int err = 0;
 
        inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
        in = mlx5_vzalloc(inlen);
@@ -151,10 +151,11 @@ int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev)
        list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) {
                err = mlx5_core_modify_tir(mdev, tir->tirn, in, inlen);
                if (err)
-                       return err;
+                       goto out;
        }
 
+out:
        kvfree(in);
 
-       return 0;
+       return err;
 }
index caa9a3c..762af16 100644 (file)
@@ -127,29 +127,40 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
        return mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw);
 }
 
-static int mlx5e_dbcnl_validate_ets(struct ieee_ets *ets)
+static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
+                                   struct ieee_ets *ets)
 {
        int bw_sum = 0;
        int i;
 
        /* Validate Priority */
        for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
-               if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY)
+               if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY) {
+                       netdev_err(netdev,
+                                  "Failed to validate ETS: priority value greater than max(%d)\n",
+                                   MLX5E_MAX_PRIORITY);
                        return -EINVAL;
+               }
        }
 
        /* Validate Bandwidth Sum */
        for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
                if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
-                       if (!ets->tc_tx_bw[i])
+                       if (!ets->tc_tx_bw[i]) {
+                               netdev_err(netdev,
+                                          "Failed to validate ETS: BW 0 is illegal\n");
                                return -EINVAL;
+                       }
 
                        bw_sum += ets->tc_tx_bw[i];
                }
        }
 
-       if (bw_sum != 0 && bw_sum != 100)
+       if (bw_sum != 0 && bw_sum != 100) {
+               netdev_err(netdev,
+                          "Failed to validate ETS: BW sum is illegal\n");
                return -EINVAL;
+       }
        return 0;
 }
 
@@ -159,7 +170,7 @@ static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev,
        struct mlx5e_priv *priv = netdev_priv(netdev);
        int err;
 
-       err = mlx5e_dbcnl_validate_ets(ets);
+       err = mlx5e_dbcnl_validate_ets(netdev, ets);
        if (err)
                return err;
 
index 4a3757e..d0cf8fa 100644 (file)
@@ -352,15 +352,61 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
                                                                   sq_stats_desc, j);
 }
 
+static u32 mlx5e_rx_wqes_to_packets(struct mlx5e_priv *priv, int rq_wq_type,
+                                   int num_wqe)
+{
+       int packets_per_wqe;
+       int stride_size;
+       int num_strides;
+       int wqe_size;
+
+       if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+               return num_wqe;
+
+       stride_size = 1 << priv->params.mpwqe_log_stride_sz;
+       num_strides = 1 << priv->params.mpwqe_log_num_strides;
+       wqe_size = stride_size * num_strides;
+
+       packets_per_wqe = wqe_size /
+                         ALIGN(ETH_DATA_LEN, stride_size);
+       return (1 << (order_base_2(num_wqe * packets_per_wqe) - 1));
+}
+
+static u32 mlx5e_packets_to_rx_wqes(struct mlx5e_priv *priv, int rq_wq_type,
+                                   int num_packets)
+{
+       int packets_per_wqe;
+       int stride_size;
+       int num_strides;
+       int wqe_size;
+       int num_wqes;
+
+       if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+               return num_packets;
+
+       stride_size = 1 << priv->params.mpwqe_log_stride_sz;
+       num_strides = 1 << priv->params.mpwqe_log_num_strides;
+       wqe_size = stride_size * num_strides;
+
+       num_packets = (1 << order_base_2(num_packets));
+
+       packets_per_wqe = wqe_size /
+                         ALIGN(ETH_DATA_LEN, stride_size);
+       num_wqes = DIV_ROUND_UP(num_packets, packets_per_wqe);
+       return 1 << (order_base_2(num_wqes));
+}
+
 static void mlx5e_get_ringparam(struct net_device *dev,
                                struct ethtool_ringparam *param)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
        int rq_wq_type = priv->params.rq_wq_type;
 
-       param->rx_max_pending = 1 << mlx5_max_log_rq_size(rq_wq_type);
+       param->rx_max_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
+                                                        1 << mlx5_max_log_rq_size(rq_wq_type));
        param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE;
-       param->rx_pending     = 1 << priv->params.log_rq_size;
+       param->rx_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
+                                                    1 << priv->params.log_rq_size);
        param->tx_pending     = 1 << priv->params.log_sq_size;
 }
 
@@ -370,9 +416,13 @@ static int mlx5e_set_ringparam(struct net_device *dev,
        struct mlx5e_priv *priv = netdev_priv(dev);
        bool was_opened;
        int rq_wq_type = priv->params.rq_wq_type;
+       u32 rx_pending_wqes;
+       u32 min_rq_size;
+       u32 max_rq_size;
        u16 min_rx_wqes;
        u8 log_rq_size;
        u8 log_sq_size;
+       u32 num_mtts;
        int err = 0;
 
        if (param->rx_jumbo_pending) {
@@ -385,18 +435,36 @@ static int mlx5e_set_ringparam(struct net_device *dev,
                            __func__);
                return -EINVAL;
        }
-       if (param->rx_pending < (1 << mlx5_min_log_rq_size(rq_wq_type))) {
+
+       min_rq_size = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
+                                              1 << mlx5_min_log_rq_size(rq_wq_type));
+       max_rq_size = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
+                                              1 << mlx5_max_log_rq_size(rq_wq_type));
+       rx_pending_wqes = mlx5e_packets_to_rx_wqes(priv, rq_wq_type,
+                                                  param->rx_pending);
+
+       if (param->rx_pending < min_rq_size) {
                netdev_info(dev, "%s: rx_pending (%d) < min (%d)\n",
                            __func__, param->rx_pending,
-                           1 << mlx5_min_log_rq_size(rq_wq_type));
+                           min_rq_size);
                return -EINVAL;
        }
-       if (param->rx_pending > (1 << mlx5_max_log_rq_size(rq_wq_type))) {
+       if (param->rx_pending > max_rq_size) {
                netdev_info(dev, "%s: rx_pending (%d) > max (%d)\n",
                            __func__, param->rx_pending,
-                           1 << mlx5_max_log_rq_size(rq_wq_type));
+                           max_rq_size);
                return -EINVAL;
        }
+
+       num_mtts = MLX5E_REQUIRED_MTTS(priv->params.num_channels,
+                                      rx_pending_wqes);
+       if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+           !MLX5E_VALID_NUM_MTTS(num_mtts)) {
+               netdev_info(dev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n",
+                           __func__, param->rx_pending);
+               return -EINVAL;
+       }
+
        if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) {
                netdev_info(dev, "%s: tx_pending (%d) < min (%d)\n",
                            __func__, param->tx_pending,
@@ -410,9 +478,9 @@ static int mlx5e_set_ringparam(struct net_device *dev,
                return -EINVAL;
        }
 
-       log_rq_size = order_base_2(param->rx_pending);
+       log_rq_size = order_base_2(rx_pending_wqes);
        log_sq_size = order_base_2(param->tx_pending);
-       min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, param->rx_pending);
+       min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, rx_pending_wqes);
 
        if (log_rq_size == priv->params.log_rq_size &&
            log_sq_size == priv->params.log_sq_size &&
@@ -454,6 +522,7 @@ static int mlx5e_set_channels(struct net_device *dev,
        unsigned int count = ch->combined_count;
        bool arfs_enabled;
        bool was_opened;
+       u32 num_mtts;
        int err = 0;
 
        if (!count) {
@@ -472,6 +541,14 @@ static int mlx5e_set_channels(struct net_device *dev,
                return -EINVAL;
        }
 
+       num_mtts = MLX5E_REQUIRED_MTTS(count, BIT(priv->params.log_rq_size));
+       if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+           !MLX5E_VALID_NUM_MTTS(num_mtts)) {
+               netdev_info(dev, "%s: rx count (%d) request can't be satisfied, try to reduce.\n",
+                           __func__, count);
+               return -EINVAL;
+       }
+
        if (priv->params.num_channels == count)
                return 0;
 
index 870bea3..2459c7f 100644 (file)
 #include "eswitch.h"
 #include "vxlan.h"
 
-enum {
-       MLX5_EN_QP_FLUSH_TIMEOUT_MS     = 5000,
-       MLX5_EN_QP_FLUSH_MSLEEP_QUANT   = 20,
-       MLX5_EN_QP_FLUSH_MAX_ITER       = MLX5_EN_QP_FLUSH_TIMEOUT_MS /
-                                         MLX5_EN_QP_FLUSH_MSLEEP_QUANT,
-};
-
 struct mlx5e_rq_param {
        u32                     rqc[MLX5_ST_SZ_DW(rqc)];
        struct mlx5_wq_param    wq;
@@ -162,6 +155,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
                        s->tx_queue_stopped     += sq_stats->stopped;
                        s->tx_queue_wake        += sq_stats->wake;
                        s->tx_queue_dropped     += sq_stats->dropped;
+                       s->tx_xmit_more         += sq_stats->xmit_more;
                        s->tx_csum_partial_inner += sq_stats->csum_partial_inner;
                        tx_offload_none         += sq_stats->csum_none;
                }
@@ -340,6 +334,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
                rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
                rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
 
+               rq->mpwqe_mtt_offset = c->ix *
+                       MLX5E_REQUIRED_MTTS(1, BIT(priv->params.log_rq_size));
+
                rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
                rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
                rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
@@ -428,7 +425,6 @@ static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
 
        MLX5_SET(rqc,  rqc, cqn,                rq->cq.mcq.cqn);
        MLX5_SET(rqc,  rqc, state,              MLX5_RQC_STATE_RST);
-       MLX5_SET(rqc,  rqc, flush_in_error_en,  1);
        MLX5_SET(rqc,  rqc, vsd, priv->params.vlan_strip_disable);
        MLX5_SET(wq,   wq,  log_wq_pg_sz,       rq->wq_ctrl.buf.page_shift -
                                                MLX5_ADAPTER_PAGE_SHIFT);
@@ -525,6 +521,27 @@ static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
        return -ETIMEDOUT;
 }
 
+static void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
+{
+       struct mlx5_wq_ll *wq = &rq->wq;
+       struct mlx5e_rx_wqe *wqe;
+       __be16 wqe_ix_be;
+       u16 wqe_ix;
+
+       /* UMR WQE (if in progress) is always at wq->head */
+       if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
+               mlx5e_free_rx_fragmented_mpwqe(rq, &rq->wqe_info[wq->head]);
+
+       while (!mlx5_wq_ll_is_empty(wq)) {
+               wqe_ix_be = *wq->tail_next;
+               wqe_ix    = be16_to_cpu(wqe_ix_be);
+               wqe       = mlx5_wq_ll_get_wqe(&rq->wq, wqe_ix);
+               rq->dealloc_wqe(rq, wqe_ix);
+               mlx5_wq_ll_pop(&rq->wq, wqe_ix_be,
+                              &wqe->next.next_wqe_index);
+       }
+}
+
 static int mlx5e_open_rq(struct mlx5e_channel *c,
                         struct mlx5e_rq_param *param,
                         struct mlx5e_rq *rq)
@@ -548,8 +565,6 @@ static int mlx5e_open_rq(struct mlx5e_channel *c,
        if (param->am_enabled)
                set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
 
-       set_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state);
-
        sq->ico_wqe_info[pi].opcode     = MLX5_OPCODE_NOP;
        sq->ico_wqe_info[pi].num_wqebbs = 1;
        mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */
@@ -566,23 +581,8 @@ err_destroy_rq:
 
 static void mlx5e_close_rq(struct mlx5e_rq *rq)
 {
-       int tout = 0;
-       int err;
-
-       clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state);
+       set_bit(MLX5E_RQ_STATE_FLUSH, &rq->state);
        napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
-
-       err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
-       while (!mlx5_wq_ll_is_empty(&rq->wq) && !err &&
-              tout++ < MLX5_EN_QP_FLUSH_MAX_ITER)
-               msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
-
-       if (err || tout == MLX5_EN_QP_FLUSH_MAX_ITER)
-               set_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state);
-
-       /* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */
-       napi_synchronize(&rq->channel->napi);
-
        cancel_work_sync(&rq->am.work);
 
        mlx5e_disable_rq(rq);
@@ -821,7 +821,6 @@ static int mlx5e_open_sq(struct mlx5e_channel *c,
                goto err_disable_sq;
 
        if (sq->txq) {
-               set_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state);
                netdev_tx_reset_queue(sq->txq);
                netif_tx_start_queue(sq->txq);
        }
@@ -845,38 +844,20 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq)
 
 static void mlx5e_close_sq(struct mlx5e_sq *sq)
 {
-       int tout = 0;
-       int err;
+       set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state);
+       /* prevent netif_tx_wake_queue */
+       napi_synchronize(&sq->channel->napi);
 
        if (sq->txq) {
-               clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state);
-               /* prevent netif_tx_wake_queue */
-               napi_synchronize(&sq->channel->napi);
                netif_tx_disable_queue(sq->txq);
 
-               /* ensure hw is notified of all pending wqes */
+               /* last doorbell out, godspeed .. */
                if (mlx5e_sq_has_room_for(sq, 1))
                        mlx5e_send_nop(sq, true);
-
-               err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY,
-                                     MLX5_SQC_STATE_ERR, false, 0);
-               if (err)
-                       set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
-       }
-
-       /* wait till sq is empty, unless a TX timeout occurred on this SQ */
-       while (sq->cc != sq->pc &&
-              !test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)) {
-               msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
-               if (tout++ > MLX5_EN_QP_FLUSH_MAX_ITER)
-                       set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
        }
 
-       /* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */
-       napi_synchronize(&sq->channel->napi);
-
-       mlx5e_free_tx_descs(sq);
        mlx5e_disable_sq(sq);
+       mlx5e_free_tx_descs(sq);
        mlx5e_destroy_sq(sq);
 }
 
@@ -1826,10 +1807,6 @@ int mlx5e_open_locked(struct net_device *netdev)
        netif_set_real_num_tx_queues(netdev, num_txqs);
        netif_set_real_num_rx_queues(netdev, priv->params.num_channels);
 
-       err = mlx5e_set_dev_port_mtu(netdev);
-       if (err)
-               goto err_clear_state_opened_flag;
-
        err = mlx5e_open_channels(priv);
        if (err) {
                netdev_err(netdev, "%s: mlx5e_open_channels failed, %d\n",
@@ -2573,6 +2550,7 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu)
        u16 max_mtu;
        u16 min_mtu;
        int err = 0;
+       bool reset;
 
        mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
 
@@ -2588,13 +2566,18 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu)
 
        mutex_lock(&priv->state_lock);
 
+       reset = !priv->params.lro_en &&
+               (priv->params.rq_wq_type !=
+                MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
+
        was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
-       if (was_opened)
+       if (was_opened && reset)
                mlx5e_close_locked(netdev);
 
        netdev->mtu = new_mtu;
+       mlx5e_set_dev_port_mtu(netdev);
 
-       if (was_opened)
+       if (was_opened && reset)
                err = mlx5e_open_locked(netdev);
 
        mutex_unlock(&priv->state_lock);
@@ -2794,7 +2777,7 @@ static void mlx5e_tx_timeout(struct net_device *dev)
                if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i)))
                        continue;
                sched_work = true;
-               set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
+               set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state);
                netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x\n",
                           i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc);
        }
@@ -3231,8 +3214,8 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv)
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_mkey_seg *mkc;
        int inlen = sizeof(*in);
-       u64 npages =
-               priv->profile->max_nch(mdev) * MLX5_CHANNEL_MAX_NUM_MTTS;
+       u64 npages = MLX5E_REQUIRED_MTTS(priv->profile->max_nch(mdev),
+                                        BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW));
        int err;
 
        in = mlx5_vzalloc(inlen);
@@ -3246,10 +3229,12 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv)
                     MLX5_PERM_LOCAL_WRITE |
                     MLX5_ACCESS_MODE_MTT;
 
+       npages = min_t(u32, ALIGN(U16_MAX, 4) * 2, npages);
+
        mkc->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
        mkc->flags_pd = cpu_to_be32(mdev->mlx5e_res.pdn);
        mkc->len = cpu_to_be64(npages << PAGE_SHIFT);
-       mkc->xlt_oct_size = cpu_to_be32(mlx5e_get_mtt_octw(npages));
+       mkc->xlt_oct_size = cpu_to_be32(MLX5_MTT_OCTW(npages));
        mkc->log2_page_size = PAGE_SHIFT;
 
        err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen, NULL,
@@ -3385,6 +3370,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
        queue_work(priv->wq, &priv->set_rx_mode_work);
 
        if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
+               mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id);
                rep.load = mlx5e_nic_rep_load;
                rep.unload = mlx5e_nic_rep_unload;
                rep.vport = 0;
@@ -3463,6 +3449,8 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
 
        mlx5e_init_l2_addr(priv);
 
+       mlx5e_set_dev_port_mtu(netdev);
+
        err = register_netdev(netdev);
        if (err) {
                mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
@@ -3501,16 +3489,20 @@ static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev)
        struct mlx5_eswitch *esw = mdev->priv.eswitch;
        int total_vfs = MLX5_TOTAL_VPORTS(mdev);
        int vport;
+       u8 mac[ETH_ALEN];
 
        if (!MLX5_CAP_GEN(mdev, vport_group_manager))
                return;
 
+       mlx5_query_nic_vport_mac_address(mdev, 0, mac);
+
        for (vport = 1; vport < total_vfs; vport++) {
                struct mlx5_eswitch_rep rep;
 
                rep.load = mlx5e_vport_rep_load;
                rep.unload = mlx5e_vport_rep_unload;
                rep.vport = vport;
+               ether_addr_copy(rep.hw_id, mac);
                mlx5_eswitch_register_vport_rep(esw, &rep);
        }
 }
index 1c7d8b8..134de4a 100644 (file)
@@ -135,17 +135,16 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
 int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
+       struct mlx5_eswitch_rep *rep = priv->ppriv;
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-       u8 mac[ETH_ALEN];
 
        if (esw->mode == SRIOV_NONE)
                return -EOPNOTSUPP;
 
        switch (attr->id) {
        case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-               mlx5_query_nic_vport_mac_address(priv->mdev, 0, mac);
                attr->u.ppid.id_len = ETH_ALEN;
-               memcpy(&attr->u.ppid.id, &mac, ETH_ALEN);
+               ether_addr_copy(attr->u.ppid.id, rep->hw_id);
                break;
        default:
                return -EOPNOTSUPP;
index 9f2a16a..b6f8ebb 100644 (file)
@@ -324,9 +324,9 @@ mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev,
        }
 }
 
-static u16 mlx5e_get_wqe_mtt_offset(u16 rq_ix, u16 wqe_ix)
+static u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix)
 {
-       return rq_ix * MLX5_CHANNEL_MAX_NUM_MTTS +
+       return rq->mpwqe_mtt_offset +
                wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8);
 }
 
@@ -340,7 +340,7 @@ static void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
        struct mlx5_wqe_data_seg      *dseg = &wqe->data;
        struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
        u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS);
-       u16 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq->ix, ix);
+       u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix);
 
        memset(wqe, 0, sizeof(*wqe));
        cseg->opmod_idx_opcode =
@@ -353,9 +353,9 @@ static void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
 
        ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN;
        ucseg->klm_octowords =
-               cpu_to_be16(mlx5e_get_mtt_octw(MLX5_MPWRQ_PAGES_PER_WQE));
+               cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE));
        ucseg->bsf_octowords =
-               cpu_to_be16(mlx5e_get_mtt_octw(umr_wqe_mtt_offset));
+               cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset));
        ucseg->mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
 
        dseg->lkey = sq->mkey_be;
@@ -423,7 +423,7 @@ static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
 {
        struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
        int mtt_sz = mlx5e_get_wqe_mtt_sz();
-       u32 dma_offset = mlx5e_get_wqe_mtt_offset(rq->ix, ix) << PAGE_SHIFT;
+       u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT;
        int i;
 
        wi->umr.dma_info = kmalloc(sizeof(*wi->umr.dma_info) *
@@ -506,6 +506,12 @@ void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq)
        struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
 
        clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state);
+
+       if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) {
+               mlx5e_free_rx_fragmented_mpwqe(rq, &rq->wqe_info[wq->head]);
+               return;
+       }
+
        mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index));
        rq->stats.mpwqe_frag++;
 
@@ -595,26 +601,9 @@ void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
        wi->free_wqe(rq, wi);
 }
 
-void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
-{
-       struct mlx5_wq_ll *wq = &rq->wq;
-       struct mlx5e_rx_wqe *wqe;
-       __be16 wqe_ix_be;
-       u16 wqe_ix;
-
-       while (!mlx5_wq_ll_is_empty(wq)) {
-               wqe_ix_be = *wq->tail_next;
-               wqe_ix    = be16_to_cpu(wqe_ix_be);
-               wqe       = mlx5_wq_ll_get_wqe(&rq->wq, wqe_ix);
-               rq->dealloc_wqe(rq, wqe_ix);
-               mlx5_wq_ll_pop(&rq->wq, wqe_ix_be,
-                              &wqe->next.next_wqe_index);
-       }
-}
-
 #define RQ_CANNOT_POST(rq) \
-               (!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state) || \
-                test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
+       (test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state) || \
+        test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
 
 bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
 {
@@ -916,7 +905,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
        struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
        int work_done = 0;
 
-       if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state)))
+       if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state)))
                return 0;
 
        if (cq->decmprs_left)
index 7b9d8a9..499487c 100644 (file)
@@ -70,6 +70,7 @@ struct mlx5e_sw_stats {
        u64 tx_queue_stopped;
        u64 tx_queue_wake;
        u64 tx_queue_dropped;
+       u64 tx_xmit_more;
        u64 rx_wqe_err;
        u64 rx_mpwqe_filler;
        u64 rx_mpwqe_frag;
@@ -101,6 +102,7 @@ static const struct counter_desc sw_stats_desc[] = {
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) },
@@ -298,6 +300,7 @@ struct mlx5e_sq_stats {
        /* commonly accessed in data path */
        u64 packets;
        u64 bytes;
+       u64 xmit_more;
        u64 tso_packets;
        u64 tso_bytes;
        u64 tso_inner_packets;
@@ -324,6 +327,7 @@ static const struct counter_desc sq_stats_desc[] = {
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) },
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) },
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
+       { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
 };
 
 #define NUM_SW_COUNTERS                        ARRAY_SIZE(sw_stats_desc)
index 0f19b01..22cfc4a 100644 (file)
@@ -170,7 +170,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
                struct flow_dissector_key_control *key =
                        skb_flow_dissector_target(f->dissector,
-                                                 FLOW_DISSECTOR_KEY_BASIC,
+                                                 FLOW_DISSECTOR_KEY_CONTROL,
                                                  f->key);
                addr_type = key->addr_type;
        }
@@ -318,6 +318,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                                u32 *action, u32 *flow_tag)
 {
        const struct tc_action *a;
+       LIST_HEAD(actions);
 
        if (tc_no_actions(exts))
                return -EINVAL;
@@ -325,7 +326,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
        *flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
        *action = 0;
 
-       tc_for_each_action(a, exts) {
+       tcf_exts_to_list(exts, &actions);
+       list_for_each_entry(a, &actions, list) {
                /* Only support a single action per rule */
                if (*action)
                        return -EINVAL;
@@ -362,13 +364,15 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                                u32 *action, u32 *dest_vport)
 {
        const struct tc_action *a;
+       LIST_HEAD(actions);
 
        if (tc_no_actions(exts))
                return -EINVAL;
 
        *action = 0;
 
-       tc_for_each_action(a, exts) {
+       tcf_exts_to_list(exts, &actions);
+       list_for_each_entry(a, &actions, list) {
                /* Only support a single action per rule */
                if (*action)
                        return -EINVAL;
@@ -503,6 +507,7 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv,
        struct mlx5e_tc_flow *flow;
        struct tc_action *a;
        struct mlx5_fc *counter;
+       LIST_HEAD(actions);
        u64 bytes;
        u64 packets;
        u64 lastuse;
@@ -518,7 +523,8 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv,
 
        mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
 
-       tc_for_each_action(a, f->exts)
+       tcf_exts_to_list(f->exts, &actions);
+       list_for_each_entry(a, &actions, list)
                tcf_action_stats_update(a, bytes, packets, lastuse);
 
        return 0;
index e073bf5..988eca9 100644 (file)
@@ -375,6 +375,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
 
        sq->stats.packets++;
        sq->stats.bytes += num_bytes;
+       sq->stats.xmit_more += skb->xmit_more;
        return NETDEV_TX_OK;
 
 dma_unmap_wqe_err:
@@ -394,35 +395,6 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
        return mlx5e_sq_xmit(sq, skb);
 }
 
-void mlx5e_free_tx_descs(struct mlx5e_sq *sq)
-{
-       struct mlx5e_tx_wqe_info *wi;
-       struct sk_buff *skb;
-       u16 ci;
-       int i;
-
-       while (sq->cc != sq->pc) {
-               ci = sq->cc & sq->wq.sz_m1;
-               skb = sq->skb[ci];
-               wi = &sq->wqe_info[ci];
-
-               if (!skb) { /* nop */
-                       sq->cc++;
-                       continue;
-               }
-
-               for (i = 0; i < wi->num_dma; i++) {
-                       struct mlx5e_sq_dma *dma =
-                               mlx5e_dma_get(sq, sq->dma_fifo_cc++);
-
-                       mlx5e_tx_dma_unmap(sq->pdev, dma);
-               }
-
-               dev_kfree_skb_any(skb);
-               sq->cc += wi->num_wqebbs;
-       }
-}
-
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 {
        struct mlx5e_sq *sq;
@@ -434,7 +406,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 
        sq = container_of(cq, struct mlx5e_sq, cq);
 
-       if (unlikely(test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)))
+       if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state)))
                return false;
 
        npkts = 0;
@@ -512,11 +484,39 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
        netdev_tx_completed_queue(sq->txq, npkts, nbytes);
 
        if (netif_tx_queue_stopped(sq->txq) &&
-           mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM) &&
-           likely(test_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state))) {
-                               netif_tx_wake_queue(sq->txq);
-                               sq->stats.wake++;
+           mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM)) {
+               netif_tx_wake_queue(sq->txq);
+               sq->stats.wake++;
        }
 
        return (i == MLX5E_TX_CQ_POLL_BUDGET);
 }
+
+void mlx5e_free_tx_descs(struct mlx5e_sq *sq)
+{
+       struct mlx5e_tx_wqe_info *wi;
+       struct sk_buff *skb;
+       u16 ci;
+       int i;
+
+       while (sq->cc != sq->pc) {
+               ci = sq->cc & sq->wq.sz_m1;
+               skb = sq->skb[ci];
+               wi = &sq->wqe_info[ci];
+
+               if (!skb) { /* nop */
+                       sq->cc++;
+                       continue;
+               }
+
+               for (i = 0; i < wi->num_dma; i++) {
+                       struct mlx5e_sq_dma *dma =
+                               mlx5e_dma_get(sq, sq->dma_fifo_cc++);
+
+                       mlx5e_tx_dma_unmap(sq->pdev, dma);
+               }
+
+               dev_kfree_skb_any(skb);
+               sq->cc += wi->num_wqebbs;
+       }
+}
index 64ae2e8..9bf33bb 100644 (file)
@@ -51,16 +51,18 @@ struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq)
 
 static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
 {
+       struct mlx5e_sq *sq = container_of(cq, struct mlx5e_sq, cq);
        struct mlx5_wq_cyc *wq;
        struct mlx5_cqe64 *cqe;
-       struct mlx5e_sq *sq;
        u16 sqcc;
 
+       if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state)))
+               return;
+
        cqe = mlx5e_get_cqe(cq);
        if (likely(!cqe))
                return;
 
-       sq = container_of(cq, struct mlx5e_sq, cq);
        wq = &sq->wq;
 
        /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
index f6d6677..8b78f15 100644 (file)
@@ -1451,7 +1451,8 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
 
        esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);
 
-       if (vport_num) { /* Only VFs need ACLs for VST and spoofchk filtering */
+       /* Only VFs need ACLs for VST and spoofchk filtering */
+       if (vport_num && esw->mode == SRIOV_LEGACY) {
                esw_vport_ingress_config(esw, vport);
                esw_vport_egress_config(esw, vport);
        }
@@ -1502,7 +1503,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
         */
        esw_vport_change_handle_locked(vport);
        vport->enabled_events = 0;
-       if (vport_num) {
+       if (vport_num && esw->mode == SRIOV_LEGACY) {
                esw_vport_disable_egress_acl(esw, vport);
                esw_vport_disable_ingress_acl(esw, vport);
        }
@@ -1767,7 +1768,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
                               vport, err);
 
        mutex_lock(&esw->state_lock);
-       if (evport->enabled)
+       if (evport->enabled && esw->mode == SRIOV_LEGACY)
                err = esw_vport_ingress_config(esw, evport);
        mutex_unlock(&esw->state_lock);
        return err;
@@ -1839,7 +1840,7 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
        mutex_lock(&esw->state_lock);
        evport->vlan = vlan;
        evport->qos = qos;
-       if (evport->enabled) {
+       if (evport->enabled && esw->mode == SRIOV_LEGACY) {
                err = esw_vport_ingress_config(esw, evport);
                if (err)
                        goto out;
@@ -1868,10 +1869,11 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
        mutex_lock(&esw->state_lock);
        pschk = evport->spoofchk;
        evport->spoofchk = spoofchk;
-       if (evport->enabled)
+       if (evport->enabled && esw->mode == SRIOV_LEGACY) {
                err = esw_vport_ingress_config(esw, evport);
-       if (err)
-               evport->spoofchk = pschk;
+               if (err)
+                       evport->spoofchk = pschk;
+       }
        mutex_unlock(&esw->state_lock);
 
        return err;
index c0b0560..a961409 100644 (file)
@@ -174,6 +174,7 @@ struct mlx5_eswitch_rep {
        void                  *priv_data;
        struct list_head       vport_sqs_list;
        bool                   valid;
+       u8                     hw_id[ETH_ALEN];
 };
 
 struct mlx5_esw_offload {
index a357e8e..3dc83a9 100644 (file)
@@ -113,7 +113,7 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn
        dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
        dest.vport_num = vport;
 
-       flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, spec,
+       flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec,
                                       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
                                       0, &dest);
        if (IS_ERR(flow_rule))
@@ -535,7 +535,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports)
        esw_destroy_offloads_fdb_table(esw);
 }
 
-static int mlx5_esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
+static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
 {
        switch (mode) {
        case DEVLINK_ESWITCH_MODE_LEGACY:
@@ -551,6 +551,22 @@ static int mlx5_esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
        return 0;
 }
 
+static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode)
+{
+       switch (mlx5_mode) {
+       case SRIOV_LEGACY:
+               *mode = DEVLINK_ESWITCH_MODE_LEGACY;
+               break;
+       case SRIOV_OFFLOADS:
+               *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
 {
        struct mlx5_core_dev *dev;
@@ -566,7 +582,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
        if (cur_mlx5_mode == SRIOV_NONE)
                return -EOPNOTSUPP;
 
-       if (mlx5_esw_mode_from_devlink(mode, &mlx5_mode))
+       if (esw_mode_from_devlink(mode, &mlx5_mode))
                return -EINVAL;
 
        if (cur_mlx5_mode == mlx5_mode)
@@ -592,9 +608,7 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
        if (dev->priv.eswitch->mode == SRIOV_NONE)
                return -EOPNOTSUPP;
 
-       *mode = dev->priv.eswitch->mode;
-
-       return 0;
+       return esw_mode_to_devlink(dev->priv.eswitch->mode, mode);
 }
 
 void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
index 75bb8c8..3d6c1f6 100644 (file)
@@ -80,7 +80,7 @@
                           LEFTOVERS_NUM_PRIOS)
 
 #define ETHTOOL_PRIO_NUM_LEVELS 1
-#define ETHTOOL_NUM_PRIOS 10
+#define ETHTOOL_NUM_PRIOS 11
 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
 /* Vlan, mac, ttc, aRFS */
 #define KERNEL_NIC_PRIO_NUM_LEVELS 4
index c2877e9..3a9195b 100644 (file)
@@ -126,12 +126,21 @@ static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
        for (node = &first->node; node; node = rb_next(node)) {
                struct mlx5_fc *counter = rb_entry(node, struct mlx5_fc, node);
                struct mlx5_fc_cache *c = &counter->cache;
+               u64 packets;
+               u64 bytes;
 
                if (counter->id > last_id)
                        break;
 
                mlx5_cmd_fc_bulk_get(dev, b,
-                                    counter->id, &c->packets, &c->bytes);
+                                    counter->id, &packets, &bytes);
+
+               if (c->packets == packets)
+                       continue;
+
+               c->packets = packets;
+               c->bytes = bytes;
+               c->lastuse = jiffies;
        }
 
 out:
index 4f491d4..2385bae 100644 (file)
@@ -1420,36 +1420,12 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
        dev_info(&pdev->dev, "%s was called\n", __func__);
        mlx5_enter_error_state(dev);
        mlx5_unload_one(dev, priv);
+       pci_save_state(pdev);
        mlx5_pci_disable_device(dev);
        return state == pci_channel_io_perm_failure ?
                PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
 }
 
-static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
-{
-       struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
-       int err = 0;
-
-       dev_info(&pdev->dev, "%s was called\n", __func__);
-
-       err = mlx5_pci_enable_device(dev);
-       if (err) {
-               dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
-                       , __func__, err);
-               return PCI_ERS_RESULT_DISCONNECT;
-       }
-       pci_set_master(pdev);
-       pci_set_power_state(pdev, PCI_D0);
-       pci_restore_state(pdev);
-
-       return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
-}
-
-void mlx5_disable_device(struct mlx5_core_dev *dev)
-{
-       mlx5_pci_err_detected(dev->pdev, 0);
-}
-
 /* wait for the device to show vital signs by waiting
  * for the health counter to start counting.
  */
@@ -1477,21 +1453,44 @@ static int wait_vital(struct pci_dev *pdev)
        return -ETIMEDOUT;
 }
 
-static void mlx5_pci_resume(struct pci_dev *pdev)
+static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
 {
        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
-       struct mlx5_priv *priv = &dev->priv;
        int err;
 
        dev_info(&pdev->dev, "%s was called\n", __func__);
 
-       pci_save_state(pdev);
-       err = wait_vital(pdev);
+       err = mlx5_pci_enable_device(dev);
        if (err) {
+               dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
+                       , __func__, err);
+               return PCI_ERS_RESULT_DISCONNECT;
+       }
+
+       pci_set_master(pdev);
+       pci_restore_state(pdev);
+
+       if (wait_vital(pdev)) {
                dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
-               return;
+               return PCI_ERS_RESULT_DISCONNECT;
        }
 
+       return PCI_ERS_RESULT_RECOVERED;
+}
+
+void mlx5_disable_device(struct mlx5_core_dev *dev)
+{
+       mlx5_pci_err_detected(dev->pdev, 0);
+}
+
+static void mlx5_pci_resume(struct pci_dev *pdev)
+{
+       struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+       struct mlx5_priv *priv = &dev->priv;
+       int err;
+
+       dev_info(&pdev->dev, "%s was called\n", __func__);
+
        err = mlx5_load_one(dev, priv);
        if (err)
                dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n"
index f33b997..af371a8 100644 (file)
@@ -56,6 +56,7 @@
 #define MLXSW_PORT_PHY_BITS_MASK       (MLXSW_PORT_MAX_PHY_PORTS - 1)
 
 #define MLXSW_PORT_CPU_PORT            0x0
+#define MLXSW_PORT_ROUTER_PORT         (MLXSW_PORT_MAX_PHY_PORTS + 2)
 
 #define MLXSW_PORT_DONT_CARE           (MLXSW_PORT_MAX_PORTS)
 
index 7ca9201..1721098 100644 (file)
@@ -3383,6 +3383,15 @@ MLXSW_ITEM32(reg, ritr, ipv4_fe, 0x04, 29, 1);
  */
 MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1);
 
+/* reg_ritr_lb_en
+ * Loop-back filter enable for unicast packets.
+ * If the flag is set then loop-back filter for unicast packets is
+ * implemented on the RIF. Multicast packets are always subject to
+ * loop-back filtering.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, lb_en, 0x04, 24, 1);
+
 /* reg_ritr_virtual_router
  * Virtual router ID associated with the router interface.
  * Access: RW
@@ -3484,6 +3493,7 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
        mlxsw_reg_ritr_op_set(payload, op);
        mlxsw_reg_ritr_rif_set(payload, rif);
        mlxsw_reg_ritr_ipv4_fe_set(payload, 1);
+       mlxsw_reg_ritr_lb_en_set(payload, 1);
        mlxsw_reg_ritr_mtu_set(payload, mtu);
        mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac);
 }
@@ -4000,6 +4010,7 @@ static inline void mlxsw_reg_ralue_pack(char *payload,
 {
        MLXSW_REG_ZERO(ralue, payload);
        mlxsw_reg_ralue_protocol_set(payload, protocol);
+       mlxsw_reg_ralue_op_set(payload, op);
        mlxsw_reg_ralue_virtual_router_set(payload, virtual_router);
        mlxsw_reg_ralue_prefix_len_set(payload, prefix_len);
        mlxsw_reg_ralue_entry_type_set(payload,
index c3e6150..7291f2c 100644 (file)
@@ -942,8 +942,8 @@ static void mlxsw_sp_port_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_vport)
        kfree(mlxsw_sp_vport);
 }
 
-int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
-                         u16 vid)
+static int mlxsw_sp_port_add_vid(struct net_device *dev,
+                                __be16 __always_unused proto, u16 vid)
 {
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
        struct mlxsw_sp_port *mlxsw_sp_vport;
@@ -956,16 +956,12 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
        if (!vid)
                return 0;
 
-       if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid)) {
-               netdev_warn(dev, "VID=%d already configured\n", vid);
+       if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid))
                return 0;
-       }
 
        mlxsw_sp_vport = mlxsw_sp_port_vport_create(mlxsw_sp_port, vid);
-       if (!mlxsw_sp_vport) {
-               netdev_err(dev, "Failed to create vPort for VID=%d\n", vid);
+       if (!mlxsw_sp_vport)
                return -ENOMEM;
-       }
 
        /* When adding the first VLAN interface on a bridged port we need to
         * transition all the active 802.1Q bridge VLANs to use explicit
@@ -973,24 +969,17 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
         */
        if (list_is_singular(&mlxsw_sp_port->vports_list)) {
                err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port);
-               if (err) {
-                       netdev_err(dev, "Failed to set to Virtual mode\n");
+               if (err)
                        goto err_port_vp_mode_trans;
-               }
        }
 
        err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false);
-       if (err) {
-               netdev_err(dev, "Failed to disable learning for VID=%d\n", vid);
+       if (err)
                goto err_port_vid_learning_set;
-       }
 
        err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, untagged);
-       if (err) {
-               netdev_err(dev, "Failed to set VLAN membership for VID=%d\n",
-                          vid);
+       if (err)
                goto err_port_add_vid;
-       }
 
        return 0;
 
@@ -1010,7 +999,6 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev,
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
        struct mlxsw_sp_port *mlxsw_sp_vport;
        struct mlxsw_sp_fid *f;
-       int err;
 
        /* VLAN 0 is removed from HW filter when device goes down, but
         * it is reserved in our case, so simply return.
@@ -1019,23 +1007,12 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev,
                return 0;
 
        mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid);
-       if (!mlxsw_sp_vport) {
-               netdev_warn(dev, "VID=%d does not exist\n", vid);
+       if (WARN_ON(!mlxsw_sp_vport))
                return 0;
-       }
 
-       err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false);
-       if (err) {
-               netdev_err(dev, "Failed to set VLAN membership for VID=%d\n",
-                          vid);
-               return err;
-       }
+       mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false);
 
-       err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true);
-       if (err) {
-               netdev_err(dev, "Failed to enable learning for VID=%d\n", vid);
-               return err;
-       }
+       mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true);
 
        /* Drop FID reference. If this was the last reference the
         * resources will be freed.
@@ -1048,13 +1025,8 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev,
         * transition all active 802.1Q bridge VLANs to use VID to FID
         * mappings and set port's mode to VLAN mode.
         */
-       if (list_is_singular(&mlxsw_sp_port->vports_list)) {
-               err = mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
-               if (err) {
-                       netdev_err(dev, "Failed to set to VLAN mode\n");
-                       return err;
-               }
-       }
+       if (list_is_singular(&mlxsw_sp_port->vports_list))
+               mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
 
        mlxsw_sp_port_vport_destroy(mlxsw_sp_vport);
 
@@ -1149,6 +1121,7 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
                                          bool ingress)
 {
        const struct tc_action *a;
+       LIST_HEAD(actions);
        int err;
 
        if (!tc_single_action(cls->exts)) {
@@ -1156,7 +1129,8 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
                return -ENOTSUPP;
        }
 
-       tc_for_each_action(a, cls->exts) {
+       tcf_exts_to_list(cls->exts, &actions);
+       list_for_each_entry(a, &actions, list) {
                if (!is_tcf_mirred_mirror(a) || protocol != htons(ETH_P_ALL))
                        return -ENOTSUPP;
 
@@ -2076,6 +2050,18 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
        return 0;
 }
 
+static int mlxsw_sp_port_pvid_vport_create(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       mlxsw_sp_port->pvid = 1;
+
+       return mlxsw_sp_port_add_vid(mlxsw_sp_port->dev, 0, 1);
+}
+
+static int mlxsw_sp_port_pvid_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       return mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1);
+}
+
 static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
                                bool split, u8 module, u8 width, u8 lane)
 {
@@ -2191,7 +2177,15 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
                goto err_port_dcb_init;
        }
 
+       err = mlxsw_sp_port_pvid_vport_create(mlxsw_sp_port);
+       if (err) {
+               dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create PVID vPort\n",
+                       mlxsw_sp_port->local_port);
+               goto err_port_pvid_vport_create;
+       }
+
        mlxsw_sp_port_switchdev_init(mlxsw_sp_port);
+       mlxsw_sp->ports[local_port] = mlxsw_sp_port;
        err = register_netdev(dev);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to register netdev\n",
@@ -2208,24 +2202,23 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
                goto err_core_port_init;
        }
 
-       err = mlxsw_sp_port_vlan_init(mlxsw_sp_port);
-       if (err)
-               goto err_port_vlan_init;
-
-       mlxsw_sp->ports[local_port] = mlxsw_sp_port;
        return 0;
 
-err_port_vlan_init:
-       mlxsw_core_port_fini(&mlxsw_sp_port->core_port);
 err_core_port_init:
        unregister_netdev(dev);
 err_register_netdev:
+       mlxsw_sp->ports[local_port] = NULL;
+       mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
+       mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port);
+err_port_pvid_vport_create:
+       mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
 err_port_dcb_init:
 err_port_ets_init:
 err_port_buffers_init:
 err_port_admin_status_set:
 err_port_mtu_set:
 err_port_speed_by_width_set:
+       mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
 err_port_swid_set:
 err_port_system_port_mapping_set:
 err_dev_addr_init:
@@ -2245,12 +2238,12 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 
        if (!mlxsw_sp_port)
                return;
-       mlxsw_sp->ports[local_port] = NULL;
        mlxsw_core_port_fini(&mlxsw_sp_port->core_port);
        unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
-       mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
-       mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1);
+       mlxsw_sp->ports[local_port] = NULL;
        mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
+       mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port);
+       mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
        mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
        mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port);
        free_percpu(mlxsw_sp_port->pcpu_stats);
@@ -2659,6 +2652,26 @@ static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = {
                .local_port = MLXSW_PORT_DONT_CARE,
                .trap_id = MLXSW_TRAP_ID_ARPUC,
        },
+       {
+               .func = mlxsw_sp_rx_listener_func,
+               .local_port = MLXSW_PORT_DONT_CARE,
+               .trap_id = MLXSW_TRAP_ID_MTUERROR,
+       },
+       {
+               .func = mlxsw_sp_rx_listener_func,
+               .local_port = MLXSW_PORT_DONT_CARE,
+               .trap_id = MLXSW_TRAP_ID_TTLERROR,
+       },
+       {
+               .func = mlxsw_sp_rx_listener_func,
+               .local_port = MLXSW_PORT_DONT_CARE,
+               .trap_id = MLXSW_TRAP_ID_LBERROR,
+       },
+       {
+               .func = mlxsw_sp_rx_listener_func,
+               .local_port = MLXSW_PORT_DONT_CARE,
+               .trap_id = MLXSW_TRAP_ID_OSPF,
+       },
        {
                .func = mlxsw_sp_rx_listener_func,
                .local_port = MLXSW_PORT_DONT_CARE,
@@ -3311,6 +3324,39 @@ static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp,
        return mlxsw_sp_fid_find(mlxsw_sp, fid);
 }
 
+static enum mlxsw_flood_table_type mlxsw_sp_flood_table_type_get(u16 fid)
+{
+       return mlxsw_sp_fid_is_vfid(fid) ? MLXSW_REG_SFGC_TABLE_TYPE_FID :
+              MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST;
+}
+
+static u16 mlxsw_sp_flood_table_index_get(u16 fid)
+{
+       return mlxsw_sp_fid_is_vfid(fid) ? mlxsw_sp_fid_to_vfid(fid) : fid;
+}
+
+static int mlxsw_sp_router_port_flood_set(struct mlxsw_sp *mlxsw_sp, u16 fid,
+                                         bool set)
+{
+       enum mlxsw_flood_table_type table_type;
+       char *sftr_pl;
+       u16 index;
+       int err;
+
+       sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL);
+       if (!sftr_pl)
+               return -ENOMEM;
+
+       table_type = mlxsw_sp_flood_table_type_get(fid);
+       index = mlxsw_sp_flood_table_index_get(fid);
+       mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, index, table_type,
+                           1, MLXSW_PORT_ROUTER_PORT, set);
+       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
+
+       kfree(sftr_pl);
+       return err;
+}
+
 static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid)
 {
        if (mlxsw_sp_fid_is_vfid(fid))
@@ -3347,10 +3393,14 @@ static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp,
        if (rif == MLXSW_SP_RIF_MAX)
                return -ERANGE;
 
-       err = mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, true);
+       err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true);
        if (err)
                return err;
 
+       err = mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, true);
+       if (err)
+               goto err_rif_bridge_op;
+
        err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true);
        if (err)
                goto err_rif_fdb_op;
@@ -3372,6 +3422,8 @@ err_rif_alloc:
        mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
 err_rif_fdb_op:
        mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false);
+err_rif_bridge_op:
+       mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);
        return err;
 }
 
@@ -3391,6 +3443,8 @@ void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp,
 
        mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false);
 
+       mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);
+
        netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif);
 }
 
index f69aa37..ab3feb8 100644 (file)
@@ -536,8 +536,6 @@ int mlxsw_sp_port_vid_to_fid_set(struct mlxsw_sp_port *mlxsw_sp_port,
                                 u16 vid);
 int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin,
                           u16 vid_end, bool is_member, bool untagged);
-int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
-                         u16 vid);
 int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid,
                             bool set);
 void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port);
index 074cdda..953b214 100644 (file)
@@ -330,7 +330,7 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = {
        MLXSW_SP_CPU_PORT_SB_CM,
        MLXSW_SP_CPU_PORT_SB_CM,
        MLXSW_SP_CPU_PORT_SB_CM,
-       MLXSW_SP_CPU_PORT_SB_CM,
+       MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(10000), 0, 0),
        MLXSW_SP_CPU_PORT_SB_CM,
        MLXSW_SP_CPU_PORT_SB_CM,
        MLXSW_SP_CPU_PORT_SB_CM,
@@ -717,22 +717,18 @@ int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port,
        u8 local_port = mlxsw_sp_port->local_port;
        u8 pg_buff = tc_index;
        enum mlxsw_reg_sbxx_dir dir = pool_type;
-       u8 pool = pool_index;
+       u8 pool = pool_get(pool_index);
        u32 max_buff;
        int err;
 
+       if (dir != dir_get(pool_index))
+               return -EINVAL;
+
        err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir,
                                       threshold, &max_buff);
        if (err)
                return err;
 
-       if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS) {
-               if (pool < MLXSW_SP_SB_POOL_COUNT)
-                       return -EINVAL;
-               pool -= MLXSW_SP_SB_POOL_COUNT;
-       } else if (pool >= MLXSW_SP_SB_POOL_COUNT) {
-               return -EINVAL;
-       }
        return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff, dir,
                                    0, max_buff, pool);
 }
index 01cfb75..b6ed7f7 100644 (file)
@@ -341,6 +341,8 @@ static int mlxsw_sp_port_pfc_set(struct mlxsw_sp_port *mlxsw_sp_port,
        char pfcc_pl[MLXSW_REG_PFCC_LEN];
 
        mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port);
+       mlxsw_reg_pfcc_pprx_set(pfcc_pl, mlxsw_sp_port->link.rx_pause);
+       mlxsw_reg_pfcc_pptx_set(pfcc_pl, mlxsw_sp_port->link.tx_pause);
        mlxsw_reg_pfcc_prio_pack(pfcc_pl, pfc->pfc_en);
 
        return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc),
@@ -351,17 +353,17 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev,
                                      struct ieee_pfc *pfc)
 {
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+       bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port);
        int err;
 
-       if ((mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause) &&
-           pfc->pfc_en) {
+       if (pause_en && pfc->pfc_en) {
                netdev_err(dev, "PAUSE frames already enabled on port\n");
                return -EINVAL;
        }
 
        err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu,
                                           mlxsw_sp_port->dcb.ets->prio_tc,
-                                          false, pfc);
+                                          pause_en, pfc);
        if (err) {
                netdev_err(dev, "Failed to configure port's headroom for PFC\n");
                return err;
@@ -380,7 +382,7 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev,
 
 err_port_pfc_set:
        __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu,
-                                    mlxsw_sp_port->dcb.ets->prio_tc, false,
+                                    mlxsw_sp_port->dcb.ets->prio_tc, pause_en,
                                     mlxsw_sp_port->dcb.pfc);
        return err;
 }
index 81418d6..917ddd1 100644 (file)
@@ -657,7 +657,7 @@ int mlxsw_sp_router_neigh_construct(struct net_device *dev,
                return 0;
        }
 
-       r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+       r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
        if (WARN_ON(!r))
                return -EINVAL;
 
@@ -1651,9 +1651,10 @@ static void mlxsw_sp_router_fib4_add_info_destroy(void const *data)
        const struct mlxsw_sp_router_fib4_add_info *info = data;
        struct mlxsw_sp_fib_entry *fib_entry = info->fib_entry;
        struct mlxsw_sp *mlxsw_sp = info->mlxsw_sp;
+       struct mlxsw_sp_vr *vr = fib_entry->vr;
 
        mlxsw_sp_fib_entry_destroy(fib_entry);
-       mlxsw_sp_vr_put(mlxsw_sp, fib_entry->vr);
+       mlxsw_sp_vr_put(mlxsw_sp, vr);
        kfree(info);
 }
 
index a1ad5e6..d1b59cd 100644 (file)
@@ -450,6 +450,8 @@ void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f)
 
        kfree(f);
 
+       mlxsw_sp_fid_map(mlxsw_sp, fid, false);
+
        mlxsw_sp_fid_op(mlxsw_sp, fid, false);
 }
 
@@ -997,13 +999,13 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev,
 }
 
 static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
-                                    u16 vid_begin, u16 vid_end, bool init)
+                                    u16 vid_begin, u16 vid_end)
 {
        struct net_device *dev = mlxsw_sp_port->dev;
        u16 vid, pvid;
        int err;
 
-       if (!init && !mlxsw_sp_port->bridged)
+       if (!mlxsw_sp_port->bridged)
                return -EINVAL;
 
        err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end,
@@ -1014,9 +1016,6 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
                return err;
        }
 
-       if (init)
-               goto out;
-
        pvid = mlxsw_sp_port->pvid;
        if (pvid >= vid_begin && pvid <= vid_end) {
                err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0);
@@ -1028,7 +1027,6 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
 
        mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end);
 
-out:
        /* Changing activity bits only if HW operation succeded */
        for (vid = vid_begin; vid <= vid_end; vid++)
                clear_bit(vid, mlxsw_sp_port->active_vlans);
@@ -1039,8 +1037,8 @@ out:
 static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
                                   const struct switchdev_obj_port_vlan *vlan)
 {
-       return __mlxsw_sp_port_vlans_del(mlxsw_sp_port,
-                                        vlan->vid_begin, vlan->vid_end, false);
+       return __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vlan->vid_begin,
+                                        vlan->vid_end);
 }
 
 void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port)
@@ -1048,7 +1046,7 @@ void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port)
        u16 vid;
 
        for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID)
-               __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid, false);
+               __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid);
 }
 
 static int
@@ -1546,32 +1544,6 @@ void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp)
        mlxsw_sp_fdb_fini(mlxsw_sp);
 }
 
-int mlxsw_sp_port_vlan_init(struct mlxsw_sp_port *mlxsw_sp_port)
-{
-       struct net_device *dev = mlxsw_sp_port->dev;
-       int err;
-
-       /* Allow only untagged packets to ingress and tag them internally
-        * with VID 1.
-        */
-       mlxsw_sp_port->pvid = 1;
-       err = __mlxsw_sp_port_vlans_del(mlxsw_sp_port, 0, VLAN_N_VID - 1,
-                                       true);
-       if (err) {
-               netdev_err(dev, "Unable to init VLANs\n");
-               return err;
-       }
-
-       /* Add implicit VLAN interface in the device, so that untagged
-        * packets will be classified to the default vFID.
-        */
-       err = mlxsw_sp_port_add_vid(dev, 0, 1);
-       if (err)
-               netdev_err(dev, "Failed to configure default vFID\n");
-
-       return err;
-}
-
 void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port)
 {
        mlxsw_sp_port->dev->switchdev_ops = &mlxsw_sp_port_switchdev_ops;
index 470d769..ed8e301 100644 (file)
@@ -56,6 +56,10 @@ enum {
        MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34,
        MLXSW_TRAP_ID_ARPBC = 0x50,
        MLXSW_TRAP_ID_ARPUC = 0x51,
+       MLXSW_TRAP_ID_MTUERROR = 0x52,
+       MLXSW_TRAP_ID_TTLERROR = 0x53,
+       MLXSW_TRAP_ID_LBERROR = 0x54,
+       MLXSW_TRAP_ID_OSPF = 0x55,
        MLXSW_TRAP_ID_IP2ME = 0x5F,
        MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70,
        MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90,
index 4d4ecba..8e13ec8 100644 (file)
@@ -475,14 +475,6 @@ static void __lpc_get_mac(struct netdata_local *pldat, u8 *mac)
        mac[5] = tmp >> 8;
 }
 
-static void __lpc_eth_clock_enable(struct netdata_local *pldat, bool enable)
-{
-       if (enable)
-               clk_prepare_enable(pldat->clk);
-       else
-               clk_disable_unprepare(pldat->clk);
-}
-
 static void __lpc_params_setup(struct netdata_local *pldat)
 {
        u32 tmp;
@@ -1056,7 +1048,7 @@ static int lpc_eth_close(struct net_device *ndev)
        writel(0, LPC_ENET_MAC2(pldat->net_base));
        spin_unlock_irqrestore(&pldat->lock, flags);
 
-       __lpc_eth_clock_enable(pldat, false);
+       clk_disable_unprepare(pldat->clk);
 
        return 0;
 }
@@ -1197,11 +1189,14 @@ static int lpc_eth_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
 static int lpc_eth_open(struct net_device *ndev)
 {
        struct netdata_local *pldat = netdev_priv(ndev);
+       int ret;
 
        if (netif_msg_ifup(pldat))
                dev_dbg(&pldat->pdev->dev, "enabling %s\n", ndev->name);
 
-       __lpc_eth_clock_enable(pldat, true);
+       ret = clk_prepare_enable(pldat->clk);
+       if (ret)
+               return ret;
 
        /* Suspended PHY makes LPC ethernet core block, so resume now */
        phy_resume(ndev->phydev);
@@ -1320,7 +1315,9 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
        }
 
        /* Enable network clock */
-       __lpc_eth_clock_enable(pldat, true);
+       ret = clk_prepare_enable(pldat->clk);
+       if (ret)
+               goto err_out_clk_put;
 
        /* Map IO space */
        pldat->net_base = ioremap(res->start, resource_size(res));
@@ -1454,6 +1451,7 @@ err_out_iounmap:
        iounmap(pldat->net_base);
 err_out_disable_clocks:
        clk_disable_unprepare(pldat->clk);
+err_out_clk_put:
        clk_put(pldat->clk);
 err_out_free_dev:
        free_netdev(ndev);
index 35e5377..45ab746 100644 (file)
@@ -561,9 +561,18 @@ struct qed_dev {
 static inline u8 qed_concrete_to_sw_fid(struct qed_dev *cdev,
                                        u32 concrete_fid)
 {
+       u8 vfid = GET_FIELD(concrete_fid, PXP_CONCRETE_FID_VFID);
        u8 pfid = GET_FIELD(concrete_fid, PXP_CONCRETE_FID_PFID);
+       u8 vf_valid = GET_FIELD(concrete_fid,
+                               PXP_CONCRETE_FID_VFVALID);
+       u8 sw_fid;
 
-       return pfid;
+       if (vf_valid)
+               sw_fid = vfid + MAX_NUM_PFS;
+       else
+               sw_fid = pfid;
+
+       return sw_fid;
 }
 
 #define PURE_LB_TC 8
index d0dc28f..226cb08 100644 (file)
@@ -52,40 +52,94 @@ static bool qed_dcbx_app_ethtype(u32 app_info_bitmap)
                  DCBX_APP_SF_ETHTYPE);
 }
 
+static bool qed_dcbx_ieee_app_ethtype(u32 app_info_bitmap)
+{
+       u8 mfw_val = QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE);
+
+       /* Old MFW */
+       if (mfw_val == DCBX_APP_SF_IEEE_RESERVED)
+               return qed_dcbx_app_ethtype(app_info_bitmap);
+
+       return !!(mfw_val == DCBX_APP_SF_IEEE_ETHTYPE);
+}
+
 static bool qed_dcbx_app_port(u32 app_info_bitmap)
 {
        return !!(QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF) ==
                  DCBX_APP_SF_PORT);
 }
 
-static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_ieee_app_port(u32 app_info_bitmap, u8 type)
 {
-       return !!(qed_dcbx_app_ethtype(app_info_bitmap) &&
-                 proto_id == QED_ETH_TYPE_DEFAULT);
+       u8 mfw_val = QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE);
+
+       /* Old MFW */
+       if (mfw_val == DCBX_APP_SF_IEEE_RESERVED)
+               return qed_dcbx_app_port(app_info_bitmap);
+
+       return !!(mfw_val == type || mfw_val == DCBX_APP_SF_IEEE_TCP_UDP_PORT);
 }
 
-static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
 {
-       return !!(qed_dcbx_app_port(app_info_bitmap) &&
-                 proto_id == QED_TCP_PORT_ISCSI);
+       bool ethtype;
+
+       if (ieee)
+               ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap);
+       else
+               ethtype = qed_dcbx_app_ethtype(app_info_bitmap);
+
+       return !!(ethtype && (proto_id == QED_ETH_TYPE_DEFAULT));
 }
 
-static bool qed_dcbx_fcoe_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
 {
-       return !!(qed_dcbx_app_ethtype(app_info_bitmap) &&
-                 proto_id == QED_ETH_TYPE_FCOE);
+       bool port;
+
+       if (ieee)
+               port = qed_dcbx_ieee_app_port(app_info_bitmap,
+                                             DCBX_APP_SF_IEEE_TCP_PORT);
+       else
+               port = qed_dcbx_app_port(app_info_bitmap);
+
+       return !!(port && (proto_id == QED_TCP_PORT_ISCSI));
 }
 
-static bool qed_dcbx_roce_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_fcoe_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
 {
-       return !!(qed_dcbx_app_ethtype(app_info_bitmap) &&
-                 proto_id == QED_ETH_TYPE_ROCE);
+       bool ethtype;
+
+       if (ieee)
+               ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap);
+       else
+               ethtype = qed_dcbx_app_ethtype(app_info_bitmap);
+
+       return !!(ethtype && (proto_id == QED_ETH_TYPE_FCOE));
 }
 
-static bool qed_dcbx_roce_v2_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_roce_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
 {
-       return !!(qed_dcbx_app_port(app_info_bitmap) &&
-                 proto_id == QED_UDP_PORT_TYPE_ROCE_V2);
+       bool ethtype;
+
+       if (ieee)
+               ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap);
+       else
+               ethtype = qed_dcbx_app_ethtype(app_info_bitmap);
+
+       return !!(ethtype && (proto_id == QED_ETH_TYPE_ROCE));
+}
+
+static bool qed_dcbx_roce_v2_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
+{
+       bool port;
+
+       if (ieee)
+               port = qed_dcbx_ieee_app_port(app_info_bitmap,
+                                             DCBX_APP_SF_IEEE_UDP_PORT);
+       else
+               port = qed_dcbx_app_port(app_info_bitmap);
+
+       return !!(port && (proto_id == QED_UDP_PORT_TYPE_ROCE_V2));
 }
 
 static void
@@ -164,17 +218,17 @@ qed_dcbx_update_app_info(struct qed_dcbx_results *p_data,
 static bool
 qed_dcbx_get_app_protocol_type(struct qed_hwfn *p_hwfn,
                               u32 app_prio_bitmap,
-                              u16 id, enum dcbx_protocol_type *type)
+                              u16 id, enum dcbx_protocol_type *type, bool ieee)
 {
-       if (qed_dcbx_fcoe_tlv(app_prio_bitmap, id)) {
+       if (qed_dcbx_fcoe_tlv(app_prio_bitmap, id, ieee)) {
                *type = DCBX_PROTOCOL_FCOE;
-       } else if (qed_dcbx_roce_tlv(app_prio_bitmap, id)) {
+       } else if (qed_dcbx_roce_tlv(app_prio_bitmap, id, ieee)) {
                *type = DCBX_PROTOCOL_ROCE;
-       } else if (qed_dcbx_iscsi_tlv(app_prio_bitmap, id)) {
+       } else if (qed_dcbx_iscsi_tlv(app_prio_bitmap, id, ieee)) {
                *type = DCBX_PROTOCOL_ISCSI;
-       } else if (qed_dcbx_default_tlv(app_prio_bitmap, id)) {
+       } else if (qed_dcbx_default_tlv(app_prio_bitmap, id, ieee)) {
                *type = DCBX_PROTOCOL_ETH;
-       } else if (qed_dcbx_roce_v2_tlv(app_prio_bitmap, id)) {
+       } else if (qed_dcbx_roce_v2_tlv(app_prio_bitmap, id, ieee)) {
                *type = DCBX_PROTOCOL_ROCE_V2;
        } else {
                *type = DCBX_MAX_PROTOCOL_TYPE;
@@ -194,17 +248,18 @@ static int
 qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn,
                     struct qed_dcbx_results *p_data,
                     struct dcbx_app_priority_entry *p_tbl,
-                    u32 pri_tc_tbl, int count, bool dcbx_enabled)
+                    u32 pri_tc_tbl, int count, u8 dcbx_version)
 {
        u8 tc, priority_map;
        enum dcbx_protocol_type type;
+       bool enable, ieee;
        u16 protocol_id;
        int priority;
-       bool enable;
        int i;
 
        DP_VERBOSE(p_hwfn, QED_MSG_DCB, "Num APP entries = %d\n", count);
 
+       ieee = (dcbx_version == DCBX_CONFIG_VERSION_IEEE);
        /* Parse APP TLV */
        for (i = 0; i < count; i++) {
                protocol_id = QED_MFW_GET_FIELD(p_tbl[i].entry,
@@ -219,7 +274,7 @@ qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn,
 
                tc = QED_DCBX_PRIO2TC(pri_tc_tbl, priority);
                if (qed_dcbx_get_app_protocol_type(p_hwfn, p_tbl[i].entry,
-                                                  protocol_id, &type)) {
+                                                  protocol_id, &type, ieee)) {
                        /* ETH always have the enable bit reset, as it gets
                         * vlan information per packet. For other protocols,
                         * should be set according to the dcbx_enabled
@@ -275,15 +330,12 @@ static int qed_dcbx_process_mib_info(struct qed_hwfn *p_hwfn)
        struct dcbx_ets_feature *p_ets;
        struct qed_hw_info *p_info;
        u32 pri_tc_tbl, flags;
-       bool dcbx_enabled;
+       u8 dcbx_version;
        int num_entries;
        int rc = 0;
 
-       /* If DCBx version is non zero, then negotiation was
-        * successfuly performed
-        */
        flags = p_hwfn->p_dcbx_info->operational.flags;
-       dcbx_enabled = !!QED_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION);
+       dcbx_version = QED_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION);
 
        p_app = &p_hwfn->p_dcbx_info->operational.features.app;
        p_tbl = p_app->app_pri_tbl;
@@ -295,13 +347,13 @@ static int qed_dcbx_process_mib_info(struct qed_hwfn *p_hwfn)
        num_entries = QED_MFW_GET_FIELD(p_app->flags, DCBX_APP_NUM_ENTRIES);
 
        rc = qed_dcbx_process_tlv(p_hwfn, &data, p_tbl, pri_tc_tbl,
-                                 num_entries, dcbx_enabled);
+                                 num_entries, dcbx_version);
        if (rc)
                return rc;
 
        p_info->num_tc = QED_MFW_GET_FIELD(p_ets->flags, DCBX_ETS_MAX_TCS);
        data.pf_id = p_hwfn->rel_pf_id;
-       data.dcbx_enabled = dcbx_enabled;
+       data.dcbx_enabled = !!dcbx_version;
 
        qed_dcbx_dp_protocol(p_hwfn, &data);
 
@@ -400,7 +452,7 @@ static void
 qed_dcbx_get_app_data(struct qed_hwfn *p_hwfn,
                      struct dcbx_app_priority_feature *p_app,
                      struct dcbx_app_priority_entry *p_tbl,
-                     struct qed_dcbx_params *p_params)
+                     struct qed_dcbx_params *p_params, bool ieee)
 {
        struct qed_app_entry *entry;
        u8 pri_map;
@@ -414,15 +466,46 @@ qed_dcbx_get_app_data(struct qed_hwfn *p_hwfn,
                                                      DCBX_APP_NUM_ENTRIES);
        for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) {
                entry = &p_params->app_entry[i];
-               entry->ethtype = !(QED_MFW_GET_FIELD(p_tbl[i].entry,
-                                                    DCBX_APP_SF));
+               if (ieee) {
+                       u8 sf_ieee;
+                       u32 val;
+
+                       sf_ieee = QED_MFW_GET_FIELD(p_tbl[i].entry,
+                                                   DCBX_APP_SF_IEEE);
+                       switch (sf_ieee) {
+                       case DCBX_APP_SF_IEEE_RESERVED:
+                               /* Old MFW */
+                               val = QED_MFW_GET_FIELD(p_tbl[i].entry,
+                                                       DCBX_APP_SF);
+                               entry->sf_ieee = val ?
+                                   QED_DCBX_SF_IEEE_TCP_UDP_PORT :
+                                   QED_DCBX_SF_IEEE_ETHTYPE;
+                               break;
+                       case DCBX_APP_SF_IEEE_ETHTYPE:
+                               entry->sf_ieee = QED_DCBX_SF_IEEE_ETHTYPE;
+                               break;
+                       case DCBX_APP_SF_IEEE_TCP_PORT:
+                               entry->sf_ieee = QED_DCBX_SF_IEEE_TCP_PORT;
+                               break;
+                       case DCBX_APP_SF_IEEE_UDP_PORT:
+                               entry->sf_ieee = QED_DCBX_SF_IEEE_UDP_PORT;
+                               break;
+                       case DCBX_APP_SF_IEEE_TCP_UDP_PORT:
+                               entry->sf_ieee = QED_DCBX_SF_IEEE_TCP_UDP_PORT;
+                               break;
+                       }
+               } else {
+                       entry->ethtype = !(QED_MFW_GET_FIELD(p_tbl[i].entry,
+                                                            DCBX_APP_SF));
+               }
+
                pri_map = QED_MFW_GET_FIELD(p_tbl[i].entry, DCBX_APP_PRI_MAP);
                entry->prio = ffs(pri_map) - 1;
                entry->proto_id = QED_MFW_GET_FIELD(p_tbl[i].entry,
                                                    DCBX_APP_PROTOCOL_ID);
                qed_dcbx_get_app_protocol_type(p_hwfn, p_tbl[i].entry,
                                               entry->proto_id,
-                                              &entry->proto_type);
+                                              &entry->proto_type, ieee);
        }
 
        DP_VERBOSE(p_hwfn, QED_MSG_DCB,
@@ -483,7 +566,7 @@ qed_dcbx_get_ets_data(struct qed_hwfn *p_hwfn,
        bw_map[1] = be32_to_cpu(p_ets->tc_bw_tbl[1]);
        tsa_map[0] = be32_to_cpu(p_ets->tc_tsa_tbl[0]);
        tsa_map[1] = be32_to_cpu(p_ets->tc_tsa_tbl[1]);
-       pri_map = be32_to_cpu(p_ets->pri_tc_tbl[0]);
+       pri_map = p_ets->pri_tc_tbl[0];
        for (i = 0; i < QED_MAX_PFC_PRIORITIES; i++) {
                p_params->ets_tc_bw_tbl[i] = ((u8 *)bw_map)[i];
                p_params->ets_tc_tsa_tbl[i] = ((u8 *)tsa_map)[i];
@@ -500,9 +583,9 @@ qed_dcbx_get_common_params(struct qed_hwfn *p_hwfn,
                           struct dcbx_app_priority_feature *p_app,
                           struct dcbx_app_priority_entry *p_tbl,
                           struct dcbx_ets_feature *p_ets,
-                          u32 pfc, struct qed_dcbx_params *p_params)
+                          u32 pfc, struct qed_dcbx_params *p_params, bool ieee)
 {
-       qed_dcbx_get_app_data(p_hwfn, p_app, p_tbl, p_params);
+       qed_dcbx_get_app_data(p_hwfn, p_app, p_tbl, p_params, ieee);
        qed_dcbx_get_ets_data(p_hwfn, p_ets, p_params);
        qed_dcbx_get_pfc_data(p_hwfn, pfc, p_params);
 }
@@ -516,7 +599,7 @@ qed_dcbx_get_local_params(struct qed_hwfn *p_hwfn,
        p_feat = &p_hwfn->p_dcbx_info->local_admin.features;
        qed_dcbx_get_common_params(p_hwfn, &p_feat->app,
                                   p_feat->app.app_pri_tbl, &p_feat->ets,
-                                  p_feat->pfc, &params->local.params);
+                                  p_feat->pfc, &params->local.params, false);
        params->local.valid = true;
 }
 
@@ -529,7 +612,7 @@ qed_dcbx_get_remote_params(struct qed_hwfn *p_hwfn,
        p_feat = &p_hwfn->p_dcbx_info->remote.features;
        qed_dcbx_get_common_params(p_hwfn, &p_feat->app,
                                   p_feat->app.app_pri_tbl, &p_feat->ets,
-                                  p_feat->pfc, &params->remote.params);
+                                  p_feat->pfc, &params->remote.params, false);
        params->remote.valid = true;
 }
 
@@ -574,7 +657,8 @@ qed_dcbx_get_operational_params(struct qed_hwfn *p_hwfn,
 
        qed_dcbx_get_common_params(p_hwfn, &p_feat->app,
                                   p_feat->app.app_pri_tbl, &p_feat->ets,
-                                  p_feat->pfc, &params->operational.params);
+                                  p_feat->pfc, &params->operational.params,
+                                  p_operational->ieee);
        qed_dcbx_get_priority_info(p_hwfn, &p_operational->app_prio, p_results);
        err = QED_MFW_GET_FIELD(p_feat->app.flags, DCBX_APP_ERROR);
        p_operational->err = err;
@@ -944,7 +1028,6 @@ qed_dcbx_set_ets_data(struct qed_hwfn *p_hwfn,
                val = (((u32)p_params->ets_pri_tc_tbl[i]) << ((7 - i) * 4));
                p_ets->pri_tc_tbl[0] |= val;
        }
-       p_ets->pri_tc_tbl[0] = cpu_to_be32(p_ets->pri_tc_tbl[0]);
        for (i = 0; i < 2; i++) {
                p_ets->tc_bw_tbl[i] = cpu_to_be32(p_ets->tc_bw_tbl[i]);
                p_ets->tc_tsa_tbl[i] = cpu_to_be32(p_ets->tc_tsa_tbl[i]);
@@ -954,7 +1037,7 @@ qed_dcbx_set_ets_data(struct qed_hwfn *p_hwfn,
 static void
 qed_dcbx_set_app_data(struct qed_hwfn *p_hwfn,
                      struct dcbx_app_priority_feature *p_app,
-                     struct qed_dcbx_params *p_params)
+                     struct qed_dcbx_params *p_params, bool ieee)
 {
        u32 *entry;
        int i;
@@ -975,12 +1058,36 @@ qed_dcbx_set_app_data(struct qed_hwfn *p_hwfn,
 
        for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) {
                entry = &p_app->app_pri_tbl[i].entry;
-               *entry &= ~DCBX_APP_SF_MASK;
-               if (p_params->app_entry[i].ethtype)
-                       *entry |= ((u32)DCBX_APP_SF_ETHTYPE <<
-                                  DCBX_APP_SF_SHIFT);
-               else
-                       *entry |= ((u32)DCBX_APP_SF_PORT << DCBX_APP_SF_SHIFT);
+               if (ieee) {
+                       *entry &= ~DCBX_APP_SF_IEEE_MASK;
+                       switch (p_params->app_entry[i].sf_ieee) {
+                       case QED_DCBX_SF_IEEE_ETHTYPE:
+                               *entry |= ((u32)DCBX_APP_SF_IEEE_ETHTYPE <<
+                                          DCBX_APP_SF_IEEE_SHIFT);
+                               break;
+                       case QED_DCBX_SF_IEEE_TCP_PORT:
+                               *entry |= ((u32)DCBX_APP_SF_IEEE_TCP_PORT <<
+                                          DCBX_APP_SF_IEEE_SHIFT);
+                               break;
+                       case QED_DCBX_SF_IEEE_UDP_PORT:
+                               *entry |= ((u32)DCBX_APP_SF_IEEE_UDP_PORT <<
+                                          DCBX_APP_SF_IEEE_SHIFT);
+                               break;
+                       case QED_DCBX_SF_IEEE_TCP_UDP_PORT:
+                               *entry |= ((u32)DCBX_APP_SF_IEEE_TCP_UDP_PORT <<
+                                          DCBX_APP_SF_IEEE_SHIFT);
+                               break;
+                       }
+               } else {
+                       *entry &= ~DCBX_APP_SF_MASK;
+                       if (p_params->app_entry[i].ethtype)
+                               *entry |= ((u32)DCBX_APP_SF_ETHTYPE <<
+                                          DCBX_APP_SF_SHIFT);
+                       else
+                               *entry |= ((u32)DCBX_APP_SF_PORT <<
+                                          DCBX_APP_SF_SHIFT);
+               }
+
                *entry &= ~DCBX_APP_PROTOCOL_ID_MASK;
                *entry |= ((u32)p_params->app_entry[i].proto_id <<
                           DCBX_APP_PROTOCOL_ID_SHIFT);
@@ -995,15 +1102,19 @@ qed_dcbx_set_local_params(struct qed_hwfn *p_hwfn,
                          struct dcbx_local_params *local_admin,
                          struct qed_dcbx_set *params)
 {
+       bool ieee = false;
+
        local_admin->flags = 0;
        memcpy(&local_admin->features,
               &p_hwfn->p_dcbx_info->operational.features,
               sizeof(local_admin->features));
 
-       if (params->enabled)
+       if (params->enabled) {
                local_admin->config = params->ver_num;
-       else
+               ieee = !!(params->ver_num & DCBX_CONFIG_VERSION_IEEE);
+       } else {
                local_admin->config = DCBX_CONFIG_VERSION_DISABLED;
+       }
 
        if (params->override_flags & QED_DCBX_OVERRIDE_PFC_CFG)
                qed_dcbx_set_pfc_data(p_hwfn, &local_admin->features.pfc,
@@ -1015,7 +1126,7 @@ qed_dcbx_set_local_params(struct qed_hwfn *p_hwfn,
 
        if (params->override_flags & QED_DCBX_OVERRIDE_APP_CFG)
                qed_dcbx_set_app_data(p_hwfn, &local_admin->features.app,
-                                     &params->config.params);
+                                     &params->config.params, ieee);
 }
 
 int qed_dcbx_config_params(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
@@ -1596,8 +1707,10 @@ static int qed_dcbnl_setapp(struct qed_dev *cdev,
                if ((entry->ethtype == ethtype) && (entry->proto_id == idval))
                        break;
                /* First empty slot */
-               if (!entry->proto_id)
+               if (!entry->proto_id) {
+                       dcbx_set.config.params.num_app_entries++;
                        break;
+               }
        }
 
        if (i == QED_DCBX_MAX_APP_PROTOCOL) {
@@ -2117,8 +2230,10 @@ int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app)
                    (entry->proto_id == app->protocol))
                        break;
                /* First empty slot */
-               if (!entry->proto_id)
+               if (!entry->proto_id) {
+                       dcbx_set.config.params.num_app_entries++;
                        break;
+               }
        }
 
        if (i == QED_DCBX_MAX_APP_PROTOCOL) {
index 5927840..6f9d3b8 100644 (file)
@@ -6850,6 +6850,14 @@ struct dcbx_app_priority_entry {
 #define DCBX_APP_SF_SHIFT              8
 #define DCBX_APP_SF_ETHTYPE            0
 #define DCBX_APP_SF_PORT               1
+#define DCBX_APP_SF_IEEE_MASK          0x0000f000
+#define DCBX_APP_SF_IEEE_SHIFT         12
+#define DCBX_APP_SF_IEEE_RESERVED      0
+#define DCBX_APP_SF_IEEE_ETHTYPE       1
+#define DCBX_APP_SF_IEEE_TCP_PORT      2
+#define DCBX_APP_SF_IEEE_UDP_PORT      3
+#define DCBX_APP_SF_IEEE_TCP_UDP_PORT  4
+
 #define DCBX_APP_PROTOCOL_ID_MASK      0xffff0000
 #define DCBX_APP_PROTOCOL_ID_SHIFT     16
 };
index e4bd02e..a6eb6af 100644 (file)
@@ -722,11 +722,14 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb,
        txq->tx_db.data.bd_prod =
                cpu_to_le16(qed_chain_get_prod_idx(&txq->tx_pbl));
 
-       if (!skb->xmit_more || netif_tx_queue_stopped(netdev_txq))
+       if (!skb->xmit_more || netif_xmit_stopped(netdev_txq))
                qede_update_tx_producer(txq);
 
        if (unlikely(qed_chain_get_elem_left(&txq->tx_pbl)
                      < (MAX_SKB_FRAGS + 1))) {
+               if (skb->xmit_more)
+                       qede_update_tx_producer(txq);
+
                netif_tx_stop_queue(netdev_txq);
                DP_VERBOSE(edev, NETIF_MSG_TX_QUEUED,
                           "Stop queue was called\n");
index fd973f4..49bad00 100644 (file)
@@ -37,8 +37,8 @@
 
 #define _QLCNIC_LINUX_MAJOR 5
 #define _QLCNIC_LINUX_MINOR 3
-#define _QLCNIC_LINUX_SUBVERSION 64
-#define QLCNIC_LINUX_VERSIONID  "5.3.64"
+#define _QLCNIC_LINUX_SUBVERSION 65
+#define QLCNIC_LINUX_VERSIONID  "5.3.65"
 #define QLCNIC_DRV_IDC_VER  0x01
 #define QLCNIC_DRIVER_VERSION  ((_QLCNIC_LINUX_MAJOR << 16) |\
                 (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION))
index 87c642d..fedd736 100644 (file)
 #define QLCNIC_RESPONSE_DESC   0x05
 #define QLCNIC_LRO_DESC        0x12
 
-#define QLCNIC_TX_POLL_BUDGET          128
 #define QLCNIC_TCP_HDR_SIZE            20
 #define QLCNIC_TCP_TS_OPTION_SIZE      12
 #define QLCNIC_FETCH_RING_ID(handle)   ((handle) >> 63)
@@ -2008,7 +2007,6 @@ static int qlcnic_83xx_msix_tx_poll(struct napi_struct *napi, int budget)
        struct qlcnic_host_tx_ring *tx_ring;
        struct qlcnic_adapter *adapter;
 
-       budget = QLCNIC_TX_POLL_BUDGET;
        tx_ring = container_of(napi, struct qlcnic_host_tx_ring, napi);
        adapter = tx_ring->adapter;
        work_done = qlcnic_process_cmd_ring(adapter, tx_ring, budget);
index 017d8c2..24061b9 100644 (file)
@@ -156,10 +156,8 @@ struct qlcnic_vf_info {
        spinlock_t                      vlan_list_lock; /* Lock for VLAN list */
 };
 
-struct qlcnic_async_work_list {
+struct qlcnic_async_cmd {
        struct list_head        list;
-       struct work_struct      work;
-       void                    *ptr;
        struct qlcnic_cmd_args  *cmd;
 };
 
@@ -168,7 +166,10 @@ struct qlcnic_back_channel {
        struct workqueue_struct *bc_trans_wq;
        struct workqueue_struct *bc_async_wq;
        struct workqueue_struct *bc_flr_wq;
-       struct list_head        async_list;
+       struct qlcnic_adapter   *adapter;
+       struct list_head        async_cmd_list;
+       struct work_struct      vf_async_work;
+       spinlock_t              queue_lock; /* async_cmd_list queue lock */
 };
 
 struct qlcnic_sriov {
index 7327b72..d710705 100644 (file)
@@ -29,6 +29,7 @@
 #define QLC_83XX_VF_RESET_FAIL_THRESH  8
 #define QLC_BC_CMD_MAX_RETRY_CNT       5
 
+static void qlcnic_sriov_handle_async_issue_cmd(struct work_struct *work);
 static void qlcnic_sriov_vf_free_mac_list(struct qlcnic_adapter *);
 static int qlcnic_sriov_alloc_bc_mbx_args(struct qlcnic_cmd_args *, u32);
 static void qlcnic_sriov_vf_poll_dev_state(struct work_struct *);
@@ -177,7 +178,10 @@ int qlcnic_sriov_init(struct qlcnic_adapter *adapter, int num_vfs)
        }
 
        bc->bc_async_wq =  wq;
-       INIT_LIST_HEAD(&bc->async_list);
+       INIT_LIST_HEAD(&bc->async_cmd_list);
+       INIT_WORK(&bc->vf_async_work, qlcnic_sriov_handle_async_issue_cmd);
+       spin_lock_init(&bc->queue_lock);
+       bc->adapter = adapter;
 
        for (i = 0; i < num_vfs; i++) {
                vf = &sriov->vf_info[i];
@@ -1517,17 +1521,21 @@ static void qlcnic_vf_add_mc_list(struct net_device *netdev, const u8 *mac,
 
 void qlcnic_sriov_cleanup_async_list(struct qlcnic_back_channel *bc)
 {
-       struct list_head *head = &bc->async_list;
-       struct qlcnic_async_work_list *entry;
+       struct list_head *head = &bc->async_cmd_list;
+       struct qlcnic_async_cmd *entry;
 
        flush_workqueue(bc->bc_async_wq);
+       cancel_work_sync(&bc->vf_async_work);
+
+       spin_lock(&bc->queue_lock);
        while (!list_empty(head)) {
-               entry = list_entry(head->next, struct qlcnic_async_work_list,
+               entry = list_entry(head->next, struct qlcnic_async_cmd,
                                   list);
-               cancel_work_sync(&entry->work);
                list_del(&entry->list);
+               kfree(entry->cmd);
                kfree(entry);
        }
+       spin_unlock(&bc->queue_lock);
 }
 
 void qlcnic_sriov_vf_set_multi(struct net_device *netdev)
@@ -1587,57 +1595,64 @@ void qlcnic_sriov_vf_set_multi(struct net_device *netdev)
 
 static void qlcnic_sriov_handle_async_issue_cmd(struct work_struct *work)
 {
-       struct qlcnic_async_work_list *entry;
-       struct qlcnic_adapter *adapter;
+       struct qlcnic_async_cmd *entry, *tmp;
+       struct qlcnic_back_channel *bc;
        struct qlcnic_cmd_args *cmd;
+       struct list_head *head;
+       LIST_HEAD(del_list);
+
+       bc = container_of(work, struct qlcnic_back_channel, vf_async_work);
+       head = &bc->async_cmd_list;
+
+       spin_lock(&bc->queue_lock);
+       list_splice_init(head, &del_list);
+       spin_unlock(&bc->queue_lock);
+
+       list_for_each_entry_safe(entry, tmp, &del_list, list) {
+               list_del(&entry->list);
+               cmd = entry->cmd;
+               __qlcnic_sriov_issue_cmd(bc->adapter, cmd);
+               kfree(entry);
+       }
+
+       if (!list_empty(head))
+               queue_work(bc->bc_async_wq, &bc->vf_async_work);
 
-       entry = container_of(work, struct qlcnic_async_work_list, work);
-       adapter = entry->ptr;
-       cmd = entry->cmd;
-       __qlcnic_sriov_issue_cmd(adapter, cmd);
        return;
 }
 
-static struct qlcnic_async_work_list *
-qlcnic_sriov_get_free_node_async_work(struct qlcnic_back_channel *bc)
+static struct qlcnic_async_cmd *
+qlcnic_sriov_alloc_async_cmd(struct qlcnic_back_channel *bc,
+                            struct qlcnic_cmd_args *cmd)
 {
-       struct list_head *node;
-       struct qlcnic_async_work_list *entry = NULL;
-       u8 empty = 0;
+       struct qlcnic_async_cmd *entry = NULL;
 
-       list_for_each(node, &bc->async_list) {
-               entry = list_entry(node, struct qlcnic_async_work_list, list);
-               if (!work_pending(&entry->work)) {
-                       empty = 1;
-                       break;
-               }
-       }
+       entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+       if (!entry)
+               return NULL;
 
-       if (!empty) {
-               entry = kzalloc(sizeof(struct qlcnic_async_work_list),
-                               GFP_ATOMIC);
-               if (entry == NULL)
-                       return NULL;
-               list_add_tail(&entry->list, &bc->async_list);
-       }
+       entry->cmd = cmd;
+
+       spin_lock(&bc->queue_lock);
+       list_add_tail(&entry->list, &bc->async_cmd_list);
+       spin_unlock(&bc->queue_lock);
 
        return entry;
 }
 
 static void qlcnic_sriov_schedule_async_cmd(struct qlcnic_back_channel *bc,
-                                           work_func_t func, void *data,
                                            struct qlcnic_cmd_args *cmd)
 {
-       struct qlcnic_async_work_list *entry = NULL;
+       struct qlcnic_async_cmd *entry = NULL;
 
-       entry = qlcnic_sriov_get_free_node_async_work(bc);
-       if (!entry)
+       entry = qlcnic_sriov_alloc_async_cmd(bc, cmd);
+       if (!entry) {
+               qlcnic_free_mbx_args(cmd);
+               kfree(cmd);
                return;
+       }
 
-       entry->ptr = data;
-       entry->cmd = cmd;
-       INIT_WORK(&entry->work, func);
-       queue_work(bc->bc_async_wq, &entry->work);
+       queue_work(bc->bc_async_wq, &bc->vf_async_work);
 }
 
 static int qlcnic_sriov_async_issue_cmd(struct qlcnic_adapter *adapter,
@@ -1649,8 +1664,8 @@ static int qlcnic_sriov_async_issue_cmd(struct qlcnic_adapter *adapter,
        if (adapter->need_fw_reset)
                return -EIO;
 
-       qlcnic_sriov_schedule_async_cmd(bc, qlcnic_sriov_handle_async_issue_cmd,
-                                       adapter, cmd);
+       qlcnic_sriov_schedule_async_cmd(bc, cmd);
+
        return 0;
 }
 
index deae10d..5297bf7 100644 (file)
@@ -467,8 +467,8 @@ static int cp_rx_poll(struct napi_struct *napi, int budget)
        unsigned int rx_tail = cp->rx_tail;
        int rx;
 
-rx_status_loop:
        rx = 0;
+rx_status_loop:
        cpw16(IntrStatus, cp_rx_intr_mask);
 
        while (rx < budget) {
index f658fee..e00a669 100644 (file)
@@ -1517,13 +1517,14 @@ static void efx_ef10_get_stat_mask(struct efx_nic *efx, unsigned long *mask)
        }
 
 #if BITS_PER_LONG == 64
+       BUILD_BUG_ON(BITS_TO_LONGS(EF10_STAT_COUNT) != 2);
        mask[0] = raw_mask[0];
        mask[1] = raw_mask[1];
 #else
+       BUILD_BUG_ON(BITS_TO_LONGS(EF10_STAT_COUNT) != 3);
        mask[0] = raw_mask[0] & 0xffffffff;
        mask[1] = raw_mask[0] >> 32;
        mask[2] = raw_mask[1] & 0xffffffff;
-       mask[3] = raw_mask[1] >> 32;
 #endif
 }
 
index 726b80f..503a3b6 100644 (file)
@@ -2275,6 +2275,13 @@ static int smc_drv_probe(struct platform_device *pdev)
        if (pd) {
                memcpy(&lp->cfg, pd, sizeof(lp->cfg));
                lp->io_shift = SMC91X_IO_SHIFT(lp->cfg.flags);
+
+               if (!SMC_8BIT(lp) && !SMC_16BIT(lp)) {
+                       dev_err(&pdev->dev,
+                               "at least one of 8-bit or 16-bit access support is required.\n");
+                       ret = -ENXIO;
+                       goto out_free_netdev;
+               }
        }
 
 #if IS_BUILTIN(CONFIG_OF)
index 1a55c79..e17671c 100644 (file)
 #include <linux/dmaengine.h>
 #include <linux/smc91x.h>
 
+/*
+ * Any 16-bit access is performed with two 8-bit accesses if the hardware
+ * can't do it directly. Most registers are 16-bit so those are mandatory.
+ */
+#define SMC_outw_b(x, a, r)                                            \
+       do {                                                            \
+               unsigned int __val16 = (x);                             \
+               unsigned int __reg = (r);                               \
+               SMC_outb(__val16, a, __reg);                            \
+               SMC_outb(__val16 >> 8, a, __reg + (1 << SMC_IO_SHIFT)); \
+       } while (0)
+
+#define SMC_inw_b(a, r)                                                        \
+       ({                                                              \
+               unsigned int __val16;                                   \
+               unsigned int __reg = r;                                 \
+               __val16  = SMC_inb(a, __reg);                           \
+               __val16 |= SMC_inb(a, __reg + (1 << SMC_IO_SHIFT)) << 8; \
+               __val16;                                                \
+       })
+
 /*
  * Define your architecture specific bus configuration parameters here.
  */
 #define SMC_IO_SHIFT           (lp->io_shift)
 
 #define SMC_inb(a, r)          readb((a) + (r))
-#define SMC_inw(a, r)          readw((a) + (r))
+#define SMC_inw(a, r)                                                  \
+       ({                                                              \
+               unsigned int __smc_r = r;                               \
+               SMC_16BIT(lp) ? readw((a) + __smc_r) :                  \
+               SMC_8BIT(lp) ? SMC_inw_b(a, __smc_r) :                  \
+               ({ BUG(); 0; });                                        \
+       })
+
 #define SMC_inl(a, r)          readl((a) + (r))
 #define SMC_outb(v, a, r)      writeb(v, (a) + (r))
+#define SMC_outw(v, a, r)                                              \
+       do {                                                            \
+               unsigned int __v = v, __smc_r = r;                      \
+               if (SMC_16BIT(lp))                                      \
+                       __SMC_outw(__v, a, __smc_r);                    \
+               else if (SMC_8BIT(lp))                                  \
+                       SMC_outw_b(__v, a, __smc_r);                    \
+               else                                                    \
+                       BUG();                                          \
+       } while (0)
+
 #define SMC_outl(v, a, r)      writel(v, (a) + (r))
+#define SMC_insb(a, r, p, l)   readsb((a) + (r), p, l)
+#define SMC_outsb(a, r, p, l)  writesb((a) + (r), p, l)
 #define SMC_insw(a, r, p, l)   readsw((a) + (r), p, l)
 #define SMC_outsw(a, r, p, l)  writesw((a) + (r), p, l)
 #define SMC_insl(a, r, p, l)   readsl((a) + (r), p, l)
 #define SMC_IRQ_FLAGS          (-1)    /* from resource */
 
 /* We actually can't write halfwords properly if not word aligned */
-static inline void SMC_outw(u16 val, void __iomem *ioaddr, int reg)
+static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg)
 {
        if ((machine_is_mainstone() || machine_is_stargate2() ||
             machine_is_pxa_idp()) && reg & 2) {
@@ -416,24 +457,8 @@ smc_pxa_dma_insw(void __iomem *ioaddr, struct smc_local *lp, int reg, int dma,
 
 #if ! SMC_CAN_USE_16BIT
 
-/*
- * Any 16-bit access is performed with two 8-bit accesses if the hardware
- * can't do it directly. Most registers are 16-bit so those are mandatory.
- */
-#define SMC_outw(x, ioaddr, reg)                                       \
-       do {                                                            \
-               unsigned int __val16 = (x);                             \
-               SMC_outb( __val16, ioaddr, reg );                       \
-               SMC_outb( __val16 >> 8, ioaddr, reg + (1 << SMC_IO_SHIFT));\
-       } while (0)
-#define SMC_inw(ioaddr, reg)                                           \
-       ({                                                              \
-               unsigned int __val16;                                   \
-               __val16 =  SMC_inb( ioaddr, reg );                      \
-               __val16 |= SMC_inb( ioaddr, reg + (1 << SMC_IO_SHIFT)) << 8; \
-               __val16;                                                \
-       })
-
+#define SMC_outw(x, ioaddr, reg)       SMC_outw_b(x, ioaddr, reg)
+#define SMC_inw(ioaddr, reg)           SMC_inw_b(ioaddr, reg)
 #define SMC_insw(a, r, p, l)           BUG()
 #define SMC_outsw(a, r, p, l)          BUG()
 
index 9f159a7..5a3941b 100644 (file)
@@ -1622,13 +1622,7 @@ static void dwceqos_init_hw(struct net_local *lp)
                DWCEQOS_MMC_CTRL_RSTONRD);
        dwceqos_enable_mmc_interrupt(lp);
 
-       /* Enable Interrupts */
-       dwceqos_write(lp, REG_DWCEQOS_DMA_CH0_IE,
-                     DWCEQOS_DMA_CH0_IE_NIE |
-                     DWCEQOS_DMA_CH0_IE_RIE | DWCEQOS_DMA_CH0_IE_TIE |
-                     DWCEQOS_DMA_CH0_IE_AIE |
-                     DWCEQOS_DMA_CH0_IE_FBEE);
-
+       dwceqos_write(lp, REG_DWCEQOS_DMA_CH0_IE, 0);
        dwceqos_write(lp, REG_DWCEQOS_MAC_IE, 0);
 
        dwceqos_write(lp, REG_DWCEQOS_MAC_CFG, DWCEQOS_MAC_CFG_IPC |
@@ -1905,6 +1899,15 @@ static int dwceqos_open(struct net_device *ndev)
        netif_start_queue(ndev);
        tasklet_enable(&lp->tx_bdreclaim_tasklet);
 
+       /* Enable Interrupts -- do this only after we enable NAPI and the
+        * tasklet.
+        */
+       dwceqos_write(lp, REG_DWCEQOS_DMA_CH0_IE,
+                     DWCEQOS_DMA_CH0_IE_NIE |
+                     DWCEQOS_DMA_CH0_IE_RIE | DWCEQOS_DMA_CH0_IE_TIE |
+                     DWCEQOS_DMA_CH0_IE_AIE |
+                     DWCEQOS_DMA_CH0_IE_FBEE);
+
        return 0;
 }
 
index 7452b5f..7108c68 100644 (file)
@@ -1987,7 +1987,7 @@ bdx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if ((readl(nic->regs + FPGA_VER) & 0xFFF) >= 378) {
                err = pci_enable_msi(pdev);
                if (err)
-                       pr_err("Can't eneble msi. error is %d\n", err);
+                       pr_err("Can't enable msi. error is %d\n", err);
                else
                        nic->irq_type = IRQ_MSI;
        } else
index c51f346..f85d605 100644 (file)
@@ -734,6 +734,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
                netif_receive_skb(skb);
                ndev->stats.rx_bytes += len;
                ndev->stats.rx_packets++;
+               kmemleak_not_leak(new_skb);
        } else {
                ndev->stats.rx_dropped++;
                new_skb = skb;
@@ -1325,6 +1326,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
                                kfree_skb(skb);
                                goto err_cleanup;
                        }
+                       kmemleak_not_leak(skb);
                }
                /* continue even if we didn't manage to submit all
                 * receive descs
index 01a7714..8fd1312 100644 (file)
@@ -166,6 +166,7 @@ static struct platform_driver tsi_eth_driver = {
 
 static void tsi108_timed_checker(unsigned long dev_ptr);
 
+#ifdef DEBUG
 static void dump_eth_one(struct net_device *dev)
 {
        struct tsi108_prv_data *data = netdev_priv(dev);
@@ -190,6 +191,7 @@ static void dump_eth_one(struct net_device *dev)
               TSI_READ(TSI108_EC_RXESTAT),
               TSI_READ(TSI108_EC_RXERR), data->rxpending);
 }
+#endif
 
 /* Synchronization is needed between the thread and up/down events.
  * Note that the PHY is accessed through the same registers for both
index 3cee84a..93dc10b 100644 (file)
@@ -1131,11 +1131,13 @@ static int xemaclite_of_probe(struct platform_device *ofdev)
        lp->rx_ping_pong = get_bool(ofdev, "xlnx,rx-ping-pong");
        mac_address = of_get_mac_address(ofdev->dev.of_node);
 
-       if (mac_address)
+       if (mac_address) {
                /* Set the MAC address. */
                memcpy(ndev->dev_addr, mac_address, ETH_ALEN);
-       else
-               dev_warn(dev, "No MAC address found\n");
+       } else {
+               dev_warn(dev, "No MAC address found, using random\n");
+               eth_hw_addr_random(ndev);
+       }
 
        /* Clear the Tx CSR's in case this is a restart */
        __raw_writel(0, lp->base_addr + XEL_TSR_OFFSET);
index 467fb8b..591af71 100644 (file)
@@ -644,12 +644,6 @@ struct netvsc_reconfig {
        u32 event;
 };
 
-struct garp_wrk {
-       struct work_struct dwrk;
-       struct net_device *netdev;
-       struct netvsc_device *netvsc_dev;
-};
-
 /* The context of the netvsc device  */
 struct net_device_context {
        /* point back to our device context */
@@ -667,7 +661,6 @@ struct net_device_context {
 
        struct work_struct work;
        u32 msg_enable; /* debug level */
-       struct garp_wrk gwrk;
 
        struct netvsc_stats __percpu *tx_stats;
        struct netvsc_stats __percpu *rx_stats;
@@ -678,6 +671,15 @@ struct net_device_context {
 
        /* the device is going away */
        bool start_remove;
+
+       /* State to manage the associated VF interface. */
+       struct net_device *vf_netdev;
+       bool vf_inject;
+       atomic_t vf_use_cnt;
+       /* 1: allocated, serial number is valid. 0: not allocated */
+       u32 vf_alloc;
+       /* Serial number of the VF to team with */
+       u32 vf_serial;
 };
 
 /* Per netvsc device */
@@ -733,15 +735,7 @@ struct netvsc_device {
        u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
        u32 pkt_align; /* alignment bytes, e.g. 8 */
 
-       /* 1: allocated, serial number is valid. 0: not allocated */
-       u32 vf_alloc;
-       /* Serial number of the VF to team with */
-       u32 vf_serial;
        atomic_t open_cnt;
-       /* State to manage the associated VF interface. */
-       bool vf_inject;
-       struct net_device *vf_netdev;
-       atomic_t vf_use_cnt;
 };
 
 static inline struct netvsc_device *
index 20e0917..410fb8e 100644 (file)
@@ -77,13 +77,9 @@ static struct netvsc_device *alloc_net_device(void)
        init_waitqueue_head(&net_device->wait_drain);
        net_device->destroy = false;
        atomic_set(&net_device->open_cnt, 0);
-       atomic_set(&net_device->vf_use_cnt, 0);
        net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
        net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
 
-       net_device->vf_netdev = NULL;
-       net_device->vf_inject = false;
-
        return net_device;
 }
 
@@ -1106,16 +1102,16 @@ static void netvsc_send_table(struct hv_device *hdev,
                nvscdev->send_table[i] = tab[i];
 }
 
-static void netvsc_send_vf(struct netvsc_device *nvdev,
+static void netvsc_send_vf(struct net_device_context *net_device_ctx,
                           struct nvsp_message *nvmsg)
 {
-       nvdev->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
-       nvdev->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
+       net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
+       net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
 }
 
 static inline void netvsc_receive_inband(struct hv_device *hdev,
-                                        struct netvsc_device *nvdev,
-                                        struct nvsp_message *nvmsg)
+                                struct net_device_context *net_device_ctx,
+                                struct nvsp_message *nvmsg)
 {
        switch (nvmsg->hdr.msg_type) {
        case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
@@ -1123,7 +1119,7 @@ static inline void netvsc_receive_inband(struct hv_device *hdev,
                break;
 
        case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
-               netvsc_send_vf(nvdev, nvmsg);
+               netvsc_send_vf(net_device_ctx, nvmsg);
                break;
        }
 }
@@ -1136,6 +1132,7 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
                                   struct vmpacket_descriptor *desc)
 {
        struct nvsp_message *nvmsg;
+       struct net_device_context *net_device_ctx = netdev_priv(ndev);
 
        nvmsg = (struct nvsp_message *)((unsigned long)
                desc + (desc->offset8 << 3));
@@ -1150,7 +1147,7 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
                break;
 
        case VM_PKT_DATA_INBAND:
-               netvsc_receive_inband(device, net_device, nvmsg);
+               netvsc_receive_inband(device, net_device_ctx, nvmsg);
                break;
 
        default:
index 41bd952..3ba29fc 100644 (file)
@@ -658,20 +658,19 @@ int netvsc_recv_callback(struct hv_device *device_obj,
        struct sk_buff *skb;
        struct sk_buff *vf_skb;
        struct netvsc_stats *rx_stats;
-       struct netvsc_device *netvsc_dev = net_device_ctx->nvdev;
        u32 bytes_recvd = packet->total_data_buflen;
        int ret = 0;
 
        if (!net || net->reg_state != NETREG_REGISTERED)
                return NVSP_STAT_FAIL;
 
-       if (READ_ONCE(netvsc_dev->vf_inject)) {
-               atomic_inc(&netvsc_dev->vf_use_cnt);
-               if (!READ_ONCE(netvsc_dev->vf_inject)) {
+       if (READ_ONCE(net_device_ctx->vf_inject)) {
+               atomic_inc(&net_device_ctx->vf_use_cnt);
+               if (!READ_ONCE(net_device_ctx->vf_inject)) {
                        /*
                         * We raced; just move on.
                         */
-                       atomic_dec(&netvsc_dev->vf_use_cnt);
+                       atomic_dec(&net_device_ctx->vf_use_cnt);
                        goto vf_injection_done;
                }
 
@@ -683,17 +682,19 @@ int netvsc_recv_callback(struct hv_device *device_obj,
                 * the host). Deliver these via the VF interface
                 * in the guest.
                 */
-               vf_skb = netvsc_alloc_recv_skb(netvsc_dev->vf_netdev, packet,
-                                              csum_info, *data, vlan_tci);
+               vf_skb = netvsc_alloc_recv_skb(net_device_ctx->vf_netdev,
+                                              packet, csum_info, *data,
+                                              vlan_tci);
                if (vf_skb != NULL) {
-                       ++netvsc_dev->vf_netdev->stats.rx_packets;
-                       netvsc_dev->vf_netdev->stats.rx_bytes += bytes_recvd;
+                       ++net_device_ctx->vf_netdev->stats.rx_packets;
+                       net_device_ctx->vf_netdev->stats.rx_bytes +=
+                               bytes_recvd;
                        netif_receive_skb(vf_skb);
                } else {
                        ++net->stats.rx_dropped;
                        ret = NVSP_STAT_FAIL;
                }
-               atomic_dec(&netvsc_dev->vf_use_cnt);
+               atomic_dec(&net_device_ctx->vf_use_cnt);
                return ret;
        }
 
@@ -1150,17 +1151,6 @@ static void netvsc_free_netdev(struct net_device *netdev)
        free_netdev(netdev);
 }
 
-static void netvsc_notify_peers(struct work_struct *wrk)
-{
-       struct garp_wrk *gwrk;
-
-       gwrk = container_of(wrk, struct garp_wrk, dwrk);
-
-       netdev_notify_peers(gwrk->netdev);
-
-       atomic_dec(&gwrk->netvsc_dev->vf_use_cnt);
-}
-
 static struct net_device *get_netvsc_net_device(char *mac)
 {
        struct net_device *dev, *found = NULL;
@@ -1203,7 +1193,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
 
        net_device_ctx = netdev_priv(ndev);
        netvsc_dev = net_device_ctx->nvdev;
-       if (netvsc_dev == NULL)
+       if (!netvsc_dev || net_device_ctx->vf_netdev)
                return NOTIFY_DONE;
 
        netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
@@ -1211,10 +1201,23 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
         * Take a reference on the module.
         */
        try_module_get(THIS_MODULE);
-       netvsc_dev->vf_netdev = vf_netdev;
+       net_device_ctx->vf_netdev = vf_netdev;
        return NOTIFY_OK;
 }
 
+static void netvsc_inject_enable(struct net_device_context *net_device_ctx)
+{
+       net_device_ctx->vf_inject = true;
+}
+
+static void netvsc_inject_disable(struct net_device_context *net_device_ctx)
+{
+       net_device_ctx->vf_inject = false;
+
+       /* Wait for currently active users to drain out. */
+       while (atomic_read(&net_device_ctx->vf_use_cnt) != 0)
+               udelay(50);
+}
 
 static int netvsc_vf_up(struct net_device *vf_netdev)
 {
@@ -1233,11 +1236,11 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
        net_device_ctx = netdev_priv(ndev);
        netvsc_dev = net_device_ctx->nvdev;
 
-       if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL))
+       if (!netvsc_dev || !net_device_ctx->vf_netdev)
                return NOTIFY_DONE;
 
        netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
-       netvsc_dev->vf_inject = true;
+       netvsc_inject_enable(net_device_ctx);
 
        /*
         * Open the device before switching data path.
@@ -1252,15 +1255,8 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
 
        netif_carrier_off(ndev);
 
-       /*
-        * Now notify peers. We are scheduling work to
-        * notify peers; take a reference to prevent
-        * the VF interface from vanishing.
-        */
-       atomic_inc(&netvsc_dev->vf_use_cnt);
-       net_device_ctx->gwrk.netdev = vf_netdev;
-       net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
-       schedule_work(&net_device_ctx->gwrk.dwrk);
+       /* Now notify peers through VF device. */
+       call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vf_netdev);
 
        return NOTIFY_OK;
 }
@@ -1283,29 +1279,18 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
        net_device_ctx = netdev_priv(ndev);
        netvsc_dev = net_device_ctx->nvdev;
 
-       if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL))
+       if (!netvsc_dev || !net_device_ctx->vf_netdev)
                return NOTIFY_DONE;
 
        netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
-       netvsc_dev->vf_inject = false;
-       /*
-        * Wait for currently active users to
-        * drain out.
-        */
-
-       while (atomic_read(&netvsc_dev->vf_use_cnt) != 0)
-               udelay(50);
+       netvsc_inject_disable(net_device_ctx);
        netvsc_switch_datapath(ndev, false);
        netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
        rndis_filter_close(netvsc_dev);
        netif_carrier_on(ndev);
-       /*
-        * Notify peers.
-        */
-       atomic_inc(&netvsc_dev->vf_use_cnt);
-       net_device_ctx->gwrk.netdev = ndev;
-       net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
-       schedule_work(&net_device_ctx->gwrk.dwrk);
+
+       /* Now notify peers through netvsc device. */
+       call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev);
 
        return NOTIFY_OK;
 }
@@ -1327,11 +1312,11 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
 
        net_device_ctx = netdev_priv(ndev);
        netvsc_dev = net_device_ctx->nvdev;
-       if (netvsc_dev == NULL)
+       if (!netvsc_dev || !net_device_ctx->vf_netdev)
                return NOTIFY_DONE;
        netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
-
-       netvsc_dev->vf_netdev = NULL;
+       netvsc_inject_disable(net_device_ctx);
+       net_device_ctx->vf_netdev = NULL;
        module_put(THIS_MODULE);
        return NOTIFY_OK;
 }
@@ -1377,11 +1362,14 @@ static int netvsc_probe(struct hv_device *dev,
 
        INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
        INIT_WORK(&net_device_ctx->work, do_set_multicast);
-       INIT_WORK(&net_device_ctx->gwrk.dwrk, netvsc_notify_peers);
 
        spin_lock_init(&net_device_ctx->lock);
        INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
 
+       atomic_set(&net_device_ctx->vf_use_cnt, 0);
+       net_device_ctx->vf_netdev = NULL;
+       net_device_ctx->vf_inject = false;
+
        net->netdev_ops = &device_ops;
 
        net->hw_features = NETVSC_HW_FEATURES;
@@ -1494,8 +1482,13 @@ static int netvsc_netdev_event(struct notifier_block *this,
 {
        struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
 
-       /* Avoid Vlan, Bonding dev with same MAC registering as VF */
-       if (event_dev->priv_flags & (IFF_802_1Q_VLAN | IFF_BONDING))
+       /* Avoid Vlan dev with same MAC registering as VF */
+       if (event_dev->priv_flags & IFF_802_1Q_VLAN)
+               return NOTIFY_DONE;
+
+       /* Avoid Bonding master dev with same MAC registering as VF */
+       if (event_dev->priv_flags & IFF_BONDING &&
+           event_dev->flags & IFF_MASTER)
                return NOTIFY_DONE;
 
        switch (event) {
index d13e6e1..351e701 100644 (file)
@@ -270,6 +270,7 @@ struct macsec_dev {
        struct pcpu_secy_stats __percpu *stats;
        struct list_head secys;
        struct gro_cells gro_cells;
+       unsigned int nest_level;
 };
 
 /**
@@ -2699,6 +2700,8 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
 
 #define MACSEC_FEATURES \
        (NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST)
+static struct lock_class_key macsec_netdev_addr_lock_key;
+
 static int macsec_dev_init(struct net_device *dev)
 {
        struct macsec_dev *macsec = macsec_priv(dev);
@@ -2910,6 +2913,13 @@ static int macsec_get_iflink(const struct net_device *dev)
        return macsec_priv(dev)->real_dev->ifindex;
 }
 
+
+static int macsec_get_nest_level(struct net_device *dev)
+{
+       return macsec_priv(dev)->nest_level;
+}
+
+
 static const struct net_device_ops macsec_netdev_ops = {
        .ndo_init               = macsec_dev_init,
        .ndo_uninit             = macsec_dev_uninit,
@@ -2923,6 +2933,7 @@ static const struct net_device_ops macsec_netdev_ops = {
        .ndo_start_xmit         = macsec_start_xmit,
        .ndo_get_stats64        = macsec_get_stats64,
        .ndo_get_iflink         = macsec_get_iflink,
+       .ndo_get_lock_subclass  = macsec_get_nest_level,
 };
 
 static const struct device_type macsec_type = {
@@ -3047,22 +3058,31 @@ static void macsec_del_dev(struct macsec_dev *macsec)
        }
 }
 
+static void macsec_common_dellink(struct net_device *dev, struct list_head *head)
+{
+       struct macsec_dev *macsec = macsec_priv(dev);
+       struct net_device *real_dev = macsec->real_dev;
+
+       unregister_netdevice_queue(dev, head);
+       list_del_rcu(&macsec->secys);
+       macsec_del_dev(macsec);
+       netdev_upper_dev_unlink(real_dev, dev);
+
+       macsec_generation++;
+}
+
 static void macsec_dellink(struct net_device *dev, struct list_head *head)
 {
        struct macsec_dev *macsec = macsec_priv(dev);
        struct net_device *real_dev = macsec->real_dev;
        struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev);
 
-       macsec_generation++;
+       macsec_common_dellink(dev, head);
 
-       unregister_netdevice_queue(dev, head);
-       list_del_rcu(&macsec->secys);
        if (list_empty(&rxd->secys)) {
                netdev_rx_handler_unregister(real_dev);
                kfree(rxd);
        }
-
-       macsec_del_dev(macsec);
 }
 
 static int register_macsec_dev(struct net_device *real_dev,
@@ -3181,6 +3201,16 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
 
        dev_hold(real_dev);
 
+       macsec->nest_level = dev_get_nest_level(real_dev) + 1;
+       netdev_lockdep_set_classes(dev);
+       lockdep_set_class_and_subclass(&dev->addr_list_lock,
+                                      &macsec_netdev_addr_lock_key,
+                                      macsec_get_nest_level(dev));
+
+       err = netdev_upper_dev_link(real_dev, dev);
+       if (err < 0)
+               goto unregister;
+
        /* need to be already registered so that ->init has run and
         * the MAC addr is set
         */
@@ -3193,12 +3223,12 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
 
        if (rx_handler && sci_exists(real_dev, sci)) {
                err = -EBUSY;
-               goto unregister;
+               goto unlink;
        }
 
        err = macsec_add_dev(dev, sci, icv_len);
        if (err)
-               goto unregister;
+               goto unlink;
 
        if (data)
                macsec_changelink_common(dev, data);
@@ -3213,6 +3243,8 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
 
 del_dev:
        macsec_del_dev(macsec);
+unlink:
+       netdev_upper_dev_unlink(real_dev, dev);
 unregister:
        unregister_netdevice(dev);
        return err;
@@ -3382,8 +3414,12 @@ static int macsec_notify(struct notifier_block *this, unsigned long event,
 
                rxd = macsec_data_rtnl(real_dev);
                list_for_each_entry_safe(m, n, &rxd->secys, secys) {
-                       macsec_dellink(m->secy.netdev, &head);
+                       macsec_common_dellink(m->secy.netdev, &head);
                }
+
+               netdev_rx_handler_unregister(real_dev);
+               kfree(rxd);
+
                unregister_netdevice_many(&head);
                break;
        }
index cd9b538..3234fcd 100644 (file)
@@ -1315,7 +1315,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
        vlan->dev      = dev;
        vlan->port     = port;
        vlan->set_features = MACVLAN_FEATURES;
-       vlan->nest_level = dev_get_nest_level(lowerdev, netif_is_macvlan) + 1;
+       vlan->nest_level = dev_get_nest_level(lowerdev) + 1;
 
        vlan->mode     = MACVLAN_MODE_VEPA;
        if (data && data[IFLA_MACVLAN_MODE])
index a38c0da..070e329 100644 (file)
@@ -275,7 +275,6 @@ static void macvtap_put_queue(struct macvtap_queue *q)
        rtnl_unlock();
 
        synchronize_rcu();
-       skb_array_cleanup(&q->skb_array);
        sock_put(&q->sk);
 }
 
@@ -533,10 +532,8 @@ static void macvtap_sock_write_space(struct sock *sk)
 static void macvtap_sock_destruct(struct sock *sk)
 {
        struct macvtap_queue *q = container_of(sk, struct macvtap_queue, sk);
-       struct sk_buff *skb;
 
-       while ((skb = skb_array_consume(&q->skb_array)) != NULL)
-               kfree_skb(skb);
+       skb_array_cleanup(&q->skb_array);
 }
 
 static int macvtap_open(struct inode *inode, struct file *file)
index 1882d98..885ac9c 100644 (file)
@@ -677,17 +677,28 @@ static void kszphy_get_stats(struct phy_device *phydev,
                data[i] = kszphy_get_stat(phydev, i);
 }
 
-static int kszphy_resume(struct phy_device *phydev)
+static int kszphy_suspend(struct phy_device *phydev)
 {
-       int value;
+       /* Disable PHY Interrupts */
+       if (phy_interrupt_is_valid(phydev)) {
+               phydev->interrupts = PHY_INTERRUPT_DISABLED;
+               if (phydev->drv->config_intr)
+                       phydev->drv->config_intr(phydev);
+       }
 
-       mutex_lock(&phydev->lock);
+       return genphy_suspend(phydev);
+}
 
-       value = phy_read(phydev, MII_BMCR);
-       phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN);
+static int kszphy_resume(struct phy_device *phydev)
+{
+       genphy_resume(phydev);
 
-       kszphy_config_intr(phydev);
-       mutex_unlock(&phydev->lock);
+       /* Enable PHY Interrupts */
+       if (phy_interrupt_is_valid(phydev)) {
+               phydev->interrupts = PHY_INTERRUPT_ENABLED;
+               if (phydev->drv->config_intr)
+                       phydev->drv->config_intr(phydev);
+       }
 
        return 0;
 }
@@ -900,7 +911,7 @@ static struct phy_driver ksphy_driver[] = {
        .get_sset_count = kszphy_get_sset_count,
        .get_strings    = kszphy_get_strings,
        .get_stats      = kszphy_get_stats,
-       .suspend        = genphy_suspend,
+       .suspend        = kszphy_suspend,
        .resume         = kszphy_resume,
 }, {
        .phy_id         = PHY_ID_KSZ8061,
@@ -953,7 +964,7 @@ static struct phy_driver ksphy_driver[] = {
        .get_strings    = kszphy_get_strings,
        .get_stats      = kszphy_get_stats,
        .suspend        = genphy_suspend,
-       .resume         = genphy_resume,
+       .resume         = kszphy_resume,
 }, {
        .phy_id         = PHY_ID_KSZ8873MLL,
        .phy_id_mask    = MICREL_PHY_ID_MASK,
index c5dc2c3..c6f6683 100644 (file)
@@ -722,8 +722,10 @@ phy_err:
 int phy_start_interrupts(struct phy_device *phydev)
 {
        atomic_set(&phydev->irq_disable, 0);
-       if (request_irq(phydev->irq, phy_interrupt, 0, "phy_interrupt",
-                       phydev) < 0) {
+       if (request_irq(phydev->irq, phy_interrupt,
+                               IRQF_SHARED,
+                               "phy_interrupt",
+                               phydev) < 0) {
                pr_warn("%s: Can't get IRQ %d (PHY)\n",
                        phydev->mdio.bus->name, phydev->irq);
                phydev->irq = PHY_POLL;
index cdb19b3..b228bea 100644 (file)
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/netdevice.h>
+#include <linux/etherdevice.h>
 #include <linux/filter.h>
 #include <linux/if_team.h>
 
+static rx_handler_result_t lb_receive(struct team *team, struct team_port *port,
+                                     struct sk_buff *skb)
+{
+       if (unlikely(skb->protocol == htons(ETH_P_SLOW))) {
+               /* LACPDU packets should go to exact delivery */
+               const unsigned char *dest = eth_hdr(skb)->h_dest;
+
+               if (is_link_local_ether_addr(dest) && dest[5] == 0x02)
+                       return RX_HANDLER_EXACT;
+       }
+       return RX_HANDLER_ANOTHER;
+}
+
 struct lb_priv;
 
 typedef struct team_port *lb_select_tx_port_func_t(struct team *,
@@ -652,6 +666,7 @@ static const struct team_mode_ops lb_mode_ops = {
        .port_enter             = lb_port_enter,
        .port_leave             = lb_port_leave,
        .port_disabled          = lb_port_disabled,
+       .receive                = lb_receive,
        .transmit               = lb_transmit,
 };
 
index 9c8b5bc..6f9df37 100644 (file)
@@ -894,11 +894,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
        if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
                goto drop;
 
-       if (skb->sk && sk_fullsock(skb->sk)) {
-               sock_tx_timestamp(skb->sk, skb->sk->sk_tsflags,
-                                 &skb_shinfo(skb)->tx_flags);
-               sw_tx_timestamp(skb);
-       }
+       skb_tx_timestamp(skb);
 
        /* Orphan the skb - required as we might hang on to it
         * for indefinite time.
index 770212b..528b9c9 100644 (file)
@@ -1009,6 +1009,7 @@ static int kaweth_probe(
        struct net_device *netdev;
        const eth_addr_t bcast_addr = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
        int result = 0;
+       int rv = -EIO;
 
        dev_dbg(dev,
                "Kawasaki Device Probe (Device number:%d): 0x%4.4x:0x%4.4x:0x%4.4x\n",
@@ -1029,6 +1030,7 @@ static int kaweth_probe(
        kaweth = netdev_priv(netdev);
        kaweth->dev = udev;
        kaweth->net = netdev;
+       kaweth->intf = intf;
 
        spin_lock_init(&kaweth->device_lock);
        init_waitqueue_head(&kaweth->term_wait);
@@ -1048,6 +1050,10 @@ static int kaweth_probe(
                /* Download the firmware */
                dev_info(dev, "Downloading firmware...\n");
                kaweth->firmware_buf = (__u8 *)__get_free_page(GFP_KERNEL);
+               if (!kaweth->firmware_buf) {
+                       rv = -ENOMEM;
+                       goto err_free_netdev;
+               }
                if ((result = kaweth_download_firmware(kaweth,
                                                      "kaweth/new_code.bin",
                                                      100,
@@ -1139,8 +1145,6 @@ err_fw:
 
        dev_dbg(dev, "Initializing net device.\n");
 
-       kaweth->intf = intf;
-
        kaweth->tx_urb = usb_alloc_urb(0, GFP_KERNEL);
        if (!kaweth->tx_urb)
                goto err_free_netdev;
@@ -1204,7 +1208,7 @@ err_only_tx:
 err_free_netdev:
        free_netdev(netdev);
 
-       return -EIO;
+       return rv;
 }
 
 /****************************************************************
index c68fe49..4244b9d 100644 (file)
@@ -914,7 +914,9 @@ vmxnet3_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 {
        struct Vmxnet3_TxDataDesc *tdd;
 
-       tdd = tq->data_ring.base + tq->tx_ring.next2fill;
+       tdd = (struct Vmxnet3_TxDataDesc *)((u8 *)tq->data_ring.base +
+                                           tq->tx_ring.next2fill *
+                                           tq->txdata_desc_size);
 
        memcpy(tdd->data, skb->data, ctx->copy_size);
        netdev_dbg(adapter->netdev,
index 74fc030..7dc37a0 100644 (file)
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.4.9.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.4.a.0-k"
 
 /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01040900
+#define VMXNET3_DRIVER_VERSION_NUM      0x01040a00
 
 #if defined(CONFIG_PCI_MSI)
        /* RSS only makes sense if MSI-X is supported. */
index da4e3d6..c0dda6f 100644 (file)
@@ -1811,7 +1811,7 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
        fl4.flowi4_mark = skb->mark;
        fl4.flowi4_proto = IPPROTO_UDP;
        fl4.daddr = daddr;
-       fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr;
+       fl4.saddr = *saddr;
 
        rt = ip_route_output_key(vxlan->net, &fl4);
        if (!IS_ERR(rt)) {
@@ -1847,7 +1847,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_oif = oif;
        fl6.daddr = *daddr;
-       fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr;
+       fl6.saddr = *saddr;
        fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label);
        fl6.flowi6_mark = skb->mark;
        fl6.flowi6_proto = IPPROTO_UDP;
@@ -1920,7 +1920,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
        struct rtable *rt = NULL;
        const struct iphdr *old_iph;
        union vxlan_addr *dst;
-       union vxlan_addr remote_ip;
+       union vxlan_addr remote_ip, local_ip;
+       union vxlan_addr *src;
        struct vxlan_metadata _md;
        struct vxlan_metadata *md = &_md;
        __be16 src_port = 0, dst_port;
@@ -1938,6 +1939,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
                vni = rdst->remote_vni;
                dst = &rdst->remote_ip;
+               src = &vxlan->cfg.saddr;
                dst_cache = &rdst->dst_cache;
        } else {
                if (!info) {
@@ -1948,11 +1950,15 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
                vni = vxlan_tun_id_to_vni(info->key.tun_id);
                remote_ip.sa.sa_family = ip_tunnel_info_af(info);
-               if (remote_ip.sa.sa_family == AF_INET)
+               if (remote_ip.sa.sa_family == AF_INET) {
                        remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
-               else
+                       local_ip.sin.sin_addr.s_addr = info->key.u.ipv4.src;
+               } else {
                        remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
+                       local_ip.sin6.sin6_addr = info->key.u.ipv6.src;
+               }
                dst = &remote_ip;
+               src = &local_ip;
                dst_cache = &info->dst_cache;
        }
 
@@ -1992,15 +1998,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
        }
 
        if (dst->sa.sa_family == AF_INET) {
-               __be32 saddr;
-
                if (!vxlan->vn4_sock)
                        goto drop;
                sk = vxlan->vn4_sock->sock->sk;
 
                rt = vxlan_get_route(vxlan, skb,
                                     rdst ? rdst->remote_ifindex : 0, tos,
-                                    dst->sin.sin_addr.s_addr, &saddr,
+                                    dst->sin.sin_addr.s_addr,
+                                    &src->sin.sin_addr.s_addr,
                                     dst_cache, info);
                if (IS_ERR(rt)) {
                        netdev_dbg(dev, "no route to %pI4\n",
@@ -2017,7 +2022,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                }
 
                /* Bypass encapsulation if the destination is local */
-               if (rt->rt_flags & RTCF_LOCAL &&
+               if (!info && rt->rt_flags & RTCF_LOCAL &&
                    !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
                        struct vxlan_dev *dst_vxlan;
 
@@ -2043,13 +2048,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                if (err < 0)
                        goto xmit_tx_error;
 
-               udp_tunnel_xmit_skb(rt, sk, skb, saddr,
+               udp_tunnel_xmit_skb(rt, sk, skb, src->sin.sin_addr.s_addr,
                                    dst->sin.sin_addr.s_addr, tos, ttl, df,
                                    src_port, dst_port, xnet, !udp_sum);
 #if IS_ENABLED(CONFIG_IPV6)
        } else {
                struct dst_entry *ndst;
-               struct in6_addr saddr;
                u32 rt6i_flags;
 
                if (!vxlan->vn6_sock)
@@ -2058,7 +2062,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
                ndst = vxlan6_get_route(vxlan, skb,
                                        rdst ? rdst->remote_ifindex : 0, tos,
-                                       label, &dst->sin6.sin6_addr, &saddr,
+                                       label, &dst->sin6.sin6_addr,
+                                       &src->sin6.sin6_addr,
                                        dst_cache, info);
                if (IS_ERR(ndst)) {
                        netdev_dbg(dev, "no route to %pI6\n",
@@ -2077,7 +2082,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
                /* Bypass encapsulation if the destination is local */
                rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
-               if (rt6i_flags & RTF_LOCAL &&
+               if (!info && rt6i_flags & RTF_LOCAL &&
                    !(rt6i_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
                        struct vxlan_dev *dst_vxlan;
 
@@ -2104,7 +2109,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                        return;
                }
                udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
-                                    &saddr, &dst->sin6.sin6_addr, tos, ttl,
+                                    &src->sin6.sin6_addr,
+                                    &dst->sin6.sin6_addr, tos, ttl,
                                     label, src_port, dst_port, !udp_sum);
 #endif
        }
index 1d68916..9e1f2d9 100644 (file)
@@ -5700,10 +5700,11 @@ out:
        mutex_unlock(&wl->mutex);
 }
 
-static u32 wlcore_op_get_expected_throughput(struct ieee80211_sta *sta)
+static u32 wlcore_op_get_expected_throughput(struct ieee80211_hw *hw,
+                                            struct ieee80211_sta *sta)
 {
        struct wl1271_station *wl_sta = (struct wl1271_station *)sta->drv_priv;
-       struct wl1271 *wl = wl_sta->wl;
+       struct wl1271 *wl = hw->priv;
        u8 hlid = wl_sta->hlid;
 
        /* return in units of Kbps */
index 88e9166..368795a 100644 (file)
@@ -1269,6 +1269,7 @@ static int btt_blk_init(struct btt *btt)
                }
        }
        set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
+       btt->nd_btt->size = btt->nlba * (u64)btt->sector_size;
        revalidate_disk(btt->btt_disk);
 
        return 0;
index 3fa7919..97dd292 100644 (file)
@@ -140,10 +140,30 @@ static ssize_t namespace_store(struct device *dev,
 }
 static DEVICE_ATTR_RW(namespace);
 
+static ssize_t size_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nd_btt *nd_btt = to_nd_btt(dev);
+       ssize_t rc;
+
+       device_lock(dev);
+       if (dev->driver)
+               rc = sprintf(buf, "%llu\n", nd_btt->size);
+       else {
+               /* no size to convey if the btt instance is disabled */
+               rc = -ENXIO;
+       }
+       device_unlock(dev);
+
+       return rc;
+}
+static DEVICE_ATTR_RO(size);
+
 static struct attribute *nd_btt_attributes[] = {
        &dev_attr_sector_size.attr,
        &dev_attr_namespace.attr,
        &dev_attr_uuid.attr,
+       &dev_attr_size.attr,
        NULL,
 };
 
index 4047639..8024a0e 100644 (file)
@@ -143,6 +143,7 @@ struct nd_btt {
        struct nd_namespace_common *ndns;
        struct btt *btt;
        unsigned long lbasize;
+       u64 size;
        u8 *uuid;
        int id;
 };
index 7ff2e82..2feacc7 100644 (file)
@@ -81,10 +81,12 @@ EXPORT_SYMBOL_GPL(nvme_cancel_request);
 bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
                enum nvme_ctrl_state new_state)
 {
-       enum nvme_ctrl_state old_state = ctrl->state;
+       enum nvme_ctrl_state old_state;
        bool changed = false;
 
        spin_lock_irq(&ctrl->lock);
+
+       old_state = ctrl->state;
        switch (new_state) {
        case NVME_CTRL_LIVE:
                switch (old_state) {
@@ -140,11 +142,12 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
        default:
                break;
        }
-       spin_unlock_irq(&ctrl->lock);
 
        if (changed)
                ctrl->state = new_state;
 
+       spin_unlock_irq(&ctrl->lock);
+
        return changed;
 }
 EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
@@ -608,7 +611,7 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
 
        ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0,
                        NVME_QID_ANY, 0, 0);
-       if (ret >= 0)
+       if (ret >= 0 && result)
                *result = le32_to_cpu(cqe.result);
        return ret;
 }
@@ -628,7 +631,7 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
 
        ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0,
                        NVME_QID_ANY, 0, 0);
-       if (ret >= 0)
+       if (ret >= 0 && result)
                *result = le32_to_cpu(cqe.result);
        return ret;
 }
index d7c33f9..8dcf5a9 100644 (file)
@@ -1543,15 +1543,10 @@ static void nvme_disable_io_queues(struct nvme_dev *dev)
                reinit_completion(&dev->ioq_wait);
  retry:
                timeout = ADMIN_TIMEOUT;
-               for (; i > 0; i--) {
-                       struct nvme_queue *nvmeq = dev->queues[i];
-
-                       if (!pass)
-                               nvme_suspend_queue(nvmeq);
-                       if (nvme_delete_queue(nvmeq, opcode))
+               for (; i > 0; i--, sent++)
+                       if (nvme_delete_queue(dev->queues[i], opcode))
                                break;
-                       ++sent;
-               }
+
                while (sent--) {
                        timeout = wait_for_completion_io_timeout(&dev->ioq_wait, timeout);
                        if (timeout == 0)
@@ -1693,11 +1688,12 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
                nvme_stop_queues(&dev->ctrl);
                csts = readl(dev->bar + NVME_REG_CSTS);
        }
+
+       for (i = dev->queue_count - 1; i > 0; i--)
+               nvme_suspend_queue(dev->queues[i]);
+
        if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
-               for (i = dev->queue_count - 1; i >= 0; i--) {
-                       struct nvme_queue *nvmeq = dev->queues[i];
-                       nvme_suspend_queue(nvmeq);
-               }
+               nvme_suspend_queue(dev->queues[0]);
        } else {
                nvme_disable_io_queues(dev);
                nvme_disable_admin_queue(dev, shutdown);
index 3e3ce2b..8d2875b 100644 (file)
  * more details.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/string.h>
-#include <linux/jiffies.h>
 #include <linux/atomic.h>
 #include <linux/blk-mq.h>
 #include <linux/types.h>
@@ -26,7 +24,6 @@
 #include <linux/mutex.h>
 #include <linux/scatterlist.h>
 #include <linux/nvme.h>
-#include <linux/t10-pi.h>
 #include <asm/unaligned.h>
 
 #include <rdma/ib_verbs.h>
@@ -169,7 +166,6 @@ MODULE_PARM_DESC(register_always,
 static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
                struct rdma_cm_event *event);
 static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
-static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl);
 
 /* XXX: really should move to a generic header sooner or later.. */
 static inline void put_unaligned_le24(u32 val, u8 *p)
@@ -687,11 +683,6 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
        list_del(&ctrl->list);
        mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-       if (ctrl->ctrl.tagset) {
-               blk_cleanup_queue(ctrl->ctrl.connect_q);
-               blk_mq_free_tag_set(&ctrl->tag_set);
-               nvme_rdma_dev_put(ctrl->device);
-       }
        kfree(ctrl->queues);
        nvmf_free_options(nctrl->opts);
 free_ctrl:
@@ -748,8 +739,11 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
        WARN_ON_ONCE(!changed);
 
-       if (ctrl->queue_count > 1)
+       if (ctrl->queue_count > 1) {
                nvme_start_queues(&ctrl->ctrl);
+               nvme_queue_scan(&ctrl->ctrl);
+               nvme_queue_async_events(&ctrl->ctrl);
+       }
 
        dev_info(ctrl->ctrl.device, "Successfully reconnected\n");
 
@@ -1269,7 +1263,7 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
 {
        struct nvme_rdma_ctrl *ctrl = queue->ctrl;
        struct rdma_conn_param param = { };
-       struct nvme_rdma_cm_req priv;
+       struct nvme_rdma_cm_req priv = { };
        int ret;
 
        param.qp_num = queue->qp->qp_num;
@@ -1318,37 +1312,39 @@ out_destroy_queue_ib:
  * that caught the event. Since we hold the callout until the controller
  * deletion is completed, we'll deadlock if the controller deletion will
  * call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership
- * of destroying this queue before-hand, destroy the queue resources
- * after the controller deletion completed with the exception of destroying
- * the cm_id implicitely by returning a non-zero rc to the callout.
+ * of destroying this queue before-hand, destroy the queue resources,
+ * then queue the controller deletion which won't destroy this queue and
+ * we destroy the cm_id implicitly by returning a non-zero rc to the callout.
  */
 static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue)
 {
        struct nvme_rdma_ctrl *ctrl = queue->ctrl;
-       int ret, ctrl_deleted = 0;
+       int ret;
 
-       /* First disable the queue so ctrl delete won't free it */
-       if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags))
-               goto out;
+       /* Own the controller deletion */
+       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
+               return 0;
 
-       /* delete the controller */
-       ret = __nvme_rdma_del_ctrl(ctrl);
-       if (!ret) {
-               dev_warn(ctrl->ctrl.device,
-                       "Got rdma device removal event, deleting ctrl\n");
-               flush_work(&ctrl->delete_work);
+       dev_warn(ctrl->ctrl.device,
+               "Got rdma device removal event, deleting ctrl\n");
 
-               /* Return non-zero so the cm_id will destroy implicitly */
-               ctrl_deleted = 1;
+       /* Get rid of reconnect work if its running */
+       cancel_delayed_work_sync(&ctrl->reconnect_work);
 
+       /* Disable the queue so ctrl delete won't free it */
+       if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) {
                /* Free this queue ourselves */
-               rdma_disconnect(queue->cm_id);
-               ib_drain_qp(queue->qp);
+               nvme_rdma_stop_queue(queue);
                nvme_rdma_destroy_queue_ib(queue);
+
+               /* Return non-zero so the cm_id will destroy implicitly */
+               ret = 1;
        }
 
-out:
-       return ctrl_deleted;
+       /* Queue controller deletion */
+       queue_work(nvme_rdma_wq, &ctrl->delete_work);
+       flush_work(&ctrl->delete_work);
+       return ret;
 }
 
 static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
@@ -1648,7 +1644,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
                nvme_rdma_free_io_queues(ctrl);
        }
 
-       if (ctrl->ctrl.state == NVME_CTRL_LIVE)
+       if (test_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[0].flags))
                nvme_shutdown_ctrl(&ctrl->ctrl);
 
        blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
@@ -1657,15 +1653,27 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
        nvme_rdma_destroy_admin_queue(ctrl);
 }
 
+static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
+{
+       nvme_uninit_ctrl(&ctrl->ctrl);
+       if (shutdown)
+               nvme_rdma_shutdown_ctrl(ctrl);
+
+       if (ctrl->ctrl.tagset) {
+               blk_cleanup_queue(ctrl->ctrl.connect_q);
+               blk_mq_free_tag_set(&ctrl->tag_set);
+               nvme_rdma_dev_put(ctrl->device);
+       }
+
+       nvme_put_ctrl(&ctrl->ctrl);
+}
+
 static void nvme_rdma_del_ctrl_work(struct work_struct *work)
 {
        struct nvme_rdma_ctrl *ctrl = container_of(work,
                                struct nvme_rdma_ctrl, delete_work);
 
-       nvme_remove_namespaces(&ctrl->ctrl);
-       nvme_rdma_shutdown_ctrl(ctrl);
-       nvme_uninit_ctrl(&ctrl->ctrl);
-       nvme_put_ctrl(&ctrl->ctrl);
+       __nvme_rdma_remove_ctrl(ctrl, true);
 }
 
 static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
@@ -1698,9 +1706,7 @@ static void nvme_rdma_remove_ctrl_work(struct work_struct *work)
        struct nvme_rdma_ctrl *ctrl = container_of(work,
                                struct nvme_rdma_ctrl, delete_work);
 
-       nvme_remove_namespaces(&ctrl->ctrl);
-       nvme_uninit_ctrl(&ctrl->ctrl);
-       nvme_put_ctrl(&ctrl->ctrl);
+       __nvme_rdma_remove_ctrl(ctrl, false);
 }
 
 static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
@@ -1739,6 +1745,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
        if (ctrl->queue_count > 1) {
                nvme_start_queues(&ctrl->ctrl);
                nvme_queue_scan(&ctrl->ctrl);
+               nvme_queue_async_events(&ctrl->ctrl);
        }
 
        return;
index 2fac17a..47c564b 100644 (file)
@@ -13,7 +13,6 @@
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
-#include <linux/random.h>
 #include <generated/utsrelease.h>
 #include "nvmet.h"
 
@@ -83,7 +82,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 {
        struct nvmet_ctrl *ctrl = req->sq->ctrl;
        struct nvme_id_ctrl *id;
-       u64 serial;
        u16 status = 0;
 
        id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -96,10 +94,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
        id->vid = 0;
        id->ssvid = 0;
 
-       /* generate a random serial number as our controllers are ephemeral: */
-       get_random_bytes(&serial, sizeof(serial));
        memset(id->sn, ' ', sizeof(id->sn));
-       snprintf(id->sn, sizeof(id->sn), "%llx", serial);
+       snprintf(id->sn, sizeof(id->sn), "%llx", ctrl->serial);
 
        memset(id->mn, ' ', sizeof(id->mn));
        strncpy((char *)id->mn, "Linux", sizeof(id->mn));
index 8a891ca..6559d5a 100644 (file)
@@ -13,6 +13,7 @@
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
+#include <linux/random.h>
 #include "nvmet.h"
 
 static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
@@ -728,6 +729,9 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
        memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
        memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
 
+       /* generate a random serial number as our controllers are ephemeral: */
+       get_random_bytes(&ctrl->serial, sizeof(ctrl->serial));
+
        kref_init(&ctrl->ref);
        ctrl->subsys = subsys;
 
index 94e7829..7affd40 100644 (file)
@@ -414,9 +414,8 @@ static void nvme_loop_del_ctrl_work(struct work_struct *work)
        struct nvme_loop_ctrl *ctrl = container_of(work,
                                struct nvme_loop_ctrl, delete_work);
 
-       nvme_remove_namespaces(&ctrl->ctrl);
-       nvme_loop_shutdown_ctrl(ctrl);
        nvme_uninit_ctrl(&ctrl->ctrl);
+       nvme_loop_shutdown_ctrl(ctrl);
        nvme_put_ctrl(&ctrl->ctrl);
 }
 
@@ -501,7 +500,6 @@ out_free_queues:
        nvme_loop_destroy_admin_queue(ctrl);
 out_disable:
        dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
-       nvme_remove_namespaces(&ctrl->ctrl);
        nvme_uninit_ctrl(&ctrl->ctrl);
        nvme_put_ctrl(&ctrl->ctrl);
 }
index 57dd6d8..76b6eed 100644 (file)
@@ -113,6 +113,7 @@ struct nvmet_ctrl {
 
        struct mutex            lock;
        u64                     cap;
+       u64                     serial;
        u32                     cc;
        u32                     csts;
 
index e06d504..b4d6485 100644 (file)
@@ -77,6 +77,7 @@ enum nvmet_rdma_queue_state {
        NVMET_RDMA_Q_CONNECTING,
        NVMET_RDMA_Q_LIVE,
        NVMET_RDMA_Q_DISCONNECTING,
+       NVMET_RDMA_IN_DEVICE_REMOVAL,
 };
 
 struct nvmet_rdma_queue {
@@ -615,15 +616,10 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
        if (!len)
                return 0;
 
-       /* use the already allocated data buffer if possible */
-       if (len <= NVMET_RDMA_INLINE_DATA_SIZE && rsp->queue->host_qid) {
-               nvmet_rdma_use_inline_sg(rsp, len, 0);
-       } else {
-               status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
-                               len);
-               if (status)
-                       return status;
-       }
+       status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
+                       len);
+       if (status)
+               return status;
 
        ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
                        rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
@@ -984,7 +980,10 @@ static void nvmet_rdma_release_queue_work(struct work_struct *w)
        struct nvmet_rdma_device *dev = queue->dev;
 
        nvmet_rdma_free_queue(queue);
-       rdma_destroy_id(cm_id);
+
+       if (queue->state != NVMET_RDMA_IN_DEVICE_REMOVAL)
+               rdma_destroy_id(cm_id);
+
        kref_put(&dev->ref, nvmet_rdma_free_dev);
 }
 
@@ -1233,8 +1232,9 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
        switch (queue->state) {
        case NVMET_RDMA_Q_CONNECTING:
        case NVMET_RDMA_Q_LIVE:
-               disconnect = true;
                queue->state = NVMET_RDMA_Q_DISCONNECTING;
+       case NVMET_RDMA_IN_DEVICE_REMOVAL:
+               disconnect = true;
                break;
        case NVMET_RDMA_Q_DISCONNECTING:
                break;
@@ -1272,6 +1272,62 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
        schedule_work(&queue->release_work);
 }
 
+/**
+ * nvmet_rdma_device_removal() - Handle RDMA device removal
+ * @cm_id:      rdma_cm id (its context is the nvmet port for a listener)
+ * @queue:      nvmet rdma queue (cm id qp_context), NULL for a listener cm_id
+ *
+ * DEVICE_REMOVAL event notifies us that the RDMA device is about
+ * to unplug so we should take care of destroying our RDMA resources.
+ * This event will be generated for each allocated cm_id.
+ *
+ * Note that this event can be generated on a normal queue cm_id
+ * and/or a device bound listener cm_id (where in this case
+ * queue will be null).
+ *
+ * We claim ownership on destroying the cm_id. For queues we move
+ * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for port
+ * we nullify the priv to prevent double cm_id destruction and destroying
+ * the cm_id implicitly by returning a non-zero rc to the callout.
+ */
+static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
+               struct nvmet_rdma_queue *queue)
+{
+       unsigned long flags;
+
+       if (!queue) {
+               struct nvmet_port *port = cm_id->context;
+
+               /*
+                * This is a listener cm_id. Make sure that
+                * future remove_port won't invoke a double
+                * cm_id destroy. use atomic xchg to make sure
+                * we don't compete with remove_port.
+                */
+               if (xchg(&port->priv, NULL) != cm_id)
+                       return 0;
+       } else {
+               /*
+                * This is a queue cm_id. Make sure that
+                * release queue will not destroy the cm_id
+                * and schedule all ctrl queues removal (only
+                * if the queue is not disconnecting already).
+                */
+               spin_lock_irqsave(&queue->state_lock, flags);
+               if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
+                       queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
+               spin_unlock_irqrestore(&queue->state_lock, flags);
+               nvmet_rdma_queue_disconnect(queue);
+               flush_scheduled_work();
+       }
+
+       /*
+        * We need to return 1 so that the core will destroy
+        * its own ID.  What a great API design..
+        */
+       return 1;
+}
+
 static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
                struct rdma_cm_event *event)
 {
@@ -1294,20 +1350,11 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
                break;
        case RDMA_CM_EVENT_ADDR_CHANGE:
        case RDMA_CM_EVENT_DISCONNECTED:
-       case RDMA_CM_EVENT_DEVICE_REMOVAL:
        case RDMA_CM_EVENT_TIMEWAIT_EXIT:
-               /*
-                * We can get the device removal callback even for a
-                * CM ID that we aren't actually using.  In that case
-                * the context pointer is NULL, so we shouldn't try
-                * to disconnect a non-existing queue.  But we also
-                * need to return 1 so that the core will destroy
-                * it's own ID.  What a great API design..
-                */
-               if (queue)
-                       nvmet_rdma_queue_disconnect(queue);
-               else
-                       ret = 1;
+               nvmet_rdma_queue_disconnect(queue);
+               break;
+       case RDMA_CM_EVENT_DEVICE_REMOVAL:
+               ret = nvmet_rdma_device_removal(cm_id, queue);
                break;
        case RDMA_CM_EVENT_REJECTED:
        case RDMA_CM_EVENT_UNREACHABLE:
@@ -1396,9 +1443,10 @@ out_destroy_id:
 
 static void nvmet_rdma_remove_port(struct nvmet_port *port)
 {
-       struct rdma_cm_id *cm_id = port->priv;
+       struct rdma_cm_id *cm_id = xchg(&port->priv, NULL);
 
-       rdma_destroy_id(cm_id);
+       if (cm_id)
+               rdma_destroy_id(cm_id);
 }
 
 static struct nvmet_fabrics_ops nvmet_rdma_ops = {
index 7792266..3ce6953 100644 (file)
@@ -1631,8 +1631,7 @@ static int __of_parse_phandle_with_args(const struct device_node *np,
         */
 
  err:
-       if (it.node)
-               of_node_put(it.node);
+       of_node_put(it.node);
        return rc;
 }
 
@@ -2343,20 +2342,13 @@ struct device_node *of_graph_get_endpoint_by_regs(
        const struct device_node *parent, int port_reg, int reg)
 {
        struct of_endpoint endpoint;
-       struct device_node *node, *prev_node = NULL;
-
-       while (1) {
-               node = of_graph_get_next_endpoint(parent, prev_node);
-               of_node_put(prev_node);
-               if (!node)
-                       break;
+       struct device_node *node = NULL;
 
+       for_each_endpoint_of_node(parent, node) {
                of_graph_parse_endpoint(node, &endpoint);
                if (((port_reg == -1) || (endpoint.port == port_reg)) &&
                        ((reg == -1) || (endpoint.id == reg)))
                        return node;
-
-               prev_node = node;
        }
 
        return NULL;
index 55f1b83..085c638 100644 (file)
@@ -517,7 +517,7 @@ static void *__unflatten_device_tree(const void *blob,
                pr_warning("End of tree marker overwritten: %08x\n",
                           be32_to_cpup(mem + size));
 
-       if (detached) {
+       if (detached && mynodes) {
                of_node_set_flag(*mynodes, OF_DETACHED);
                pr_debug("unflattened tree is detached\n");
        }
index 89a71c6..a2e68f7 100644 (file)
@@ -544,12 +544,15 @@ void __init of_irq_init(const struct of_device_id *matches)
 
                        list_del(&desc->list);
 
+                       of_node_set_flag(desc->dev, OF_POPULATED);
+
                        pr_debug("of_irq_init: init %s (%p), parent %p\n",
                                 desc->dev->full_name,
                                 desc->dev, desc->interrupt_parent);
                        ret = desc->irq_init_cb(desc->dev,
                                                desc->interrupt_parent);
                        if (ret) {
+                               of_node_clear_flag(desc->dev, OF_POPULATED);
                                kfree(desc);
                                continue;
                        }
@@ -559,8 +562,6 @@ void __init of_irq_init(const struct of_device_id *matches)
                         * its children can get processed in a subsequent pass.
                         */
                        list_add_tail(&desc->list, &intc_parent_list);
-
-                       of_node_set_flag(desc->dev, OF_POPULATED);
                }
 
                /* Get the next pending parent that might have children */
index 8aa1976..f39ccd5 100644 (file)
@@ -497,6 +497,7 @@ int of_platform_default_populate(struct device_node *root,
 }
 EXPORT_SYMBOL_GPL(of_platform_default_populate);
 
+#ifndef CONFIG_PPC
 static int __init of_platform_default_populate_init(void)
 {
        struct device_node *node;
@@ -521,6 +522,7 @@ static int __init of_platform_default_populate_init(void)
        return 0;
 }
 arch_initcall_sync(of_platform_default_populate_init);
+#endif
 
 static int of_platform_device_destroy(struct device *dev, void *data)
 {
index 5f4a2e0..add6623 100644 (file)
@@ -44,6 +44,7 @@ void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
        bridge->release_fn = release_fn;
        bridge->release_data = release_data;
 }
+EXPORT_SYMBOL_GPL(pci_set_host_bridge_release);
 
 void pcibios_resource_to_bus(struct pci_bus *bus, struct pci_bus_region *region,
                             struct resource *res)
index a02981e..98f1222 100644 (file)
@@ -1069,7 +1069,7 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
                nvec = maxvec;
 
        for (;;) {
-               if (!(flags & PCI_IRQ_NOAFFINITY)) {
+               if (flags & PCI_IRQ_AFFINITY) {
                        dev->irq_affinity = irq_create_affinity_mask(&nvec);
                        if (nvec < minvec)
                                return -ENOSPC;
@@ -1105,7 +1105,7 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
  **/
 int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
 {
-       return __pci_enable_msi_range(dev, minvec, maxvec, PCI_IRQ_NOAFFINITY);
+       return __pci_enable_msi_range(dev, minvec, maxvec, 0);
 }
 EXPORT_SYMBOL(pci_enable_msi_range);
 
@@ -1120,7 +1120,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
                return -ERANGE;
 
        for (;;) {
-               if (!(flags & PCI_IRQ_NOAFFINITY)) {
+               if (flags & PCI_IRQ_AFFINITY) {
                        dev->irq_affinity = irq_create_affinity_mask(&nvec);
                        if (nvec < minvec)
                                return -ENOSPC;
@@ -1160,8 +1160,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
 int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
                int minvec, int maxvec)
 {
-       return __pci_enable_msix_range(dev, entries, minvec, maxvec,
-                       PCI_IRQ_NOAFFINITY);
+       return __pci_enable_msix_range(dev, entries, minvec, maxvec, 0);
 }
 EXPORT_SYMBOL(pci_enable_msix_range);
 
@@ -1187,22 +1186,25 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
 {
        int vecs = -ENOSPC;
 
-       if (!(flags & PCI_IRQ_NOMSIX)) {
+       if (flags & PCI_IRQ_MSIX) {
                vecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs,
                                flags);
                if (vecs > 0)
                        return vecs;
        }
 
-       if (!(flags & PCI_IRQ_NOMSI)) {
+       if (flags & PCI_IRQ_MSI) {
                vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, flags);
                if (vecs > 0)
                        return vecs;
        }
 
        /* use legacy irq if allowed */
-       if (!(flags & PCI_IRQ_NOLEGACY) && min_vecs == 1)
+       if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1) {
+               pci_intx(dev, 1);
                return 1;
+       }
+
        return vecs;
 }
 EXPORT_SYMBOL(pci_alloc_irq_vectors);
@@ -1411,6 +1413,8 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
        if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
                pci_msi_domain_update_chip_ops(info);
 
+       info->flags |= MSI_FLAG_ACTIVATE_EARLY;
+
        domain = msi_create_irq_domain(fwnode, info, parent);
        if (!domain)
                return NULL;
index 6ccb994..c494613 100644 (file)
@@ -688,7 +688,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
        return 0;
 }
 
-static DEFINE_MUTEX(arm_pmu_mutex);
+static DEFINE_SPINLOCK(arm_pmu_lock);
 static LIST_HEAD(arm_pmu_list);
 
 /*
@@ -701,7 +701,7 @@ static int arm_perf_starting_cpu(unsigned int cpu)
 {
        struct arm_pmu *pmu;
 
-       mutex_lock(&arm_pmu_mutex);
+       spin_lock(&arm_pmu_lock);
        list_for_each_entry(pmu, &arm_pmu_list, entry) {
 
                if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
@@ -709,7 +709,7 @@ static int arm_perf_starting_cpu(unsigned int cpu)
                if (pmu->reset)
                        pmu->reset(pmu);
        }
-       mutex_unlock(&arm_pmu_mutex);
+       spin_unlock(&arm_pmu_lock);
        return 0;
 }
 
@@ -821,9 +821,9 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
        if (!cpu_hw_events)
                return -ENOMEM;
 
-       mutex_lock(&arm_pmu_mutex);
+       spin_lock(&arm_pmu_lock);
        list_add_tail(&cpu_pmu->entry, &arm_pmu_list);
-       mutex_unlock(&arm_pmu_mutex);
+       spin_unlock(&arm_pmu_lock);
 
        err = cpu_pm_pmu_register(cpu_pmu);
        if (err)
@@ -859,9 +859,9 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
        return 0;
 
 out_unregister:
-       mutex_lock(&arm_pmu_mutex);
+       spin_lock(&arm_pmu_lock);
        list_del(&cpu_pmu->entry);
-       mutex_unlock(&arm_pmu_mutex);
+       spin_unlock(&arm_pmu_lock);
        free_percpu(cpu_hw_events);
        return err;
 }
@@ -869,9 +869,9 @@ out_unregister:
 static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
 {
        cpu_pm_pmu_unregister(cpu_pmu);
-       mutex_lock(&arm_pmu_mutex);
+       spin_lock(&arm_pmu_lock);
        list_del(&cpu_pmu->entry);
-       mutex_unlock(&arm_pmu_mutex);
+       spin_unlock(&arm_pmu_lock);
        free_percpu(cpu_pmu->hw_events);
 }
 
@@ -967,11 +967,12 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu)
 
        /* If we didn't manage to parse anything, try the interrupt affinity */
        if (cpumask_weight(&pmu->supported_cpus) == 0) {
-               if (!using_spi) {
+               int irq = platform_get_irq(pdev, 0);
+
+               if (irq_is_percpu(irq)) {
                        /* If using PPIs, check the affinity of the partition */
-                       int ret, irq;
+                       int ret;
 
-                       irq = platform_get_irq(pdev, 0);
                        ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus);
                        if (ret) {
                                kfree(irqs);
index eb4990f..7fb7656 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <linux/bitops.h>
 #include <linux/err.h>
+#include <linux/io.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/pinctrl/pinconf.h>
index 11623c6..44e69c9 100644 (file)
@@ -727,13 +727,7 @@ static int meson_pinctrl_probe(struct platform_device *pdev)
                return PTR_ERR(pc->pcdev);
        }
 
-       ret = meson_gpiolib_register(pc);
-       if (ret) {
-               pinctrl_unregister(pc->pcdev);
-               return ret;
-       }
-
-       return 0;
+       return meson_gpiolib_register(pc);
 }
 
 static struct platform_driver meson_pinctrl_driver = {
index 634b4d3..b3e7723 100644 (file)
@@ -43,17 +43,6 @@ static int amd_gpio_direction_input(struct gpio_chip *gc, unsigned offset)
 
        spin_lock_irqsave(&gpio_dev->lock, flags);
        pin_reg = readl(gpio_dev->base + offset * 4);
-       /*
-        * Suppose BIOS or Bootloader sets specific debounce for the
-        * GPIO. if not, set debounce to be  2.75ms and remove glitch.
-       */
-       if ((pin_reg & DB_TMR_OUT_MASK) == 0) {
-               pin_reg |= 0xf;
-               pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF);
-               pin_reg |= DB_TYPE_REMOVE_GLITCH << DB_CNTRL_OFF;
-               pin_reg &= ~BIT(DB_TMR_LARGE_OFF);
-       }
-
        pin_reg &= ~BIT(OUTPUT_ENABLE_OFF);
        writel(pin_reg, gpio_dev->base + offset * 4);
        spin_unlock_irqrestore(&gpio_dev->lock, flags);
@@ -326,15 +315,6 @@ static void amd_gpio_irq_enable(struct irq_data *d)
 
        spin_lock_irqsave(&gpio_dev->lock, flags);
        pin_reg = readl(gpio_dev->base + (d->hwirq)*4);
-       /*
-               Suppose BIOS or Bootloader sets specific debounce for the
-               GPIO. if not, set debounce to be  2.75ms.
-       */
-       if ((pin_reg & DB_TMR_OUT_MASK) == 0) {
-               pin_reg |= 0xf;
-               pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF);
-               pin_reg &= ~BIT(DB_TMR_LARGE_OFF);
-       }
        pin_reg |= BIT(INTERRUPT_ENABLE_OFF);
        pin_reg |= BIT(INTERRUPT_MASK_OFF);
        writel(pin_reg, gpio_dev->base + (d->hwirq)*4);
index c6d410e..7bad200 100644 (file)
@@ -1432,7 +1432,6 @@ static int pistachio_pinctrl_probe(struct platform_device *pdev)
 {
        struct pistachio_pinctrl *pctl;
        struct resource *res;
-       int ret;
 
        pctl = devm_kzalloc(&pdev->dev, sizeof(*pctl), GFP_KERNEL);
        if (!pctl)
@@ -1464,13 +1463,7 @@ static int pistachio_pinctrl_probe(struct platform_device *pdev)
                return PTR_ERR(pctl->pctldev);
        }
 
-       ret = pistachio_gpio_register(pctl);
-       if (ret < 0) {
-               pinctrl_unregister(pctl->pctldev);
-               return ret;
-       }
-
-       return 0;
+       return pistachio_gpio_register(pctl);
 }
 
 static struct platform_driver pistachio_pinctrl_driver = {
index f99b183..374a802 100644 (file)
@@ -1,6 +1,8 @@
 /*
  * Generic driver for the OLPC Embedded Controller.
  *
+ * Author: Andres Salomon <dilinger@queued.net>
+ *
  * Copyright (C) 2011-2012 One Laptop per Child Foundation.
  *
  * Licensed under the GPL v2 or later.
@@ -12,7 +14,7 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
-#include <linux/module.h>
+#include <linux/init.h>
 #include <linux/list.h>
 #include <linux/olpc-ec.h>
 #include <asm/olpc.h>
@@ -326,8 +328,4 @@ static int __init olpc_ec_init_module(void)
 {
        return platform_driver_register(&olpc_ec_plat_driver);
 }
-
 arch_initcall(olpc_ec_init_module);
-
-MODULE_AUTHOR("Andres Salomon <dilinger@queued.net>");
-MODULE_LICENSE("GPL");
index d2bc092..da2fe18 100644 (file)
@@ -110,8 +110,8 @@ static const struct key_entry dell_wmi_keymap_type_0000[] __initconst = {
        /* BIOS error detected */
        { KE_IGNORE, 0xe00d, { KEY_RESERVED } },
 
-       /* Unknown, defined in ACPI DSDT */
-       /* { KE_IGNORE, 0xe00e, { KEY_RESERVED } }, */
+       /* Battery was removed or inserted */
+       { KE_IGNORE, 0xe00e, { KEY_RESERVED } },
 
        /* Wifi Catcher */
        { KE_KEY,    0xe011, { KEY_PROG2 } },
index 63b371d..91ae585 100644 (file)
@@ -1,6 +1,8 @@
 /* Moorestown PMIC GPIO (access through IPC) driver
  * Copyright (c) 2008 - 2009, Intel Corporation.
  *
+ * Author: Alek Du <alek.du@intel.com>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -21,7 +23,6 @@
 
 #define pr_fmt(fmt) "%s: " fmt, __func__
 
-#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
@@ -322,9 +323,4 @@ static int __init platform_pmic_gpio_init(void)
 {
        return platform_driver_register(&platform_pmic_gpio_driver);
 }
-
 subsys_initcall(platform_pmic_gpio_init);
-
-MODULE_AUTHOR("Alek Du <alek.du@intel.com>");
-MODULE_DESCRIPTION("Intel Moorestown PMIC GPIO driver");
-MODULE_LICENSE("GPL v2");
index 9c65f13..da7a75f 100644 (file)
@@ -457,13 +457,16 @@ static inline void max17042_write_model_data(struct max17042_chip *chip,
 }
 
 static inline void max17042_read_model_data(struct max17042_chip *chip,
-                                       u8 addr, u32 *data, int size)
+                                       u8 addr, u16 *data, int size)
 {
        struct regmap *map = chip->regmap;
        int i;
+       u32 tmp;
 
-       for (i = 0; i < size; i++)
-               regmap_read(map, addr + i, &data[i]);
+       for (i = 0; i < size; i++) {
+               regmap_read(map, addr + i, &tmp);
+               data[i] = (u16)tmp;
+       }
 }
 
 static inline int max17042_model_data_compare(struct max17042_chip *chip,
@@ -486,7 +489,7 @@ static int max17042_init_model(struct max17042_chip *chip)
 {
        int ret;
        int table_size = ARRAY_SIZE(chip->pdata->config_data->cell_char_tbl);
-       u32 *temp_data;
+       u16 *temp_data;
 
        temp_data = kcalloc(table_size, sizeof(*temp_data), GFP_KERNEL);
        if (!temp_data)
@@ -501,7 +504,7 @@ static int max17042_init_model(struct max17042_chip *chip)
        ret = max17042_model_data_compare(
                chip,
                chip->pdata->config_data->cell_char_tbl,
-               (u16 *)temp_data,
+               temp_data,
                table_size);
 
        max10742_lock_model(chip);
@@ -514,7 +517,7 @@ static int max17042_verify_model_lock(struct max17042_chip *chip)
 {
        int i;
        int table_size = ARRAY_SIZE(chip->pdata->config_data->cell_char_tbl);
-       u32 *temp_data;
+       u16 *temp_data;
        int ret = 0;
 
        temp_data = kcalloc(table_size, sizeof(*temp_data), GFP_KERNEL);
index 3bfac53..c74c3f6 100644 (file)
@@ -200,8 +200,8 @@ config REBOOT_MODE
 config SYSCON_REBOOT_MODE
        tristate "Generic SYSCON regmap reboot mode driver"
        depends on OF
+       depends on MFD_SYSCON
        select REBOOT_MODE
-       select MFD_SYSCON
        help
          Say y here will enable reboot mode driver. This will
          get reboot mode arguments and store it in SYSCON mapped
index 9ab7f56..f69387e 100644 (file)
@@ -53,13 +53,16 @@ static int hisi_reboot_probe(struct platform_device *pdev)
 
        if (of_property_read_u32(np, "reboot-offset", &reboot_offset) < 0) {
                pr_err("failed to find reboot-offset property\n");
+               iounmap(base);
                return -EINVAL;
        }
 
        err = register_restart_handler(&hisi_restart_nb);
-       if (err)
+       if (err) {
                dev_err(&pdev->dev, "cannot register restart handler (err=%d)\n",
                        err);
+               iounmap(base);
+       }
 
        return err;
 }
index 73dfae4..4c56e54 100644 (file)
@@ -206,6 +206,7 @@ static int tps65217_charger_probe(struct platform_device *pdev)
        if (!charger)
                return -ENOMEM;
 
+       platform_set_drvdata(pdev, charger);
        charger->tps = tps;
        charger->dev = &pdev->dev;
 
index cecc15a..3fa17ac 100644 (file)
@@ -1080,8 +1080,8 @@ static int riocm_send_ack(struct rio_channel *ch)
 static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id,
                                           long timeout)
 {
-       struct rio_channel *ch = NULL;
-       struct rio_channel *new_ch = NULL;
+       struct rio_channel *ch;
+       struct rio_channel *new_ch;
        struct conn_req *req;
        struct cm_peer *peer;
        int found = 0;
@@ -1155,6 +1155,7 @@ static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id,
 
        spin_unlock_bh(&ch->lock);
        riocm_put_channel(ch);
+       ch = NULL;
        kfree(req);
 
        down_read(&rdev_sem);
@@ -1172,7 +1173,7 @@ static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id,
        if (!found) {
                /* If peer device object not found, simply ignore the request */
                err = -ENODEV;
-               goto err_nodev;
+               goto err_put_new_ch;
        }
 
        new_ch->rdev = peer->rdev;
@@ -1184,15 +1185,16 @@ static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id,
 
        *new_ch_id = new_ch->id;
        return new_ch;
+
+err_put_new_ch:
+       spin_lock_bh(&idr_lock);
+       idr_remove(&ch_idr, new_ch->id);
+       spin_unlock_bh(&idr_lock);
+       riocm_put_channel(new_ch);
+
 err_put:
-       riocm_put_channel(ch);
-err_nodev:
-       if (new_ch) {
-               spin_lock_bh(&idr_lock);
-               idr_remove(&ch_idr, new_ch->id);
-               spin_unlock_bh(&idr_lock);
-               riocm_put_channel(new_ch);
-       }
+       if (ch)
+               riocm_put_channel(ch);
        *new_ch_id = 0;
        return ERR_PTR(err);
 }
index 8973d34..fb1b56a 100644 (file)
@@ -1643,9 +1643,18 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
        u8 *sense = NULL;
        int expires;
 
+       cqr = (struct dasd_ccw_req *) intparm;
        if (IS_ERR(irb)) {
                switch (PTR_ERR(irb)) {
                case -EIO:
+                       if (cqr && cqr->status == DASD_CQR_CLEAR_PENDING) {
+                               device = (struct dasd_device *) cqr->startdev;
+                               cqr->status = DASD_CQR_CLEARED;
+                               dasd_device_clear_timer(device);
+                               wake_up(&dasd_flush_wq);
+                               dasd_schedule_device_bh(device);
+                               return;
+                       }
                        break;
                case -ETIMEDOUT:
                        DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s: "
@@ -1661,7 +1670,6 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
        }
 
        now = get_tod_clock();
-       cqr = (struct dasd_ccw_req *) intparm;
        /* check for conditions that should be handled immediately */
        if (!cqr ||
            !(scsw_dstat(&irb->scsw) == (DEV_STAT_CHN_END | DEV_STAT_DEV_END) &&
index fd2eff4..98bbec4 100644 (file)
@@ -5078,6 +5078,8 @@ static int dasd_eckd_read_message_buffer(struct dasd_device *device,
                return PTR_ERR(cqr);
        }
 
+       cqr->lpm = lpum;
+retry:
        cqr->startdev = device;
        cqr->memdev = device;
        cqr->block = NULL;
@@ -5122,6 +5124,14 @@ static int dasd_eckd_read_message_buffer(struct dasd_device *device,
                        (prssdp + 1);
                memcpy(messages, message_buf,
                       sizeof(struct dasd_rssd_messages));
+       } else if (cqr->lpm) {
+               /*
+                * on z/VM we might not be able to do I/O on the requested path
+                * but instead we get the required information on any path
+                * so retry with open path mask
+                */
+               cqr->lpm = 0;
+               goto retry;
        } else
                DBF_EVENT_DEVID(DBF_WARNING, device->cdev,
                                "Reading messages failed with rc=%d\n"
index 7ada078..6a58bc8 100644 (file)
@@ -762,7 +762,6 @@ static int io_subchannel_initialize_dev(struct subchannel *sch,
        priv->state = DEV_STATE_NOT_OPER;
        priv->dev_id.devno = sch->schib.pmcw.dev;
        priv->dev_id.ssid = sch->schid.ssid;
-       priv->schid = sch->schid;
 
        INIT_WORK(&priv->todo_work, ccw_device_todo);
        INIT_LIST_HEAD(&priv->cmb_list);
@@ -1000,7 +999,6 @@ static int ccw_device_move_to_sch(struct ccw_device *cdev,
        put_device(&old_sch->dev);
        /* Initialize new subchannel. */
        spin_lock_irq(sch->lock);
-       cdev->private->schid = sch->schid;
        cdev->ccwlock = sch->lock;
        if (!sch_is_pseudo_sch(sch))
                sch_set_cdev(sch, cdev);
index 15b56a1..9bc3512 100644 (file)
@@ -26,6 +26,7 @@
 static void
 ccw_device_msg_control_check(struct ccw_device *cdev, struct irb *irb)
 {
+       struct subchannel *sch = to_subchannel(cdev->dev.parent);
        char dbf_text[15];
 
        if (!scsw_is_valid_cstat(&irb->scsw) ||
@@ -36,10 +37,10 @@ ccw_device_msg_control_check(struct ccw_device *cdev, struct irb *irb)
                      "received"
                      " ... device %04x on subchannel 0.%x.%04x, dev_stat "
                      ": %02X sch_stat : %02X\n",
-                     cdev->private->dev_id.devno, cdev->private->schid.ssid,
-                     cdev->private->schid.sch_no,
+                     cdev->private->dev_id.devno, sch->schid.ssid,
+                     sch->schid.sch_no,
                      scsw_dstat(&irb->scsw), scsw_cstat(&irb->scsw));
-       sprintf(dbf_text, "chk%x", cdev->private->schid.sch_no);
+       sprintf(dbf_text, "chk%x", sch->schid.sch_no);
        CIO_TRACE_EVENT(0, dbf_text);
        CIO_HEX_EVENT(0, irb, sizeof(struct irb));
 }
index 8975060..220f491 100644 (file)
@@ -120,7 +120,6 @@ struct ccw_device_private {
        int state;              /* device state */
        atomic_t onoff;
        struct ccw_dev_id dev_id;       /* device id */
-       struct subchannel_id schid;     /* subchannel number */
        struct ccw_request req;         /* internal I/O request */
        int iretry;
        u8 pgid_valid_mask;     /* mask of valid PGIDs */
index 4bb5262..71bf9bd 100644 (file)
@@ -686,6 +686,15 @@ static void qdio_kick_handler(struct qdio_q *q)
        q->qdio_error = 0;
 }
 
+static inline int qdio_tasklet_schedule(struct qdio_q *q)
+{
+       if (likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE)) {
+               tasklet_schedule(&q->tasklet);
+               return 0;
+       }
+       return -EPERM;
+}
+
 static void __qdio_inbound_processing(struct qdio_q *q)
 {
        qperf_inc(q, tasklet_inbound);
@@ -698,10 +707,8 @@ static void __qdio_inbound_processing(struct qdio_q *q)
        if (!qdio_inbound_q_done(q)) {
                /* means poll time is not yet over */
                qperf_inc(q, tasklet_inbound_resched);
-               if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) {
-                       tasklet_schedule(&q->tasklet);
+               if (!qdio_tasklet_schedule(q))
                        return;
-               }
        }
 
        qdio_stop_polling(q);
@@ -711,8 +718,7 @@ static void __qdio_inbound_processing(struct qdio_q *q)
         */
        if (!qdio_inbound_q_done(q)) {
                qperf_inc(q, tasklet_inbound_resched2);
-               if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED))
-                       tasklet_schedule(&q->tasklet);
+               qdio_tasklet_schedule(q);
        }
 }
 
@@ -869,16 +875,15 @@ static void __qdio_outbound_processing(struct qdio_q *q)
         * is noticed and outbound_handler is called after some time.
         */
        if (qdio_outbound_q_done(q))
-               del_timer(&q->u.out.timer);
+               del_timer_sync(&q->u.out.timer);
        else
-               if (!timer_pending(&q->u.out.timer))
+               if (!timer_pending(&q->u.out.timer) &&
+                   likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE))
                        mod_timer(&q->u.out.timer, jiffies + 10 * HZ);
        return;
 
 sched:
-       if (unlikely(q->irq_ptr->state == QDIO_IRQ_STATE_STOPPED))
-               return;
-       tasklet_schedule(&q->tasklet);
+       qdio_tasklet_schedule(q);
 }
 
 /* outbound tasklet */
@@ -892,9 +897,7 @@ void qdio_outbound_timer(unsigned long data)
 {
        struct qdio_q *q = (struct qdio_q *)data;
 
-       if (unlikely(q->irq_ptr->state == QDIO_IRQ_STATE_STOPPED))
-               return;
-       tasklet_schedule(&q->tasklet);
+       qdio_tasklet_schedule(q);
 }
 
 static inline void qdio_check_outbound_after_thinint(struct qdio_q *q)
@@ -907,7 +910,7 @@ static inline void qdio_check_outbound_after_thinint(struct qdio_q *q)
 
        for_each_output_queue(q->irq_ptr, out, i)
                if (!qdio_outbound_q_done(out))
-                       tasklet_schedule(&out->tasklet);
+                       qdio_tasklet_schedule(out);
 }
 
 static void __tiqdio_inbound_processing(struct qdio_q *q)
@@ -929,10 +932,8 @@ static void __tiqdio_inbound_processing(struct qdio_q *q)
 
        if (!qdio_inbound_q_done(q)) {
                qperf_inc(q, tasklet_inbound_resched);
-               if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) {
-                       tasklet_schedule(&q->tasklet);
+               if (!qdio_tasklet_schedule(q))
                        return;
-               }
        }
 
        qdio_stop_polling(q);
@@ -942,8 +943,7 @@ static void __tiqdio_inbound_processing(struct qdio_q *q)
         */
        if (!qdio_inbound_q_done(q)) {
                qperf_inc(q, tasklet_inbound_resched2);
-               if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED))
-                       tasklet_schedule(&q->tasklet);
+               qdio_tasklet_schedule(q);
        }
 }
 
@@ -977,7 +977,7 @@ static void qdio_int_handler_pci(struct qdio_irq *irq_ptr)
        int i;
        struct qdio_q *q;
 
-       if (unlikely(irq_ptr->state == QDIO_IRQ_STATE_STOPPED))
+       if (unlikely(irq_ptr->state != QDIO_IRQ_STATE_ACTIVE))
                return;
 
        for_each_input_queue(irq_ptr, q, i) {
@@ -1003,7 +1003,7 @@ static void qdio_int_handler_pci(struct qdio_irq *irq_ptr)
                        continue;
                if (need_siga_sync(q) && need_siga_sync_out_after_pci(q))
                        qdio_siga_sync_q(q);
-               tasklet_schedule(&q->tasklet);
+               qdio_tasklet_schedule(q);
        }
 }
 
@@ -1066,10 +1066,12 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
                      struct irb *irb)
 {
        struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+       struct subchannel_id schid;
        int cstat, dstat;
 
        if (!intparm || !irq_ptr) {
-               DBF_ERROR("qint:%4x", cdev->private->schid.sch_no);
+               ccw_device_get_schid(cdev, &schid);
+               DBF_ERROR("qint:%4x", schid.sch_no);
                return;
        }
 
@@ -1122,12 +1124,14 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
 int qdio_get_ssqd_desc(struct ccw_device *cdev,
                       struct qdio_ssqd_desc *data)
 {
+       struct subchannel_id schid;
 
        if (!cdev || !cdev->private)
                return -EINVAL;
 
-       DBF_EVENT("get ssqd:%4x", cdev->private->schid.sch_no);
-       return qdio_setup_get_ssqd(NULL, &cdev->private->schid, data);
+       ccw_device_get_schid(cdev, &schid);
+       DBF_EVENT("get ssqd:%4x", schid.sch_no);
+       return qdio_setup_get_ssqd(NULL, &schid, data);
 }
 EXPORT_SYMBOL_GPL(qdio_get_ssqd_desc);
 
@@ -1141,7 +1145,7 @@ static void qdio_shutdown_queues(struct ccw_device *cdev)
                tasklet_kill(&q->tasklet);
 
        for_each_output_queue(irq_ptr, q, i) {
-               del_timer(&q->u.out.timer);
+               del_timer_sync(&q->u.out.timer);
                tasklet_kill(&q->tasklet);
        }
 }
@@ -1154,14 +1158,15 @@ static void qdio_shutdown_queues(struct ccw_device *cdev)
 int qdio_shutdown(struct ccw_device *cdev, int how)
 {
        struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+       struct subchannel_id schid;
        int rc;
-       unsigned long flags;
 
        if (!irq_ptr)
                return -ENODEV;
 
        WARN_ON_ONCE(irqs_disabled());
-       DBF_EVENT("qshutdown:%4x", cdev->private->schid.sch_no);
+       ccw_device_get_schid(cdev, &schid);
+       DBF_EVENT("qshutdown:%4x", schid.sch_no);
 
        mutex_lock(&irq_ptr->setup_mutex);
        /*
@@ -1184,7 +1189,7 @@ int qdio_shutdown(struct ccw_device *cdev, int how)
        qdio_shutdown_debug_entries(irq_ptr);
 
        /* cleanup subchannel */
-       spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+       spin_lock_irq(get_ccwdev_lock(cdev));
 
        if (how & QDIO_FLAG_CLEANUP_USING_CLEAR)
                rc = ccw_device_clear(cdev, QDIO_DOING_CLEANUP);
@@ -1198,12 +1203,12 @@ int qdio_shutdown(struct ccw_device *cdev, int how)
        }
 
        qdio_set_state(irq_ptr, QDIO_IRQ_STATE_CLEANUP);
-       spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
+       spin_unlock_irq(get_ccwdev_lock(cdev));
        wait_event_interruptible_timeout(cdev->private->wait_q,
                irq_ptr->state == QDIO_IRQ_STATE_INACTIVE ||
                irq_ptr->state == QDIO_IRQ_STATE_ERR,
                10 * HZ);
-       spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+       spin_lock_irq(get_ccwdev_lock(cdev));
 
 no_cleanup:
        qdio_shutdown_thinint(irq_ptr);
@@ -1211,7 +1216,7 @@ no_cleanup:
        /* restore interrupt handler */
        if ((void *)cdev->handler == (void *)qdio_int_handler)
                cdev->handler = irq_ptr->orig_handler;
-       spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
+       spin_unlock_irq(get_ccwdev_lock(cdev));
 
        qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE);
        mutex_unlock(&irq_ptr->setup_mutex);
@@ -1228,11 +1233,13 @@ EXPORT_SYMBOL_GPL(qdio_shutdown);
 int qdio_free(struct ccw_device *cdev)
 {
        struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+       struct subchannel_id schid;
 
        if (!irq_ptr)
                return -ENODEV;
 
-       DBF_EVENT("qfree:%4x", cdev->private->schid.sch_no);
+       ccw_device_get_schid(cdev, &schid);
+       DBF_EVENT("qfree:%4x", schid.sch_no);
        DBF_DEV_EVENT(DBF_ERR, irq_ptr, "dbf abandoned");
        mutex_lock(&irq_ptr->setup_mutex);
 
@@ -1251,9 +1258,11 @@ EXPORT_SYMBOL_GPL(qdio_free);
  */
 int qdio_allocate(struct qdio_initialize *init_data)
 {
+       struct subchannel_id schid;
        struct qdio_irq *irq_ptr;
 
-       DBF_EVENT("qallocate:%4x", init_data->cdev->private->schid.sch_no);
+       ccw_device_get_schid(init_data->cdev, &schid);
+       DBF_EVENT("qallocate:%4x", schid.sch_no);
 
        if ((init_data->no_input_qs && !init_data->input_handler) ||
            (init_data->no_output_qs && !init_data->output_handler))
@@ -1331,20 +1340,18 @@ static void qdio_detect_hsicq(struct qdio_irq *irq_ptr)
  */
 int qdio_establish(struct qdio_initialize *init_data)
 {
-       struct qdio_irq *irq_ptr;
        struct ccw_device *cdev = init_data->cdev;
-       unsigned long saveflags;
+       struct subchannel_id schid;
+       struct qdio_irq *irq_ptr;
        int rc;
 
-       DBF_EVENT("qestablish:%4x", cdev->private->schid.sch_no);
+       ccw_device_get_schid(cdev, &schid);
+       DBF_EVENT("qestablish:%4x", schid.sch_no);
 
        irq_ptr = cdev->private->qdio_data;
        if (!irq_ptr)
                return -ENODEV;
 
-       if (cdev->private->state != DEV_STATE_ONLINE)
-               return -EINVAL;
-
        mutex_lock(&irq_ptr->setup_mutex);
        qdio_setup_irq(init_data);
 
@@ -1361,17 +1368,14 @@ int qdio_establish(struct qdio_initialize *init_data)
        irq_ptr->ccw.count = irq_ptr->equeue.count;
        irq_ptr->ccw.cda = (u32)((addr_t)irq_ptr->qdr);
 
-       spin_lock_irqsave(get_ccwdev_lock(cdev), saveflags);
+       spin_lock_irq(get_ccwdev_lock(cdev));
        ccw_device_set_options_mask(cdev, 0);
 
        rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ESTABLISH, 0, 0);
+       spin_unlock_irq(get_ccwdev_lock(cdev));
        if (rc) {
                DBF_ERROR("%4x est IO ERR", irq_ptr->schid.sch_no);
                DBF_ERROR("rc:%4x", rc);
-       }
-       spin_unlock_irqrestore(get_ccwdev_lock(cdev), saveflags);
-
-       if (rc) {
                mutex_unlock(&irq_ptr->setup_mutex);
                qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR);
                return rc;
@@ -1407,19 +1411,17 @@ EXPORT_SYMBOL_GPL(qdio_establish);
  */
 int qdio_activate(struct ccw_device *cdev)
 {
+       struct subchannel_id schid;
        struct qdio_irq *irq_ptr;
        int rc;
-       unsigned long saveflags;
 
-       DBF_EVENT("qactivate:%4x", cdev->private->schid.sch_no);
+       ccw_device_get_schid(cdev, &schid);
+       DBF_EVENT("qactivate:%4x", schid.sch_no);
 
        irq_ptr = cdev->private->qdio_data;
        if (!irq_ptr)
                return -ENODEV;
 
-       if (cdev->private->state != DEV_STATE_ONLINE)
-               return -EINVAL;
-
        mutex_lock(&irq_ptr->setup_mutex);
        if (irq_ptr->state == QDIO_IRQ_STATE_INACTIVE) {
                rc = -EBUSY;
@@ -1431,19 +1433,17 @@ int qdio_activate(struct ccw_device *cdev)
        irq_ptr->ccw.count = irq_ptr->aqueue.count;
        irq_ptr->ccw.cda = 0;
 
-       spin_lock_irqsave(get_ccwdev_lock(cdev), saveflags);
+       spin_lock_irq(get_ccwdev_lock(cdev));
        ccw_device_set_options(cdev, CCWDEV_REPORT_ALL);
 
        rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ACTIVATE,
                              0, DOIO_DENY_PREFETCH);
+       spin_unlock_irq(get_ccwdev_lock(cdev));
        if (rc) {
                DBF_ERROR("%4x act IO ERR", irq_ptr->schid.sch_no);
                DBF_ERROR("rc:%4x", rc);
-       }
-       spin_unlock_irqrestore(get_ccwdev_lock(cdev), saveflags);
-
-       if (rc)
                goto out;
+       }
 
        if (is_thinint_irq(irq_ptr))
                tiqdio_add_input_queues(irq_ptr);
@@ -1585,10 +1585,11 @@ static int handle_outbound(struct qdio_q *q, unsigned int callflags,
 
        /* in case of SIGA errors we must process the error immediately */
        if (used >= q->u.out.scan_threshold || rc)
-               tasklet_schedule(&q->tasklet);
+               qdio_tasklet_schedule(q);
        else
                /* free the SBALs in case of no further traffic */
-               if (!timer_pending(&q->u.out.timer))
+               if (!timer_pending(&q->u.out.timer) &&
+                   likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE))
                        mod_timer(&q->u.out.timer, jiffies + HZ);
        return rc;
 }
index 241891a..df40692 100644 (file)
@@ -6,4 +6,8 @@
 # it under the terms of the GNU General Public License (version 2 only)
 # as published by the Free Software Foundation.
 
-obj-$(CONFIG_S390_GUEST) += kvm_virtio.o virtio_ccw.o
+s390-virtio-objs := virtio_ccw.o
+ifdef CONFIG_S390_GUEST_OLD_TRANSPORT
+s390-virtio-objs += kvm_virtio.o
+endif
+obj-$(CONFIG_S390_GUEST) += $(s390-virtio-objs)
index 1d060fd..5e5c11f 100644 (file)
@@ -458,6 +458,8 @@ static int __init kvm_devices_init(void)
        if (test_devices_support(total_memory_size) < 0)
                return -ENODEV;
 
+       pr_warn("The s390-virtio transport is deprecated. Please switch to a modern host providing virtio-ccw.\n");
+
        rc = vmem_add_mapping(total_memory_size, PAGE_SIZE);
        if (rc)
                return rc;
@@ -482,7 +484,7 @@ static int __init kvm_devices_init(void)
 }
 
 /* code for early console output with virtio_console */
-static __init int early_put_chars(u32 vtermno, const char *buf, int count)
+static int early_put_chars(u32 vtermno, const char *buf, int count)
 {
        char scratch[17];
        unsigned int len = count;
index b381b37..5648b71 100644 (file)
@@ -63,7 +63,7 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg)
        struct fib *fibptr;
        struct hw_fib * hw_fib = (struct hw_fib *)0;
        dma_addr_t hw_fib_pa = (dma_addr_t)0LL;
-       unsigned size;
+       unsigned int size, osize;
        int retval;
 
        if (dev->in_reset) {
@@ -87,7 +87,8 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg)
         *      will not overrun the buffer when we copy the memory. Return
         *      an error if we would.
         */
-       size = le16_to_cpu(kfib->header.Size) + sizeof(struct aac_fibhdr);
+       osize = size = le16_to_cpu(kfib->header.Size) +
+               sizeof(struct aac_fibhdr);
        if (size < le16_to_cpu(kfib->header.SenderSize))
                size = le16_to_cpu(kfib->header.SenderSize);
        if (size > dev->max_fib_size) {
@@ -118,6 +119,14 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg)
                goto cleanup;
        }
 
+       /* Sanity check the second copy */
+       if ((osize != le16_to_cpu(kfib->header.Size) +
+               sizeof(struct aac_fibhdr))
+               || (size < le16_to_cpu(kfib->header.SenderSize))) {
+               retval = -EINVAL;
+               goto cleanup;
+       }
+
        if (kfib->header.Command == cpu_to_le16(TakeABreakPt)) {
                aac_adapter_interrupt(dev);
                /*
index a569c65..dcf3653 100644 (file)
@@ -2923,7 +2923,7 @@ static int fcoe_ctlr_vlan_recv(struct fcoe_ctlr *fip, struct sk_buff *skb)
        mutex_unlock(&fip->ctlr_mutex);
 
 drop:
-       kfree(skb);
+       kfree_skb(skb);
        return rc;
 }
 
index bf85974..17d04c7 100644 (file)
@@ -10410,8 +10410,11 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
                __ipr_remove(pdev);
                return rc;
        }
+       spin_lock_irqsave(ioa_cfg->host->host_lock, flags);
+       ioa_cfg->scan_enabled = 1;
+       schedule_work(&ioa_cfg->work_q);
+       spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags);
 
-       scsi_scan_host(ioa_cfg->host);
        ioa_cfg->iopoll_weight = ioa_cfg->chip_cfg->iopoll_weight;
 
        if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
@@ -10421,10 +10424,8 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
                }
        }
 
-       spin_lock_irqsave(ioa_cfg->host->host_lock, flags);
-       ioa_cfg->scan_enabled = 1;
-       schedule_work(&ioa_cfg->work_q);
-       spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags);
+       scsi_scan_host(ioa_cfg->host);
+
        return 0;
 }
 
index 2dab3dc..c1ed25a 100644 (file)
@@ -5037,7 +5037,7 @@ static int megasas_init_fw(struct megasas_instance *instance)
        /* Find first memory bar */
        bar_list = pci_select_bars(instance->pdev, IORESOURCE_MEM);
        instance->bar = find_first_bit(&bar_list, sizeof(unsigned long));
-       if (pci_request_selected_regions(instance->pdev, instance->bar,
+       if (pci_request_selected_regions(instance->pdev, 1<<instance->bar,
                                         "megasas: LSI")) {
                dev_printk(KERN_DEBUG, &instance->pdev->dev, "IO memory region busy!\n");
                return -EBUSY;
@@ -5339,7 +5339,7 @@ fail_ready_state:
        iounmap(instance->reg_set);
 
       fail_ioremap:
-       pci_release_selected_regions(instance->pdev, instance->bar);
+       pci_release_selected_regions(instance->pdev, 1<<instance->bar);
 
        return -EINVAL;
 }
@@ -5360,7 +5360,7 @@ static void megasas_release_mfi(struct megasas_instance *instance)
 
        iounmap(instance->reg_set);
 
-       pci_release_selected_regions(instance->pdev, instance->bar);
+       pci_release_selected_regions(instance->pdev, 1<<instance->bar);
 }
 
 /**
index ec83754..52d8bbf 100644 (file)
@@ -2603,7 +2603,7 @@ megasas_release_fusion(struct megasas_instance *instance)
 
        iounmap(instance->reg_set);
 
-       pci_release_selected_regions(instance->pdev, instance->bar);
+       pci_release_selected_regions(instance->pdev, 1<<instance->bar);
 }
 
 /**
index 751f13e..750f82c 100644 (file)
@@ -2188,6 +2188,17 @@ mpt3sas_base_map_resources(struct MPT3SAS_ADAPTER *ioc)
        } else
                ioc->msix96_vector = 0;
 
+       if (ioc->is_warpdrive) {
+               ioc->reply_post_host_index[0] = (resource_size_t __iomem *)
+                   &ioc->chip->ReplyPostHostIndex;
+
+               for (i = 1; i < ioc->cpu_msix_table_sz; i++)
+                       ioc->reply_post_host_index[i] =
+                       (resource_size_t __iomem *)
+                       ((u8 __iomem *)&ioc->chip->Doorbell + (0x4000 + ((i - 1)
+                       * 4)));
+       }
+
        list_for_each_entry(reply_q, &ioc->reply_queue_list, list)
                pr_info(MPT3SAS_FMT "%s: IRQ %d\n",
                    reply_q->name,  ((ioc->msix_enable) ? "PCI-MSI-X enabled" :
@@ -5280,17 +5291,6 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc)
        if (r)
                goto out_free_resources;
 
-       if (ioc->is_warpdrive) {
-               ioc->reply_post_host_index[0] = (resource_size_t __iomem *)
-                   &ioc->chip->ReplyPostHostIndex;
-
-               for (i = 1; i < ioc->cpu_msix_table_sz; i++)
-                       ioc->reply_post_host_index[i] =
-                       (resource_size_t __iomem *)
-                       ((u8 __iomem *)&ioc->chip->Doorbell + (0x4000 + ((i - 1)
-                       * 4)));
-       }
-
        pci_set_drvdata(ioc->pdev, ioc->shost);
        r = _base_get_ioc_facts(ioc, CAN_SLEEP);
        if (r)
index 53ef1cb..0e8601a 100644 (file)
@@ -778,6 +778,8 @@ static void ses_intf_remove_enclosure(struct scsi_device *sdev)
        if (!edev)
                return;
 
+       enclosure_unregister(edev);
+
        ses_dev = edev->scratch;
        edev->scratch = NULL;
 
@@ -789,7 +791,6 @@ static void ses_intf_remove_enclosure(struct scsi_device *sdev)
        kfree(edev->component[0].scratch);
 
        put_device(&edev->edev);
-       enclosure_unregister(edev);
 }
 
 static void ses_intf_remove(struct device *cdev,
index 1b4ff0f..ed5dd0e 100644 (file)
@@ -426,6 +426,7 @@ clock_cooling_register(struct device *dev, const char *clock_name)
        if (!ccdev)
                return ERR_PTR(-ENOMEM);
 
+       mutex_init(&ccdev->lock);
        ccdev->dev = dev;
        ccdev->clk = devm_clk_get(dev, clock_name);
        if (IS_ERR(ccdev->clk))
index 3788ed7..a32b417 100644 (file)
@@ -740,12 +740,22 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev,
 }
 
 /* Bind cpufreq callbacks to thermal cooling device ops */
+
 static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
        .get_max_state = cpufreq_get_max_state,
        .get_cur_state = cpufreq_get_cur_state,
        .set_cur_state = cpufreq_set_cur_state,
 };
 
+static struct thermal_cooling_device_ops cpufreq_power_cooling_ops = {
+       .get_max_state          = cpufreq_get_max_state,
+       .get_cur_state          = cpufreq_get_cur_state,
+       .set_cur_state          = cpufreq_set_cur_state,
+       .get_requested_power    = cpufreq_get_requested_power,
+       .state2power            = cpufreq_state2power,
+       .power2state            = cpufreq_power2state,
+};
+
 /* Notifier for cpufreq policy change */
 static struct notifier_block thermal_cpufreq_notifier_block = {
        .notifier_call = cpufreq_thermal_notifier,
@@ -795,6 +805,7 @@ __cpufreq_cooling_register(struct device_node *np,
        struct cpumask temp_mask;
        unsigned int freq, i, num_cpus;
        int ret;
+       struct thermal_cooling_device_ops *cooling_ops;
 
        cpumask_and(&temp_mask, clip_cpus, cpu_online_mask);
        policy = cpufreq_cpu_get(cpumask_first(&temp_mask));
@@ -850,10 +861,6 @@ __cpufreq_cooling_register(struct device_node *np,
        cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus);
 
        if (capacitance) {
-               cpufreq_cooling_ops.get_requested_power =
-                       cpufreq_get_requested_power;
-               cpufreq_cooling_ops.state2power = cpufreq_state2power;
-               cpufreq_cooling_ops.power2state = cpufreq_power2state;
                cpufreq_dev->plat_get_static_power = plat_static_func;
 
                ret = build_dyn_power_table(cpufreq_dev, capacitance);
@@ -861,6 +868,10 @@ __cpufreq_cooling_register(struct device_node *np,
                        cool_dev = ERR_PTR(ret);
                        goto free_table;
                }
+
+               cooling_ops = &cpufreq_power_cooling_ops;
+       } else {
+               cooling_ops = &cpufreq_cooling_ops;
        }
 
        ret = get_idr(&cpufreq_idr, &cpufreq_dev->id);
@@ -885,7 +896,7 @@ __cpufreq_cooling_register(struct device_node *np,
                 cpufreq_dev->id);
 
        cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
-                                                     &cpufreq_cooling_ops);
+                                                     cooling_ops);
        if (IS_ERR(cool_dev))
                goto remove_idr;
 
index 34fe365..68bd1b5 100644 (file)
@@ -116,7 +116,9 @@ static int fair_share_throttle(struct thermal_zone_device *tz, int trip)
                instance->target = get_target_state(tz, cdev, percentage,
                                                    cur_trip_level);
 
+               mutex_lock(&instance->cdev->lock);
                instance->cdev->updated = false;
+               mutex_unlock(&instance->cdev->lock);
                thermal_cdev_update(cdev);
        }
        return 0;
index fc52016..bb118a1 100644 (file)
@@ -71,7 +71,9 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
                dev_dbg(&instance->cdev->device, "target=%d\n",
                                        (int)instance->target);
 
+               mutex_lock(&instance->cdev->lock);
                instance->cdev->updated = false; /* cdev needs update */
+               mutex_unlock(&instance->cdev->lock);
        }
 
        mutex_unlock(&tz->lock);
index c5547bd..e473548 100644 (file)
@@ -471,8 +471,6 @@ MODULE_DEVICE_TABLE(of, of_imx_thermal_match);
 
 static int imx_thermal_probe(struct platform_device *pdev)
 {
-       const struct of_device_id *of_id =
-               of_match_device(of_imx_thermal_match, &pdev->dev);
        struct imx_thermal_data *data;
        struct regmap *map;
        int measure_freq;
@@ -490,7 +488,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
        }
        data->tempmon = map;
 
-       data->socdata = of_id->data;
+       data->socdata = of_device_get_match_data(&pdev->dev);
 
        /* make sure the IRQ flag is clear before enabling irq on i.MX6SX */
        if (data->socdata->version == TEMPMON_IMX6SX) {
index a578cd2..1891f34 100644 (file)
@@ -225,7 +225,6 @@ static struct platform_driver int3406_thermal_driver = {
        .remove = int3406_thermal_remove,
        .driver = {
                   .name = "int3406 thermal",
-                  .owner = THIS_MODULE,
                   .acpi_match_table = int3406_thermal_match,
                   },
 };
index 6a6ec1c..9b4815e 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/thermal.h>
+#include <linux/pm.h>
 
 /* Intel PCH thermal Device IDs */
 #define PCH_THERMAL_DID_WPT    0x9CA4 /* Wildcat Point */
@@ -65,6 +66,7 @@ struct pch_thermal_device {
        unsigned long crt_temp;
        int hot_trip_id;
        unsigned long hot_temp;
+       bool bios_enabled;
 };
 
 static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips)
@@ -75,8 +77,10 @@ static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips)
        *nr_trips = 0;
 
        /* Check if BIOS has already enabled thermal sensor */
-       if (WPT_TSS_TSDSS & readb(ptd->hw_base + WPT_TSS))
+       if (WPT_TSS_TSDSS & readb(ptd->hw_base + WPT_TSS)) {
+               ptd->bios_enabled = true;
                goto read_trips;
+       }
 
        tsel = readb(ptd->hw_base + WPT_TSEL);
        /*
@@ -130,9 +134,39 @@ static int pch_wpt_get_temp(struct pch_thermal_device *ptd, int *temp)
        return 0;
 }
 
+static int pch_wpt_suspend(struct pch_thermal_device *ptd)
+{
+       u8 tsel;
+
+       if (ptd->bios_enabled)
+               return 0;
+
+       tsel = readb(ptd->hw_base + WPT_TSEL);
+
+       writeb(tsel & 0xFE, ptd->hw_base + WPT_TSEL);
+
+       return 0;
+}
+
+static int pch_wpt_resume(struct pch_thermal_device *ptd)
+{
+       u8 tsel;
+
+       if (ptd->bios_enabled)
+               return 0;
+
+       tsel = readb(ptd->hw_base + WPT_TSEL);
+
+       writeb(tsel | WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL);
+
+       return 0;
+}
+
 struct pch_dev_ops {
        int (*hw_init)(struct pch_thermal_device *ptd, int *nr_trips);
        int (*get_temp)(struct pch_thermal_device *ptd, int *temp);
+       int (*suspend)(struct pch_thermal_device *ptd);
+       int (*resume)(struct pch_thermal_device *ptd);
 };
 
 
@@ -140,6 +174,8 @@ struct pch_dev_ops {
 static const struct pch_dev_ops pch_dev_ops_wpt = {
        .hw_init = pch_wpt_init,
        .get_temp = pch_wpt_get_temp,
+       .suspend = pch_wpt_suspend,
+       .resume = pch_wpt_resume,
 };
 
 static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp)
@@ -269,6 +305,22 @@ static void intel_pch_thermal_remove(struct pci_dev *pdev)
        pci_disable_device(pdev);
 }
 
+static int intel_pch_thermal_suspend(struct device *device)
+{
+       struct pci_dev *pdev = to_pci_dev(device);
+       struct pch_thermal_device *ptd = pci_get_drvdata(pdev);
+
+       return ptd->ops->suspend(ptd);
+}
+
+static int intel_pch_thermal_resume(struct device *device)
+{
+       struct pci_dev *pdev = to_pci_dev(device);
+       struct pch_thermal_device *ptd = pci_get_drvdata(pdev);
+
+       return ptd->ops->resume(ptd);
+}
+
 static struct pci_device_id intel_pch_thermal_id[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT) },
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL) },
@@ -276,11 +328,17 @@ static struct pci_device_id intel_pch_thermal_id[] = {
 };
 MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id);
 
+static const struct dev_pm_ops intel_pch_pm_ops = {
+       .suspend = intel_pch_thermal_suspend,
+       .resume = intel_pch_thermal_resume,
+};
+
 static struct pci_driver intel_pch_thermal_driver = {
        .name           = "intel_pch_thermal",
        .id_table       = intel_pch_thermal_id,
        .probe          = intel_pch_thermal_probe,
        .remove         = intel_pch_thermal_remove,
+       .driver.pm      = &intel_pch_pm_ops,
 };
 
 module_pci_driver(intel_pch_thermal_driver);
index 015ce2e..0e4dc0a 100644 (file)
@@ -388,7 +388,7 @@ static int clamp_thread(void *arg)
                int sleeptime;
                unsigned long target_jiffies;
                unsigned int guard;
-               unsigned int compensation = 0;
+               unsigned int compensated_ratio;
                int interval; /* jiffies to sleep for each attempt */
                unsigned int duration_jiffies = msecs_to_jiffies(duration);
                unsigned int window_size_now;
@@ -409,8 +409,11 @@ static int clamp_thread(void *arg)
                 * c-states, thus we need to compensate the injected idle ratio
                 * to achieve the actual target reported by the HW.
                 */
-               compensation = get_compensation(target_ratio);
-               interval = duration_jiffies*100/(target_ratio+compensation);
+               compensated_ratio = target_ratio +
+                       get_compensation(target_ratio);
+               if (compensated_ratio <= 0)
+                       compensated_ratio = 1;
+               interval = duration_jiffies * 100 / compensated_ratio;
 
                /* align idle time */
                target_jiffies = roundup(jiffies, interval);
@@ -647,8 +650,8 @@ static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
                goto exit_set;
        } else  if (set_target_ratio > 0 && new_target_ratio == 0) {
                pr_info("Stop forced idle injection\n");
-               set_target_ratio = 0;
                end_power_clamp();
+               set_target_ratio = 0;
        } else  /* adjust currently running */ {
                set_target_ratio = new_target_ratio;
                /* make new set_target_ratio visible to other cpus */
index 2f1a863..b4d3116 100644 (file)
@@ -529,7 +529,9 @@ static void allow_maximum_power(struct thermal_zone_device *tz)
                        continue;
 
                instance->target = 0;
+               mutex_lock(&instance->cdev->lock);
                instance->cdev->updated = false;
+               mutex_unlock(&instance->cdev->lock);
                thermal_cdev_update(instance->cdev);
        }
 }
index ea9366a..bcef2e7 100644 (file)
@@ -175,7 +175,9 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
                        update_passive_instance(tz, trip_type, -1);
 
                instance->initialized = true;
+               mutex_lock(&instance->cdev->lock);
                instance->cdev->updated = false; /* cdev needs update */
+               mutex_unlock(&instance->cdev->lock);
        }
 
        mutex_unlock(&tz->lock);
index 5133cd1..e2fc616 100644 (file)
@@ -1093,7 +1093,9 @@ int power_actor_set_power(struct thermal_cooling_device *cdev,
                return ret;
 
        instance->target = state;
+       mutex_lock(&cdev->lock);
        cdev->updated = false;
+       mutex_unlock(&cdev->lock);
        thermal_cdev_update(cdev);
 
        return 0;
@@ -1623,11 +1625,13 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev)
        struct thermal_instance *instance;
        unsigned long target = 0;
 
+       mutex_lock(&cdev->lock);
        /* cooling device is updated*/
-       if (cdev->updated)
+       if (cdev->updated) {
+               mutex_unlock(&cdev->lock);
                return;
+       }
 
-       mutex_lock(&cdev->lock);
        /* Make sure cdev enters the deepest cooling state */
        list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) {
                dev_dbg(&cdev->device, "zone%d->target=%lu\n",
@@ -1637,9 +1641,9 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev)
                if (instance->target > target)
                        target = instance->target;
        }
-       mutex_unlock(&cdev->lock);
        cdev->ops->set_cur_state(cdev, target);
        cdev->updated = true;
+       mutex_unlock(&cdev->lock);
        trace_cdev_update(cdev, target);
        dev_dbg(&cdev->device, "set to state %lu\n", target);
 }
index 06fd2ed..c41c774 100644 (file)
@@ -232,6 +232,7 @@ int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
 
        return result;
 }
+EXPORT_SYMBOL_GPL(thermal_add_hwmon_sysfs);
 
 void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz)
 {
@@ -270,3 +271,4 @@ void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz)
        hwmon_device_unregister(hwmon->device);
        kfree(hwmon);
 }
+EXPORT_SYMBOL_GPL(thermal_remove_hwmon_sysfs);
index 7191230..0f3f62e 100644 (file)
@@ -1354,7 +1354,6 @@ made_compressed_probe:
        spin_lock_init(&acm->write_lock);
        spin_lock_init(&acm->read_lock);
        mutex_init(&acm->mutex);
-       acm->rx_endpoint = usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress);
        acm->is_int_ep = usb_endpoint_xfer_int(epread);
        if (acm->is_int_ep)
                acm->bInterval = epread->bInterval;
@@ -1394,14 +1393,14 @@ made_compressed_probe:
                urb->transfer_dma = rb->dma;
                if (acm->is_int_ep) {
                        usb_fill_int_urb(urb, acm->dev,
-                                        acm->rx_endpoint,
+                                        usb_rcvintpipe(usb_dev, epread->bEndpointAddress),
                                         rb->base,
                                         acm->readsize,
                                         acm_read_bulk_callback, rb,
                                         acm->bInterval);
                } else {
                        usb_fill_bulk_urb(urb, acm->dev,
-                                         acm->rx_endpoint,
+                                         usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress),
                                          rb->base,
                                          acm->readsize,
                                          acm_read_bulk_callback, rb);
index 05ce308..1f1eabf 100644 (file)
@@ -96,7 +96,6 @@ struct acm {
        struct acm_rb read_buffers[ACM_NR];
        struct acm_wb *putbuffer;                       /* for acm_tty_put_char() */
        int rx_buflimit;
-       int rx_endpoint;
        spinlock_t read_lock;
        int write_used;                                 /* number of non-empty write buffers */
        int transmitting;
index 31ccdcc..0511631 100644 (file)
@@ -171,6 +171,31 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
                                                        ep, buffer, size);
 }
 
+static const unsigned short low_speed_maxpacket_maxes[4] = {
+       [USB_ENDPOINT_XFER_CONTROL] = 8,
+       [USB_ENDPOINT_XFER_ISOC] = 0,
+       [USB_ENDPOINT_XFER_BULK] = 0,
+       [USB_ENDPOINT_XFER_INT] = 8,
+};
+static const unsigned short full_speed_maxpacket_maxes[4] = {
+       [USB_ENDPOINT_XFER_CONTROL] = 64,
+       [USB_ENDPOINT_XFER_ISOC] = 1023,
+       [USB_ENDPOINT_XFER_BULK] = 64,
+       [USB_ENDPOINT_XFER_INT] = 64,
+};
+static const unsigned short high_speed_maxpacket_maxes[4] = {
+       [USB_ENDPOINT_XFER_CONTROL] = 64,
+       [USB_ENDPOINT_XFER_ISOC] = 1024,
+       [USB_ENDPOINT_XFER_BULK] = 512,
+       [USB_ENDPOINT_XFER_INT] = 1023,
+};
+static const unsigned short super_speed_maxpacket_maxes[4] = {
+       [USB_ENDPOINT_XFER_CONTROL] = 512,
+       [USB_ENDPOINT_XFER_ISOC] = 1024,
+       [USB_ENDPOINT_XFER_BULK] = 1024,
+       [USB_ENDPOINT_XFER_INT] = 1024,
+};
+
 static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
     int asnum, struct usb_host_interface *ifp, int num_ep,
     unsigned char *buffer, int size)
@@ -179,6 +204,8 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
        struct usb_endpoint_descriptor *d;
        struct usb_host_endpoint *endpoint;
        int n, i, j, retval;
+       unsigned int maxp;
+       const unsigned short *maxpacket_maxes;
 
        d = (struct usb_endpoint_descriptor *) buffer;
        buffer += d->bLength;
@@ -286,6 +313,42 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
                        endpoint->desc.wMaxPacketSize = cpu_to_le16(8);
        }
 
+       /* Validate the wMaxPacketSize field */
+       maxp = usb_endpoint_maxp(&endpoint->desc);
+
+       /* Find the highest legal maxpacket size for this endpoint */
+       i = 0;          /* additional transactions per microframe */
+       switch (to_usb_device(ddev)->speed) {
+       case USB_SPEED_LOW:
+               maxpacket_maxes = low_speed_maxpacket_maxes;
+               break;
+       case USB_SPEED_FULL:
+               maxpacket_maxes = full_speed_maxpacket_maxes;
+               break;
+       case USB_SPEED_HIGH:
+               /* Bits 12..11 are allowed only for HS periodic endpoints */
+               if (usb_endpoint_xfer_int(d) || usb_endpoint_xfer_isoc(d)) {
+                       i = maxp & (BIT(12) | BIT(11));
+                       maxp &= ~i;
+               }
+               /* fallthrough */
+       default:
+               maxpacket_maxes = high_speed_maxpacket_maxes;
+               break;
+       case USB_SPEED_SUPER:
+       case USB_SPEED_SUPER_PLUS:
+               maxpacket_maxes = super_speed_maxpacket_maxes;
+               break;
+       }
+       j = maxpacket_maxes[usb_endpoint_type(&endpoint->desc)];
+
+       if (maxp > j) {
+               dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid maxpacket %d, setting to %d\n",
+                   cfgno, inum, asnum, d->bEndpointAddress, maxp, j);
+               maxp = j;
+               endpoint->desc.wMaxPacketSize = cpu_to_le16(i | maxp);
+       }
+
        /*
         * Some buggy high speed devices have bulk endpoints using
         * maxpacket sizes other than 512.  High speed HCDs may not
@@ -293,9 +356,6 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
         */
        if (to_usb_device(ddev)->speed == USB_SPEED_HIGH
                        && usb_endpoint_xfer_bulk(d)) {
-               unsigned maxp;
-
-               maxp = usb_endpoint_maxp(&endpoint->desc) & 0x07ff;
                if (maxp != 512)
                        dev_warn(ddev, "config %d interface %d altsetting %d "
                                "bulk endpoint 0x%X has invalid maxpacket %d\n",
index e9f5043..e6a6d67 100644 (file)
@@ -241,7 +241,8 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma)
                goto error_decrease_mem;
        }
 
-       mem = usb_alloc_coherent(ps->dev, size, GFP_USER, &dma_handle);
+       mem = usb_alloc_coherent(ps->dev, size, GFP_USER | __GFP_NOWARN,
+                       &dma_handle);
        if (!mem) {
                ret = -ENOMEM;
                goto error_free_usbm;
@@ -2582,7 +2583,9 @@ static unsigned int usbdev_poll(struct file *file,
        if (file->f_mode & FMODE_WRITE && !list_empty(&ps->async_completed))
                mask |= POLLOUT | POLLWRNORM;
        if (!connected(ps))
-               mask |= POLLERR | POLLHUP;
+               mask |= POLLHUP;
+       if (list_empty(&ps->list))
+               mask |= POLLERR;
        return mask;
 }
 
index bee1351..1d5fc32 100644 (file)
@@ -1052,14 +1052,11 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
 
        /* Continue a partial initialization */
        if (type == HUB_INIT2 || type == HUB_INIT3) {
-               device_lock(hub->intfdev);
+               device_lock(&hdev->dev);
 
                /* Was the hub disconnected while we were waiting? */
-               if (hub->disconnected) {
-                       device_unlock(hub->intfdev);
-                       kref_put(&hub->kref, hub_release);
-                       return;
-               }
+               if (hub->disconnected)
+                       goto disconnected;
                if (type == HUB_INIT2)
                        goto init2;
                goto init3;
@@ -1262,7 +1259,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
                        queue_delayed_work(system_power_efficient_wq,
                                        &hub->init_work,
                                        msecs_to_jiffies(delay));
-                       device_unlock(hub->intfdev);
+                       device_unlock(&hdev->dev);
                        return;         /* Continues at init3: below */
                } else {
                        msleep(delay);
@@ -1281,12 +1278,12 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
        /* Scan all ports that need attention */
        kick_hub_wq(hub);
 
-       /* Allow autosuspend if it was suppressed */
-       if (type <= HUB_INIT3)
+       if (type == HUB_INIT2 || type == HUB_INIT3) {
+               /* Allow autosuspend if it was suppressed */
+ disconnected:
                usb_autopm_put_interface_async(to_usb_interface(hub->intfdev));
-
-       if (type == HUB_INIT2 || type == HUB_INIT3)
-               device_unlock(hub->intfdev);
+               device_unlock(&hdev->dev);
+       }
 
        kref_put(&hub->kref, hub_release);
 }
@@ -1315,8 +1312,6 @@ static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type)
        struct usb_device *hdev = hub->hdev;
        int i;
 
-       cancel_delayed_work_sync(&hub->init_work);
-
        /* hub_wq and related activity won't re-trigger */
        hub->quiescing = 1;
 
index 9743353..e56d59b 100644 (file)
@@ -61,6 +61,7 @@ static int dwc3_of_simple_probe(struct platform_device *pdev)
        if (!simple->clks)
                return -ENOMEM;
 
+       platform_set_drvdata(pdev, simple);
        simple->dev = dev;
 
        for (i = 0; i < simple->num_clocks; i++) {
index 45f5a23..2eb84d6 100644 (file)
@@ -37,6 +37,7 @@
 #define PCI_DEVICE_ID_INTEL_BXT                        0x0aaa
 #define PCI_DEVICE_ID_INTEL_BXT_M              0x1aaa
 #define PCI_DEVICE_ID_INTEL_APL                        0x5aaa
+#define PCI_DEVICE_ID_INTEL_KBP                        0xa2b0
 
 static const struct acpi_gpio_params reset_gpios = { 0, 0, false };
 static const struct acpi_gpio_params cs_gpios = { 1, 0, false };
@@ -227,6 +228,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT), },
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT_M), },
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_APL), },
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBP), },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB), },
        {  }    /* Terminating Entry */
 };
index 8f8c215..1f5597e 100644 (file)
@@ -829,7 +829,7 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
        if (!req->request.no_interrupt && !chain)
                trb->ctrl |= DWC3_TRB_CTRL_IOC | DWC3_TRB_CTRL_ISP_IMI;
 
-       if (last)
+       if (last && !usb_endpoint_xfer_isoc(dep->endpoint.desc))
                trb->ctrl |= DWC3_TRB_CTRL_LST;
 
        if (chain)
@@ -1955,7 +1955,8 @@ static void dwc3_gadget_free_endpoints(struct dwc3 *dwc)
 
 static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep,
                struct dwc3_request *req, struct dwc3_trb *trb,
-               const struct dwc3_event_depevt *event, int status)
+               const struct dwc3_event_depevt *event, int status,
+               int chain)
 {
        unsigned int            count;
        unsigned int            s_pkt = 0;
@@ -1964,17 +1965,22 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep,
        dep->queued_requests--;
        trace_dwc3_complete_trb(dep, trb);
 
+       /*
+        * If we're in the middle of a series of chained TRBs and we
+        * receive a short transfer along the way, DWC3 will skip
+        * through all TRBs including the last TRB in the chain (the
+        * one where CHN bit is zero). DWC3 will also avoid clearing
+        * HWO bit and SW has to do it manually.
+        *
+        * We're going to do that here to avoid problems of HW trying
+        * to use bogus TRBs for transfers.
+        */
+       if (chain && (trb->ctrl & DWC3_TRB_CTRL_HWO))
+               trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
+
        if ((trb->ctrl & DWC3_TRB_CTRL_HWO) && status != -ESHUTDOWN)
-               /*
-                * We continue despite the error. There is not much we
-                * can do. If we don't clean it up we loop forever. If
-                * we skip the TRB then it gets overwritten after a
-                * while since we use them in a ring buffer. A BUG()
-                * would help. Lets hope that if this occurs, someone
-                * fixes the root cause instead of looking away :)
-                */
-               dev_err(dwc->dev, "%s's TRB (%p) still owned by HW\n",
-                               dep->name, trb);
+               return 1;
+
        count = trb->size & DWC3_TRB_SIZE_MASK;
 
        if (dep->direction) {
@@ -2013,15 +2019,7 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep,
                        s_pkt = 1;
        }
 
-       /*
-        * We assume here we will always receive the entire data block
-        * which we should receive. Meaning, if we program RX to
-        * receive 4K but we receive only 2K, we assume that's all we
-        * should receive and we simply bounce the request back to the
-        * gadget driver for further processing.
-        */
-       req->request.actual += req->request.length - count;
-       if (s_pkt)
+       if (s_pkt && !chain)
                return 1;
        if ((event->status & DEPEVT_STATUS_LST) &&
                        (trb->ctrl & (DWC3_TRB_CTRL_LST |
@@ -2040,13 +2038,17 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep,
        struct dwc3_trb         *trb;
        unsigned int            slot;
        unsigned int            i;
+       int                     count = 0;
        int                     ret;
 
        do {
+               int chain;
+
                req = next_request(&dep->started_list);
                if (WARN_ON_ONCE(!req))
                        return 1;
 
+               chain = req->request.num_mapped_sgs > 0;
                i = 0;
                do {
                        slot = req->first_trb_index + i;
@@ -2054,13 +2056,22 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep,
                                slot++;
                        slot %= DWC3_TRB_NUM;
                        trb = &dep->trb_pool[slot];
+                       count += trb->size & DWC3_TRB_SIZE_MASK;
 
                        ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb,
-                                       event, status);
+                                       event, status, chain);
                        if (ret)
                                break;
                } while (++i < req->request.num_mapped_sgs);
 
+               /*
+                * We assume here we will always receive the entire data block
+                * which we should receive. Meaning, if we program RX to
+                * receive 4K but we receive only 2K, we assume that's all we
+                * should receive and we simply bounce the request back to the
+                * gadget driver for further processing.
+                */
+               req->request.actual += req->request.length - count;
                dwc3_gadget_giveback(dep, req, status);
 
                if (ret)
index eb64848..5ebe6af 100644 (file)
@@ -1913,6 +1913,8 @@ unknown:
                        break;
 
                case USB_RECIP_ENDPOINT:
+                       if (!cdev->config)
+                               break;
                        endp = ((w_index & 0x80) >> 3) | (w_index & 0x0f);
                        list_for_each_entry(f, &cdev->config->functions, list) {
                                if (test_bit(endp, f->endpoints))
@@ -2124,14 +2126,14 @@ int composite_os_desc_req_prepare(struct usb_composite_dev *cdev,
 
        cdev->os_desc_req = usb_ep_alloc_request(ep0, GFP_KERNEL);
        if (!cdev->os_desc_req) {
-               ret = PTR_ERR(cdev->os_desc_req);
+               ret = -ENOMEM;
                goto end;
        }
 
        /* OS feature descriptor length <= 4kB */
        cdev->os_desc_req->buf = kmalloc(4096, GFP_KERNEL);
        if (!cdev->os_desc_req->buf) {
-               ret = PTR_ERR(cdev->os_desc_req->buf);
+               ret = -ENOMEM;
                kfree(cdev->os_desc_req);
                goto end;
        }
index 70cf347..f9237fe 100644 (file)
@@ -1490,7 +1490,9 @@ void unregister_gadget_item(struct config_item *item)
 {
        struct gadget_info *gi = to_gadget_info(item);
 
+       mutex_lock(&gi->lock);
        unregister_gadget(gi);
+       mutex_unlock(&gi->lock);
 }
 EXPORT_SYMBOL_GPL(unregister_gadget_item);
 
index 943c21a..ab6ac1b 100644 (file)
@@ -680,6 +680,12 @@ static int rndis_reset_response(struct rndis_params *params,
 {
        rndis_reset_cmplt_type *resp;
        rndis_resp_t *r;
+       u8 *xbuf;
+       u32 length;
+
+       /* drain the response queue */
+       while ((xbuf = rndis_get_next_response(params, &length)))
+               rndis_free_response(params, xbuf);
 
        r = rndis_add_response(params, sizeof(rndis_reset_cmplt_type));
        if (!r)
index a3f7e7c..5f562c1 100644 (file)
@@ -556,7 +556,8 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
                        /* Multi frame CDC protocols may store the frame for
                         * later which is not a dropped frame.
                         */
-                       if (dev->port_usb->supports_multi_frame)
+                       if (dev->port_usb &&
+                                       dev->port_usb->supports_multi_frame)
                                goto multiframe;
                        goto drop;
                }
index 66753ba..31125a4 100644 (file)
@@ -2023,7 +2023,7 @@ static int uvcg_streaming_class_allow_link(struct config_item *src,
        if (!data) {
                kfree(*class_array);
                *class_array = NULL;
-               ret = PTR_ERR(data);
+               ret = -ENOMEM;
                goto unlock;
        }
        cl_arr = *class_array;
index aa3707b..16104b5 100644 (file)
@@ -542,7 +542,7 @@ static ssize_t ep_aio(struct kiocb *iocb,
         */
        spin_lock_irq(&epdata->dev->lock);
        value = -ENODEV;
-       if (unlikely(epdata->ep))
+       if (unlikely(epdata->ep == NULL))
                goto fail;
 
        req = usb_ep_alloc_request(epdata->ep, GFP_ATOMIC);
@@ -606,7 +606,7 @@ ep_read_iter(struct kiocb *iocb, struct iov_iter *to)
        }
        if (is_sync_kiocb(iocb)) {
                value = ep_io(epdata, buf, len);
-               if (value >= 0 && copy_to_iter(buf, value, to))
+               if (value >= 0 && (copy_to_iter(buf, value, to) != value))
                        value = -EFAULT;
        } else {
                struct kiocb_priv *priv = kzalloc(sizeof *priv, GFP_KERNEL);
index ff8685e..934f838 100644 (file)
@@ -1145,7 +1145,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
                        if (ret != -EPROBE_DEFER)
                                list_del(&driver->pending);
                        if (ret)
-                               goto err4;
+                               goto err5;
                        break;
                }
        }
@@ -1154,6 +1154,9 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
 
        return 0;
 
+err5:
+       device_del(&udc->dev);
+
 err4:
        list_del(&udc->list);
        mutex_unlock(&udc_lock);
index 93d28cb..cf8819a 100644 (file)
@@ -2053,7 +2053,7 @@ static void setup_received_handle(struct qe_udc *udc,
                        struct qe_ep *ep;
 
                        if (wValue != 0 || wLength != 0
-                               || pipe > USB_MAX_ENDPOINTS)
+                               || pipe >= USB_MAX_ENDPOINTS)
                                break;
                        ep = &udc->eps[pipe];
 
index a962b89..1e5f529 100644 (file)
@@ -332,11 +332,11 @@ static void ehci_turn_off_all_ports(struct ehci_hcd *ehci)
        int     port = HCS_N_PORTS(ehci->hcs_params);
 
        while (port--) {
-               ehci_writel(ehci, PORT_RWC_BITS,
-                               &ehci->regs->port_status[port]);
                spin_unlock_irq(&ehci->lock);
                ehci_port_power(ehci, port, false);
                spin_lock_irq(&ehci->lock);
+               ehci_writel(ehci, PORT_RWC_BITS,
+                               &ehci->regs->port_status[port]);
        }
 }
 
index c369c29..2f76900 100644 (file)
@@ -1675,7 +1675,7 @@ max3421_gpout_set_value(struct usb_hcd *hcd, u8 pin_number, u8 value)
        if (pin_number > 7)
                return;
 
-       mask = 1u << pin_number;
+       mask = 1u << (pin_number % 4);
        idx = pin_number / 4;
 
        if (value)
index d61fcc4..730b9fd 100644 (file)
@@ -386,6 +386,9 @@ static int xhci_stop_device(struct xhci_hcd *xhci, int slot_id, int suspend)
 
        ret = 0;
        virt_dev = xhci->devs[slot_id];
+       if (!virt_dev)
+               return -ENODEV;
+
        cmd = xhci_alloc_command(xhci, false, true, GFP_NOIO);
        if (!cmd) {
                xhci_dbg(xhci, "Couldn't allocate command structure.\n");
index 4fd041b..d7b0f97 100644 (file)
@@ -314,11 +314,12 @@ static void xhci_pci_remove(struct pci_dev *dev)
                usb_remove_hcd(xhci->shared_hcd);
                usb_put_hcd(xhci->shared_hcd);
        }
-       usb_hcd_pci_remove(dev);
 
        /* Workaround for spurious wakeups at shutdown with HSW */
        if (xhci->quirks & XHCI_SPURIOUS_WAKEUP)
                pci_set_power_state(dev, PCI_D3hot);
+
+       usb_hcd_pci_remove(dev);
 }
 
 #ifdef CONFIG_PM
index 918e0c7..fd9fd12 100644 (file)
@@ -1334,12 +1334,6 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 
        cmd = list_entry(xhci->cmd_list.next, struct xhci_command, cmd_list);
 
-       if (cmd->command_trb != xhci->cmd_ring->dequeue) {
-               xhci_err(xhci,
-                        "Command completion event does not match command\n");
-               return;
-       }
-
        del_timer(&xhci->cmd_timer);
 
        trace_xhci_cmd_completion(cmd_trb, (struct xhci_generic_trb *) event);
@@ -1351,6 +1345,13 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
                xhci_handle_stopped_cmd_ring(xhci, cmd);
                return;
        }
+
+       if (cmd->command_trb != xhci->cmd_ring->dequeue) {
+               xhci_err(xhci,
+                        "Command completion event does not match command\n");
+               return;
+       }
+
        /*
         * Host aborted the command ring, check if the current command was
         * supposed to be aborted, otherwise continue normally.
@@ -3243,7 +3244,8 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
        send_addr = addr;
 
        /* Queue the TRBs, even if they are zero-length */
-       for (enqd_len = 0; enqd_len < full_len; enqd_len += trb_buff_len) {
+       for (enqd_len = 0; first_trb || enqd_len < full_len;
+                       enqd_len += trb_buff_len) {
                field = TRB_TYPE(TRB_NORMAL);
 
                /* TRB buffer should not cross 64KB boundaries */
index 52c27ca..9b5b3b2 100644 (file)
@@ -665,7 +665,7 @@ static ssize_t ftdi_elan_read(struct file *file, char __user *buffer,
 {
        char data[30 *3 + 4];
        char *d = data;
-       int m = (sizeof(data) - 1) / 3;
+       int m = (sizeof(data) - 1) / 3 - 1;
        int bytes_read = 0;
        int retry_on_empty = 10;
        int retry_on_timeout = 5;
@@ -1684,7 +1684,7 @@ wait:if (ftdi->disconnected > 0) {
                        int i = 0;
                        char data[30 *3 + 4];
                        char *d = data;
-                       int m = (sizeof(data) - 1) / 3;
+                       int m = (sizeof(data) - 1) / 3 - 1;
                        int l = 0;
                        struct u132_target *target = &ftdi->target[ed];
                        struct u132_command *command = &ftdi->command[
@@ -1876,7 +1876,7 @@ more:{
                if (packet_bytes > 2) {
                        char diag[30 *3 + 4];
                        char *d = diag;
-                       int m = (sizeof(diag) - 1) / 3;
+                       int m = (sizeof(diag) - 1) / 3 - 1;
                        char *b = ftdi->bulk_in_buffer;
                        int bytes_read = 0;
                        diag[0] = 0;
@@ -2053,7 +2053,7 @@ static int ftdi_elan_synchronize(struct usb_ftdi *ftdi)
                        if (packet_bytes > 2) {
                                char diag[30 *3 + 4];
                                char *d = diag;
-                               int m = (sizeof(diag) - 1) / 3;
+                               int m = (sizeof(diag) - 1) / 3 - 1;
                                char *b = ftdi->bulk_in_buffer;
                                int bytes_read = 0;
                                unsigned char c = 0;
@@ -2155,7 +2155,7 @@ more:{
                if (packet_bytes > 2) {
                        char diag[30 *3 + 4];
                        char *d = diag;
-                       int m = (sizeof(diag) - 1) / 3;
+                       int m = (sizeof(diag) - 1) / 3 - 1;
                        char *b = ftdi->bulk_in_buffer;
                        int bytes_read = 0;
                        diag[0] = 0;
index 6b978f0..5c8210d 100644 (file)
@@ -585,7 +585,6 @@ static void sg_timeout(unsigned long _req)
 {
        struct usb_sg_request   *req = (struct usb_sg_request *) _req;
 
-       req->status = -ETIMEDOUT;
        usb_sg_cancel(req);
 }
 
@@ -616,8 +615,10 @@ static int perform_sglist(
                mod_timer(&sg_timer, jiffies +
                                msecs_to_jiffies(SIMPLE_IO_TIMEOUT));
                usb_sg_wait(req);
-               del_timer_sync(&sg_timer);
-               retval = req->status;
+               if (!del_timer_sync(&sg_timer))
+                       retval = -ETIMEDOUT;
+               else
+                       retval = req->status;
 
                /* FIXME check resulting data pattern */
 
@@ -2602,7 +2603,7 @@ usbtest_ioctl(struct usb_interface *intf, unsigned int code, void *buf)
        ktime_get_ts64(&start);
 
        retval = usbtest_do_ioctl(intf, param_32);
-       if (retval)
+       if (retval < 0)
                goto free_mutex;
 
        ktime_get_ts64(&end);
index 6f6d2a7..6523af4 100644 (file)
@@ -140,6 +140,8 @@ static int omap_otg_probe(struct platform_device *pdev)
                 (rev >> 4) & 0xf, rev & 0xf, config->extcon, otg_dev->id,
                 otg_dev->vbus);
 
+       platform_set_drvdata(pdev, otg_dev);
+
        return 0;
 }
 
index 8fbbc2d..ac67bab 100644 (file)
@@ -514,7 +514,8 @@ static struct renesas_usbhs_platform_info *usbhs_parse_dt(struct device *dev)
        if (gpio > 0)
                dparam->enable_gpio = gpio;
 
-       if (dparam->type == USBHS_TYPE_RCAR_GEN2)
+       if (dparam->type == USBHS_TYPE_RCAR_GEN2 ||
+           dparam->type == USBHS_TYPE_RCAR_GEN3)
                dparam->has_usb_dmac = 1;
 
        return info;
index 280ed5f..857e783 100644 (file)
@@ -871,7 +871,7 @@ static int usbhsf_dma_prepare_push(struct usbhs_pkt *pkt, int *is_done)
 
        /* use PIO if packet is less than pio_dma_border or pipe is DCP */
        if ((len < usbhs_get_dparam(priv, pio_dma_border)) ||
-           usbhs_pipe_is_dcp(pipe))
+           usbhs_pipe_type_is(pipe, USB_ENDPOINT_XFER_ISOC))
                goto usbhsf_pio_prepare_push;
 
        /* check data length if this driver don't use USB-DMAC */
@@ -976,7 +976,7 @@ static int usbhsf_dma_prepare_pop_with_usb_dmac(struct usbhs_pkt *pkt,
 
        /* use PIO if packet is less than pio_dma_border or pipe is DCP */
        if ((pkt->length < usbhs_get_dparam(priv, pio_dma_border)) ||
-           usbhs_pipe_is_dcp(pipe))
+           usbhs_pipe_type_is(pipe, USB_ENDPOINT_XFER_ISOC))
                goto usbhsf_pio_prepare_pop;
 
        fifo = usbhsf_get_dma_fifo(priv, pkt);
index 50f3363..92bc83b 100644 (file)
@@ -617,10 +617,13 @@ static int usbhsg_ep_enable(struct usb_ep *ep,
                 * use dmaengine if possible.
                 * It will use pio handler if impossible.
                 */
-               if (usb_endpoint_dir_in(desc))
+               if (usb_endpoint_dir_in(desc)) {
                        pipe->handler = &usbhs_fifo_dma_push_handler;
-               else
+               } else {
                        pipe->handler = &usbhs_fifo_dma_pop_handler;
+                       usbhs_xxxsts_clear(priv, BRDYSTS,
+                                          usbhs_pipe_number(pipe));
+               }
 
                ret = 0;
        }
index 0082080..b2d767e 100644 (file)
@@ -648,6 +648,8 @@ static const struct usb_device_id id_table_combined[] = {
        { USB_DEVICE(FTDI_VID, FTDI_ELV_TFD128_PID) },
        { USB_DEVICE(FTDI_VID, FTDI_ELV_FM3RX_PID) },
        { USB_DEVICE(FTDI_VID, FTDI_ELV_WS777_PID) },
+       { USB_DEVICE(FTDI_VID, FTDI_PALMSENS_PID) },
+       { USB_DEVICE(FTDI_VID, FTDI_IVIUM_XSTAT_PID) },
        { USB_DEVICE(FTDI_VID, LINX_SDMUSBQSS_PID) },
        { USB_DEVICE(FTDI_VID, LINX_MASTERDEVEL2_PID) },
        { USB_DEVICE(FTDI_VID, LINX_FUTURE_0_PID) },
@@ -1008,6 +1010,7 @@ static const struct usb_device_id id_table_combined[] = {
        { USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) },
        { USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) },
        { USB_DEVICE(ICPDAS_VID, ICPDAS_I7563U_PID) },
+       { USB_DEVICE(WICED_VID, WICED_USB20706V2_PID) },
        { }                                     /* Terminating entry */
 };
 
index c5d6c1e..f87a938 100644 (file)
 #define FTDI_4N_GALAXY_DE_2_PID        0xF3C1
 #define FTDI_4N_GALAXY_DE_3_PID        0xF3C2
 
+/*
+ * Ivium Technologies product IDs
+ */
+#define FTDI_PALMSENS_PID      0xf440
+#define FTDI_IVIUM_XSTAT_PID   0xf441
+
 /*
  * Linx Technologies product ids
  */
 #define INTREPID_VALUECAN_PID  0x0601
 #define INTREPID_NEOVI_PID     0x0701
 
+/*
+ * WICED USB UART
+ */
+#define WICED_VID              0x0A5C
+#define WICED_USB20706V2_PID   0x6422
+
 /*
  * Definitions for ID TECH (www.idt-net.com) devices
  */
index 8e07536..bc47258 100644 (file)
@@ -274,6 +274,12 @@ static void option_instat_callback(struct urb *urb);
 #define TELIT_PRODUCT_LE920                    0x1200
 #define TELIT_PRODUCT_LE910                    0x1201
 #define TELIT_PRODUCT_LE910_USBCFG4            0x1206
+#define TELIT_PRODUCT_LE920A4_1207             0x1207
+#define TELIT_PRODUCT_LE920A4_1208             0x1208
+#define TELIT_PRODUCT_LE920A4_1211             0x1211
+#define TELIT_PRODUCT_LE920A4_1212             0x1212
+#define TELIT_PRODUCT_LE920A4_1213             0x1213
+#define TELIT_PRODUCT_LE920A4_1214             0x1214
 
 /* ZTE PRODUCTS */
 #define ZTE_VENDOR_ID                          0x19d2
@@ -628,6 +634,11 @@ static const struct option_blacklist_info telit_le920_blacklist = {
        .reserved = BIT(1) | BIT(5),
 };
 
+static const struct option_blacklist_info telit_le920a4_blacklist_1 = {
+       .sendsetup = BIT(0),
+       .reserved = BIT(1),
+};
+
 static const struct option_blacklist_info telit_le922_blacklist_usbcfg0 = {
        .sendsetup = BIT(2),
        .reserved = BIT(0) | BIT(1) | BIT(3),
@@ -1203,6 +1214,16 @@ static const struct usb_device_id option_ids[] = {
                .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920),
                .driver_info = (kernel_ulong_t)&telit_le920_blacklist },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1207) },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1208),
+               .driver_info = (kernel_ulong_t)&telit_le920a4_blacklist_1 },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1211),
+               .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1212),
+               .driver_info = (kernel_ulong_t)&telit_le920a4_blacklist_1 },
+       { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1213, 0xff) },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1214),
+               .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
        { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */
        { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff),
                .driver_info = (kernel_ulong_t)&net_intf1_blacklist },
@@ -1966,6 +1987,7 @@ static const struct usb_device_id option_ids[] = {
          .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
        { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */
        { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */
+       { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */
        { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) },                /* OLICARD300 - MT6225 */
        { USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) },
        { USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) },
index b1b9bac..d213cf4 100644 (file)
@@ -1433,7 +1433,7 @@ int usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[]
 
        rc = usb_register(udriver);
        if (rc)
-               return rc;
+               goto failed_usb_register;
 
        for (sd = serial_drivers; *sd; ++sd) {
                (*sd)->usb_driver = udriver;
@@ -1451,6 +1451,8 @@ int usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[]
        while (sd-- > serial_drivers)
                usb_serial_deregister(*sd);
        usb_deregister(udriver);
+failed_usb_register:
+       kfree(udriver);
        return rc;
 }
 EXPORT_SYMBOL_GPL(usb_serial_register_drivers);
index 15ecfc9..152b438 100644 (file)
@@ -564,67 +564,80 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev,
 }
 
 static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
-                                          uint32_t flags, void *data)
+                                          unsigned int count, uint32_t flags,
+                                          void *data)
 {
-       int32_t fd = *(int32_t *)data;
-
-       if (!(flags & VFIO_IRQ_SET_DATA_TYPE_MASK))
-               return -EINVAL;
-
        /* DATA_NONE/DATA_BOOL enables loopback testing */
        if (flags & VFIO_IRQ_SET_DATA_NONE) {
-               if (*ctx)
-                       eventfd_signal(*ctx, 1);
-               return 0;
+               if (*ctx) {
+                       if (count) {
+                               eventfd_signal(*ctx, 1);
+                       } else {
+                               eventfd_ctx_put(*ctx);
+                               *ctx = NULL;
+                       }
+                       return 0;
+               }
        } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
-               uint8_t trigger = *(uint8_t *)data;
+               uint8_t trigger;
+
+               if (!count)
+                       return -EINVAL;
+
+               trigger = *(uint8_t *)data;
                if (trigger && *ctx)
                        eventfd_signal(*ctx, 1);
-               return 0;
-       }
 
-       /* Handle SET_DATA_EVENTFD */
-       if (fd == -1) {
-               if (*ctx)
-                       eventfd_ctx_put(*ctx);
-               *ctx = NULL;
                return 0;
-       } else if (fd >= 0) {
-               struct eventfd_ctx *efdctx;
-               efdctx = eventfd_ctx_fdget(fd);
-               if (IS_ERR(efdctx))
-                       return PTR_ERR(efdctx);
-               if (*ctx)
-                       eventfd_ctx_put(*ctx);
-               *ctx = efdctx;
+       } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+               int32_t fd;
+
+               if (!count)
+                       return -EINVAL;
+
+               fd = *(int32_t *)data;
+               if (fd == -1) {
+                       if (*ctx)
+                               eventfd_ctx_put(*ctx);
+                       *ctx = NULL;
+               } else if (fd >= 0) {
+                       struct eventfd_ctx *efdctx;
+
+                       efdctx = eventfd_ctx_fdget(fd);
+                       if (IS_ERR(efdctx))
+                               return PTR_ERR(efdctx);
+
+                       if (*ctx)
+                               eventfd_ctx_put(*ctx);
+
+                       *ctx = efdctx;
+               }
                return 0;
-       } else
-               return -EINVAL;
+       }
+
+       return -EINVAL;
 }
 
 static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
                                    unsigned index, unsigned start,
                                    unsigned count, uint32_t flags, void *data)
 {
-       if (index != VFIO_PCI_ERR_IRQ_INDEX)
+       if (index != VFIO_PCI_ERR_IRQ_INDEX || start != 0 || count > 1)
                return -EINVAL;
 
-       /*
-        * We should sanitize start & count, but that wasn't caught
-        * originally, so this IRQ index must forever ignore them :-(
-        */
-
-       return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger, flags, data);
+       return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger,
+                                              count, flags, data);
 }
 
 static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev,
                                    unsigned index, unsigned start,
                                    unsigned count, uint32_t flags, void *data)
 {
-       if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count != 1)
+       if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count > 1)
                return -EINVAL;
 
-       return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger, flags, data);
+       return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger,
+                                              count, flags, data);
 }
 
 int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
index 9d6320e..6e29d05 100644 (file)
@@ -88,7 +88,7 @@ struct vhost_scsi_cmd {
        struct scatterlist *tvc_prot_sgl;
        struct page **tvc_upages;
        /* Pointer to response header iovec */
-       struct iovec *tvc_resp_iov;
+       struct iovec tvc_resp_iov;
        /* Pointer to vhost_scsi for our device */
        struct vhost_scsi *tvc_vhost;
        /* Pointer to vhost_virtqueue for the cmd */
@@ -547,7 +547,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
                memcpy(v_rsp.sense, cmd->tvc_sense_buf,
                       se_cmd->scsi_sense_length);
 
-               iov_iter_init(&iov_iter, READ, cmd->tvc_resp_iov,
+               iov_iter_init(&iov_iter, READ, &cmd->tvc_resp_iov,
                              cmd->tvc_in_iovs, sizeof(v_rsp));
                ret = copy_to_iter(&v_rsp, sizeof(v_rsp), &iov_iter);
                if (likely(ret == sizeof(v_rsp))) {
@@ -1044,7 +1044,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
                }
                cmd->tvc_vhost = vs;
                cmd->tvc_vq = vq;
-               cmd->tvc_resp_iov = &vq->iov[out];
+               cmd->tvc_resp_iov = vq->iov[out];
                cmd->tvc_in_iovs = in;
 
                pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n",
index 388eec4..97fb2f8 100644 (file)
@@ -220,20 +220,20 @@ static long vhost_test_reset_owner(struct vhost_test *n)
 {
        void *priv = NULL;
        long err;
-       struct vhost_memory *memory;
+       struct vhost_umem *umem;
 
        mutex_lock(&n->dev.mutex);
        err = vhost_dev_check_owner(&n->dev);
        if (err)
                goto done;
-       memory = vhost_dev_reset_owner_prepare();
-       if (!memory) {
+       umem = vhost_dev_reset_owner_prepare();
+       if (!umem) {
                err = -ENOMEM;
                goto done;
        }
        vhost_test_stop(n, &priv);
        vhost_test_flush(n);
-       vhost_dev_reset_owner(&n->dev, memory);
+       vhost_dev_reset_owner(&n->dev, umem);
 done:
        mutex_unlock(&n->dev.mutex);
        return err;
index 0ddf3a2..e3b30ea 100644 (file)
@@ -307,6 +307,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
 
        vhost_disable_notify(&vsock->dev, vq);
        for (;;) {
+               u32 len;
+
                if (!vhost_vsock_more_replies(vsock)) {
                        /* Stop tx until the device processes already
                         * pending replies.  Leave tx virtqueue
@@ -334,13 +336,15 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
                        continue;
                }
 
+               len = pkt->len;
+
                /* Only accept correctly addressed packets */
                if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
                        virtio_transport_recv_pkt(pkt);
                else
                        virtio_transport_free_pkt(pkt);
 
-               vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
+               vhost_add_used(vq, head, sizeof(pkt->hdr) + len);
                added = true;
        }
 
index 114a0c8..e383ecd 100644 (file)
@@ -327,6 +327,8 @@ static inline int virtqueue_add(struct virtqueue *_vq,
                 * host should service the ring ASAP. */
                if (out_sgs)
                        vq->notify(&vq->vq);
+               if (indirect)
+                       kfree(desc);
                END_USE(vq);
                return -ENOSPC;
        }
@@ -426,6 +428,7 @@ unmap_release:
        if (indirect)
                kfree(desc);
 
+       END_USE(vq);
        return -EIO;
 }
 
index 7487971..c1010f0 100644 (file)
@@ -316,7 +316,7 @@ static int xenbus_write_transaction(unsigned msg_type,
                        rc = -ENOMEM;
                        goto out;
                }
-       } else {
+       } else if (msg_type == XS_TRANSACTION_END) {
                list_for_each_entry(trans, &u->transactions, list)
                        if (trans->handle.id == u->u.msg.tx_id)
                                break;
index 4b0eff6..85737e9 100644 (file)
@@ -189,11 +189,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
        case 1:
                _debug("extract FID count");
                ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
 
                call->count = ntohl(call->tmp);
                _debug("FID count: %u", call->count);
@@ -210,11 +207,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
                _debug("extract FID array");
                ret = afs_extract_data(call, skb, last, call->buffer,
                                       call->count * 3 * 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
 
                _debug("unmarshall FID array");
                call->request = kcalloc(call->count,
@@ -239,11 +233,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
        case 3:
                _debug("extract CB count");
                ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
 
                tmp = ntohl(call->tmp);
                _debug("CB count: %u", tmp);
@@ -258,11 +249,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
                _debug("extract CB array");
                ret = afs_extract_data(call, skb, last, call->request,
                                       call->count * 3 * 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
 
                _debug("unmarshall CB array");
                cb = call->request;
@@ -278,9 +266,9 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
                call->unmarshall++;
 
        case 5:
-               _debug("trailer");
-               if (skb->len != 0)
-                       return -EBADMSG;
+               ret = afs_data_complete(call, skb, last);
+               if (ret < 0)
+                       return ret;
 
                /* Record that the message was unmarshalled successfully so
                 * that the call destructor can know do the callback breaking
@@ -294,8 +282,6 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
                break;
        }
 
-       if (!last)
-               return 0;
 
        call->state = AFS_CALL_REPLYING;
 
@@ -335,13 +321,13 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
 {
        struct afs_server *server;
        struct in_addr addr;
+       int ret;
 
        _enter(",{%u},%d", skb->len, last);
 
-       if (skb->len > 0)
-               return -EBADMSG;
-       if (!last)
-               return 0;
+       ret = afs_data_complete(call, skb, last);
+       if (ret < 0)
+               return ret;
 
        /* no unmarshalling required */
        call->state = AFS_CALL_REPLYING;
@@ -371,8 +357,10 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call,
 
        _enter(",{%u},%d", skb->len, last);
 
+       /* There are some arguments that we ignore */
+       afs_data_consumed(call, skb);
        if (!last)
-               return 0;
+               return -EAGAIN;
 
        /* no unmarshalling required */
        call->state = AFS_CALL_REPLYING;
@@ -408,12 +396,13 @@ static void SRXAFSCB_Probe(struct work_struct *work)
 static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
                                bool last)
 {
+       int ret;
+
        _enter(",{%u},%d", skb->len, last);
 
-       if (skb->len > 0)
-               return -EBADMSG;
-       if (!last)
-               return 0;
+       ret = afs_data_complete(call, skb, last);
+       if (ret < 0)
+               return ret;
 
        /* no unmarshalling required */
        call->state = AFS_CALL_REPLYING;
@@ -460,10 +449,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
 
        _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
 
-       if (skb->len > 0)
-               return -EBADMSG;
-       if (!last)
-               return 0;
+       ret = afs_data_complete(call, skb, last);
+       if (ret < 0)
+               return ret;
 
        switch (call->unmarshall) {
        case 0:
@@ -509,8 +497,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
                break;
        }
 
-       if (!last)
-               return 0;
+       ret = afs_data_complete(call, skb, last);
+       if (ret < 0)
+               return ret;
 
        call->state = AFS_CALL_REPLYING;
 
@@ -588,12 +577,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
 static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call,
                                                 struct sk_buff *skb, bool last)
 {
+       int ret;
+
        _enter(",{%u},%d", skb->len, last);
 
-       if (skb->len > 0)
-               return -EBADMSG;
-       if (!last)
-               return 0;
+       ret = afs_data_complete(call, skb, last);
+       if (ret < 0)
+               return ret;
 
        /* no unmarshalling required */
        call->state = AFS_CALL_REPLYING;
index c2e930e..9312b92 100644 (file)
@@ -240,15 +240,13 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call,
 {
        struct afs_vnode *vnode = call->reply;
        const __be32 *bp;
+       int ret;
 
        _enter(",,%u", last);
 
-       afs_transfer_reply(call, skb);
-       if (!last)
-               return 0;
-
-       if (call->reply_size != call->reply_max)
-               return -EBADMSG;
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
@@ -335,11 +333,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
        case 1:
                _debug("extract data length (MSW)");
                ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
 
                call->count = ntohl(call->tmp);
                _debug("DATA length MSW: %u", call->count);
@@ -353,11 +348,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
        case 2:
                _debug("extract data length");
                ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
 
                call->count = ntohl(call->tmp);
                _debug("DATA length: %u", call->count);
@@ -375,11 +367,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
                        ret = afs_extract_data(call, skb, last, buffer,
                                               call->count);
                        kunmap_atomic(buffer);
-                       switch (ret) {
-                       case 0:         break;
-                       case -EAGAIN:   return 0;
-                       default:        return ret;
-                       }
+                       if (ret < 0)
+                               return ret;
                }
 
                call->offset = 0;
@@ -389,11 +378,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
        case 4:
                ret = afs_extract_data(call, skb, last, call->buffer,
                                       (21 + 3 + 6) * 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
 
                bp = call->buffer;
                xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
@@ -405,15 +391,12 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
                call->unmarshall++;
 
        case 5:
-               _debug("trailer");
-               if (skb->len != 0)
-                       return -EBADMSG;
+               ret = afs_data_complete(call, skb, last);
+               if (ret < 0)
+                       return ret;
                break;
        }
 
-       if (!last)
-               return 0;
-
        if (call->count < PAGE_SIZE) {
                _debug("clear");
                page = call->reply3;
@@ -537,9 +520,8 @@ static int afs_deliver_fs_give_up_callbacks(struct afs_call *call,
 {
        _enter(",{%u},%d", skb->len, last);
 
-       if (skb->len > 0)
-               return -EBADMSG; /* shouldn't be any reply data */
-       return 0;
+       /* shouldn't be any reply data */
+       return afs_data_complete(call, skb, last);
 }
 
 /*
@@ -622,15 +604,13 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call,
 {
        struct afs_vnode *vnode = call->reply;
        const __be32 *bp;
+       int ret;
 
        _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
 
-       afs_transfer_reply(call, skb);
-       if (!last)
-               return 0;
-
-       if (call->reply_size != call->reply_max)
-               return -EBADMSG;
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
@@ -721,15 +701,13 @@ static int afs_deliver_fs_remove(struct afs_call *call,
 {
        struct afs_vnode *vnode = call->reply;
        const __be32 *bp;
+       int ret;
 
        _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
 
-       afs_transfer_reply(call, skb);
-       if (!last)
-               return 0;
-
-       if (call->reply_size != call->reply_max)
-               return -EBADMSG;
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
@@ -804,15 +782,13 @@ static int afs_deliver_fs_link(struct afs_call *call,
 {
        struct afs_vnode *dvnode = call->reply, *vnode = call->reply2;
        const __be32 *bp;
+       int ret;
 
        _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
 
-       afs_transfer_reply(call, skb);
-       if (!last)
-               return 0;
-
-       if (call->reply_size != call->reply_max)
-               return -EBADMSG;
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
@@ -892,15 +868,13 @@ static int afs_deliver_fs_symlink(struct afs_call *call,
 {
        struct afs_vnode *vnode = call->reply;
        const __be32 *bp;
+       int ret;
 
        _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
 
-       afs_transfer_reply(call, skb);
-       if (!last)
-               return 0;
-
-       if (call->reply_size != call->reply_max)
-               return -EBADMSG;
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
@@ -999,15 +973,13 @@ static int afs_deliver_fs_rename(struct afs_call *call,
 {
        struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2;
        const __be32 *bp;
+       int ret;
 
        _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
 
-       afs_transfer_reply(call, skb);
-       if (!last)
-               return 0;
-
-       if (call->reply_size != call->reply_max)
-               return -EBADMSG;
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
@@ -1105,20 +1077,13 @@ static int afs_deliver_fs_store_data(struct afs_call *call,
 {
        struct afs_vnode *vnode = call->reply;
        const __be32 *bp;
+       int ret;
 
        _enter(",,%u", last);
 
-       afs_transfer_reply(call, skb);
-       if (!last) {
-               _leave(" = 0 [more]");
-               return 0;
-       }
-
-       if (call->reply_size != call->reply_max) {
-               _leave(" = -EBADMSG [%u != %u]",
-                      call->reply_size, call->reply_max);
-               return -EBADMSG;
-       }
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
@@ -1292,20 +1257,13 @@ static int afs_deliver_fs_store_status(struct afs_call *call,
        afs_dataversion_t *store_version;
        struct afs_vnode *vnode = call->reply;
        const __be32 *bp;
+       int ret;
 
        _enter(",,%u", last);
 
-       afs_transfer_reply(call, skb);
-       if (!last) {
-               _leave(" = 0 [more]");
-               return 0;
-       }
-
-       if (call->reply_size != call->reply_max) {
-               _leave(" = -EBADMSG [%u != %u]",
-                      call->reply_size, call->reply_max);
-               return -EBADMSG;
-       }
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
 
        /* unmarshall the reply once we've received all of it */
        store_version = NULL;
@@ -1504,11 +1462,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
                _debug("extract status");
                ret = afs_extract_data(call, skb, last, call->buffer,
                                       12 * 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
 
                bp = call->buffer;
                xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2);
@@ -1518,11 +1473,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
                /* extract the volume name length */
        case 2:
                ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
 
                call->count = ntohl(call->tmp);
                _debug("volname length: %u", call->count);
@@ -1537,11 +1489,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
                if (call->count > 0) {
                        ret = afs_extract_data(call, skb, last, call->reply3,
                                               call->count);
-                       switch (ret) {
-                       case 0:         break;
-                       case -EAGAIN:   return 0;
-                       default:        return ret;
-                       }
+                       if (ret < 0)
+                               return ret;
                }
 
                p = call->reply3;
@@ -1561,11 +1510,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
        case 4:
                ret = afs_extract_data(call, skb, last, call->buffer,
                                       call->count);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
 
                call->offset = 0;
                call->unmarshall++;
@@ -1574,11 +1520,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
                /* extract the offline message length */
        case 5:
                ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
 
                call->count = ntohl(call->tmp);
                _debug("offline msg length: %u", call->count);
@@ -1593,11 +1536,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
                if (call->count > 0) {
                        ret = afs_extract_data(call, skb, last, call->reply3,
                                               call->count);
-                       switch (ret) {
-                       case 0:         break;
-                       case -EAGAIN:   return 0;
-                       default:        return ret;
-                       }
+                       if (ret < 0)
+                               return ret;
                }
 
                p = call->reply3;
@@ -1617,11 +1557,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
        case 7:
                ret = afs_extract_data(call, skb, last, call->buffer,
                                       call->count);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
 
                call->offset = 0;
                call->unmarshall++;
@@ -1630,11 +1567,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
                /* extract the message of the day length */
        case 8:
                ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
 
                call->count = ntohl(call->tmp);
                _debug("motd length: %u", call->count);
@@ -1649,11 +1583,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
                if (call->count > 0) {
                        ret = afs_extract_data(call, skb, last, call->reply3,
                                               call->count);
-                       switch (ret) {
-                       case 0:         break;
-                       case -EAGAIN:   return 0;
-                       default:        return ret;
-                       }
+                       if (ret < 0)
+                               return ret;
                }
 
                p = call->reply3;
@@ -1673,26 +1604,20 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
        case 10:
                ret = afs_extract_data(call, skb, last, call->buffer,
                                       call->count);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
 
                call->offset = 0;
                call->unmarshall++;
        no_motd_padding:
 
        case 11:
-               _debug("trailer %d", skb->len);
-               if (skb->len != 0)
-                       return -EBADMSG;
+               ret = afs_data_complete(call, skb, last);
+               if (ret < 0)
+                       return ret;
                break;
        }
 
-       if (!last)
-               return 0;
-
        _leave(" = 0 [done]");
        return 0;
 }
@@ -1764,15 +1689,13 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call,
                                    struct sk_buff *skb, bool last)
 {
        const __be32 *bp;
+       int ret;
 
        _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
 
-       afs_transfer_reply(call, skb);
-       if (!last)
-               return 0;
-
-       if (call->reply_size != call->reply_max)
-               return -EBADMSG;
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
index 71d5982..df976b2 100644 (file)
@@ -609,17 +609,29 @@ extern void afs_proc_cell_remove(struct afs_cell *);
  */
 extern int afs_open_socket(void);
 extern void afs_close_socket(void);
+extern void afs_data_consumed(struct afs_call *, struct sk_buff *);
 extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t,
                         const struct afs_wait_mode *);
 extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *,
                                            size_t, size_t);
 extern void afs_flat_call_destructor(struct afs_call *);
-extern void afs_transfer_reply(struct afs_call *, struct sk_buff *);
+extern int afs_transfer_reply(struct afs_call *, struct sk_buff *, bool);
 extern void afs_send_empty_reply(struct afs_call *);
 extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
 extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *,
                            size_t);
 
+static inline int afs_data_complete(struct afs_call *call, struct sk_buff *skb,
+                                   bool last)
+{
+       if (skb->len > 0)
+               return -EBADMSG;
+       afs_data_consumed(call, skb);
+       if (!last)
+               return -EAGAIN;
+       return 0;
+}
+
 /*
  * security.c
  */
index 4832de8..14d04c8 100644 (file)
@@ -150,10 +150,9 @@ void afs_close_socket(void)
 }
 
 /*
- * note that the data in a socket buffer is now delivered and that the buffer
- * should be freed
+ * Note that the data in a socket buffer is now consumed.
  */
-static void afs_data_delivered(struct sk_buff *skb)
+void afs_data_consumed(struct afs_call *call, struct sk_buff *skb)
 {
        if (!skb) {
                _debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs));
@@ -161,9 +160,7 @@ static void afs_data_delivered(struct sk_buff *skb)
        } else {
                _debug("DLVR %p{%u} [%d]",
                       skb, skb->mark, atomic_read(&afs_outstanding_skbs));
-               if (atomic_dec_return(&afs_outstanding_skbs) == -1)
-                       BUG();
-               rxrpc_kernel_data_delivered(skb);
+               rxrpc_kernel_data_consumed(call->rxcall, skb);
        }
 }
 
@@ -489,9 +486,15 @@ static void afs_deliver_to_call(struct afs_call *call)
                        last = rxrpc_kernel_is_data_last(skb);
                        ret = call->type->deliver(call, skb, last);
                        switch (ret) {
+                       case -EAGAIN:
+                               if (last) {
+                                       _debug("short data");
+                                       goto unmarshal_error;
+                               }
+                               break;
                        case 0:
-                               if (last &&
-                                   call->state == AFS_CALL_AWAIT_REPLY)
+                               ASSERT(last);
+                               if (call->state == AFS_CALL_AWAIT_REPLY)
                                        call->state = AFS_CALL_COMPLETE;
                                break;
                        case -ENOTCONN:
@@ -501,6 +504,7 @@ static void afs_deliver_to_call(struct afs_call *call)
                                abort_code = RX_INVALID_OPERATION;
                                goto do_abort;
                        default:
+                       unmarshal_error:
                                abort_code = RXGEN_CC_UNMARSHAL;
                                if (call->state != AFS_CALL_AWAIT_REPLY)
                                        abort_code = RXGEN_SS_UNMARSHAL;
@@ -511,9 +515,7 @@ static void afs_deliver_to_call(struct afs_call *call)
                                call->state = AFS_CALL_ERROR;
                                break;
                        }
-                       afs_data_delivered(skb);
-                       skb = NULL;
-                       continue;
+                       break;
                case RXRPC_SKB_MARK_FINAL_ACK:
                        _debug("Rcv ACK");
                        call->state = AFS_CALL_COMPLETE;
@@ -685,15 +687,35 @@ static void afs_process_async_call(struct afs_call *call)
 }
 
 /*
- * empty a socket buffer into a flat reply buffer
+ * Empty a socket buffer into a flat reply buffer.
  */
-void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb)
+int afs_transfer_reply(struct afs_call *call, struct sk_buff *skb, bool last)
 {
        size_t len = skb->len;
 
-       if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, len) < 0)
-               BUG();
-       call->reply_size += len;
+       if (len > call->reply_max - call->reply_size) {
+               _leave(" = -EBADMSG [%zu > %u]",
+                      len, call->reply_max - call->reply_size);
+               return -EBADMSG;
+       }
+
+       if (len > 0) {
+               if (skb_copy_bits(skb, 0, call->buffer + call->reply_size,
+                                 len) < 0)
+                       BUG();
+               call->reply_size += len;
+       }
+
+       afs_data_consumed(call, skb);
+       if (!last)
+               return -EAGAIN;
+
+       if (call->reply_size != call->reply_max) {
+               _leave(" = -EBADMSG [%u != %u]",
+                      call->reply_size, call->reply_max);
+               return -EBADMSG;
+       }
+       return 0;
 }
 
 /*
@@ -745,7 +767,8 @@ static void afs_collect_incoming_call(struct work_struct *work)
 }
 
 /*
- * grab the operation ID from an incoming cache manager call
+ * Grab the operation ID from an incoming cache manager call.  The socket
+ * buffer is discarded on error or if we don't yet have sufficient data.
  */
 static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
                                bool last)
@@ -766,12 +789,9 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
        call->offset += len;
 
        if (call->offset < 4) {
-               if (last) {
-                       _leave(" = -EBADMSG [op ID short]");
-                       return -EBADMSG;
-               }
-               _leave(" = 0 [incomplete]");
-               return 0;
+               afs_data_consumed(call, skb);
+               _leave(" = -EAGAIN");
+               return -EAGAIN;
        }
 
        call->state = AFS_CALL_AWAIT_REQUEST;
@@ -855,7 +875,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 }
 
 /*
- * extract a piece of data from the received data socket buffers
+ * Extract a piece of data from the received data socket buffers.
  */
 int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
                     bool last, void *buf, size_t count)
@@ -873,10 +893,7 @@ int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
        call->offset += len;
 
        if (call->offset < count) {
-               if (last) {
-                       _leave(" = -EBADMSG [%d < %zu]", call->offset, count);
-                       return -EBADMSG;
-               }
+               afs_data_consumed(call, skb);
                _leave(" = -EAGAIN");
                return -EAGAIN;
        }
index 340afd0..f94d1ab 100644 (file)
@@ -64,16 +64,13 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call,
        struct afs_cache_vlocation *entry;
        __be32 *bp;
        u32 tmp;
-       int loop;
+       int loop, ret;
 
        _enter(",,%u", last);
 
-       afs_transfer_reply(call, skb);
-       if (!last)
-               return 0;
-
-       if (call->reply_size != call->reply_max)
-               return -EBADMSG;
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
 
        /* unmarshall the reply once we've received all of it */
        entry = call->reply;
index c3cdde8..08ae993 100644 (file)
@@ -249,7 +249,8 @@ struct super_block *freeze_bdev(struct block_device *bdev)
                 * thaw_bdev drops it.
                 */
                sb = get_super(bdev);
-               drop_super(sb);
+               if (sb)
+                       drop_super(sb);
                mutex_unlock(&bdev->bd_fsfreeze_mutex);
                return sb;
        }
@@ -646,7 +647,7 @@ static struct dentry *bd_mount(struct file_system_type *fs_type,
 {
        struct dentry *dent;
        dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
-       if (dent)
+       if (!IS_ERR(dent))
                dent->d_sb->s_iflags |= SB_I_CGROUPWB;
        return dent;
 }
index 2b88439..455a6b2 100644 (file)
@@ -589,6 +589,7 @@ static void __merge_refs(struct list_head *head, int mode)
 
                        list_del(&ref2->list);
                        kmem_cache_free(btrfs_prelim_ref_cache, ref2);
+                       cond_resched();
                }
 
        }
index 2fe8f89..eff3993 100644 (file)
@@ -1028,6 +1028,7 @@ struct btrfs_fs_info {
        struct btrfs_workqueue *qgroup_rescan_workers;
        struct completion qgroup_rescan_completion;
        struct btrfs_work qgroup_rescan_work;
+       bool qgroup_rescan_running;     /* protected by qgroup_rescan_lock */
 
        /* filesystem state */
        unsigned long fs_state;
@@ -1079,6 +1080,8 @@ struct btrfs_fs_info {
        struct list_head pinned_chunks;
 
        int creating_free_space_tree;
+       /* Used to record internally whether fs has been frozen */
+       int fs_frozen;
 };
 
 struct btrfs_subvolume_writers {
@@ -2578,7 +2581,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
                                   u64 root_objectid, u64 owner, u64 offset,
                                   struct btrfs_key *ins);
-int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
+int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes,
                         u64 min_alloc_size, u64 empty_size, u64 hint_byte,
                         struct btrfs_key *ins, int is_data, int delalloc);
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
index b6d210e..ac02e04 100644 (file)
@@ -541,7 +541,6 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
        struct btrfs_delayed_ref_head *existing;
        struct btrfs_delayed_ref_head *head_ref = NULL;
        struct btrfs_delayed_ref_root *delayed_refs;
-       struct btrfs_qgroup_extent_record *qexisting;
        int count_mod = 1;
        int must_insert_reserved = 0;
 
@@ -606,10 +605,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                qrecord->num_bytes = num_bytes;
                qrecord->old_roots = NULL;
 
-               qexisting = btrfs_qgroup_insert_dirty_extent(fs_info,
-                                                            delayed_refs,
-                                                            qrecord);
-               if (qexisting)
+               if(btrfs_qgroup_insert_dirty_extent_nolock(fs_info,
+                                       delayed_refs, qrecord))
                        kfree(qrecord);
        }
 
@@ -862,33 +859,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
        return 0;
 }
 
-int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
-                                    struct btrfs_trans_handle *trans,
-                                    u64 ref_root, u64 bytenr, u64 num_bytes)
-{
-       struct btrfs_delayed_ref_root *delayed_refs;
-       struct btrfs_delayed_ref_head *ref_head;
-       int ret = 0;
-
-       if (!fs_info->quota_enabled || !is_fstree(ref_root))
-               return 0;
-
-       delayed_refs = &trans->transaction->delayed_refs;
-
-       spin_lock(&delayed_refs->lock);
-       ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0);
-       if (!ref_head) {
-               ret = -ENOENT;
-               goto out;
-       }
-       WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root);
-       ref_head->qgroup_ref_root = ref_root;
-       ref_head->qgroup_reserved = num_bytes;
-out:
-       spin_unlock(&delayed_refs->lock);
-       return ret;
-}
-
 int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
                                struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes,
index 5fca953..43f3629 100644 (file)
@@ -250,9 +250,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                               u64 parent, u64 ref_root,
                               u64 owner, u64 offset, u64 reserved, int action,
                               struct btrfs_delayed_extent_op *extent_op);
-int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
-                                    struct btrfs_trans_handle *trans,
-                                    u64 ref_root, u64 bytenr, u64 num_bytes);
 int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
                                struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes,
index 59febfb..54bc8c7 100644 (file)
@@ -559,8 +559,29 @@ static noinline int check_leaf(struct btrfs_root *root,
        u32 nritems = btrfs_header_nritems(leaf);
        int slot;
 
-       if (nritems == 0)
+       if (nritems == 0) {
+               struct btrfs_root *check_root;
+
+               key.objectid = btrfs_header_owner(leaf);
+               key.type = BTRFS_ROOT_ITEM_KEY;
+               key.offset = (u64)-1;
+
+               check_root = btrfs_get_fs_root(root->fs_info, &key, false);
+               /*
+                * The only reason we also check NULL here is that during
+                * open_ctree() some roots have not yet been set up.
+                */
+               if (!IS_ERR_OR_NULL(check_root)) {
+                       /* if leaf is the root, then it's fine */
+                       if (leaf->start !=
+                           btrfs_root_bytenr(&check_root->root_item)) {
+                               CORRUPT("non-root leaf's nritems is 0",
+                                       leaf, root, 0);
+                               return -EIO;
+                       }
+               }
                return 0;
+       }
 
        /* Check the 0 item */
        if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
@@ -612,6 +633,19 @@ static noinline int check_leaf(struct btrfs_root *root,
        return 0;
 }
 
+static int check_node(struct btrfs_root *root, struct extent_buffer *node)
+{
+       unsigned long nr = btrfs_header_nritems(node);
+
+       if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) {
+               btrfs_crit(root->fs_info,
+                          "corrupt node: block %llu root %llu nritems %lu",
+                          node->start, root->objectid, nr);
+               return -EIO;
+       }
+       return 0;
+}
+
 static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
                                      u64 phy_offset, struct page *page,
                                      u64 start, u64 end, int mirror)
@@ -682,6 +716,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
                ret = -EIO;
        }
 
+       if (found_level > 0 && check_node(root, eb))
+               ret = -EIO;
+
        if (!ret)
                set_extent_buffer_uptodate(eb);
 err:
@@ -1618,8 +1655,8 @@ fail:
        return ret;
 }
 
-static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
-                                              u64 root_id)
+struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+                                       u64 root_id)
 {
        struct btrfs_root *root;
 
@@ -2298,6 +2335,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
        fs_info->quota_enabled = 0;
        fs_info->pending_quota_state = 0;
        fs_info->qgroup_ulist = NULL;
+       fs_info->qgroup_rescan_running = false;
        mutex_init(&fs_info->qgroup_rescan_lock);
 }
 
@@ -2624,6 +2662,7 @@ int open_ctree(struct super_block *sb,
        atomic_set(&fs_info->qgroup_op_seq, 0);
        atomic_set(&fs_info->reada_works_cnt, 0);
        atomic64_set(&fs_info->tree_mod_seq, 0);
+       fs_info->fs_frozen = 0;
        fs_info->sb = sb;
        fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
        fs_info->metadata_ratio = 0;
@@ -3739,8 +3778,15 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
        if (btrfs_root_refs(&root->root_item) == 0)
                synchronize_srcu(&fs_info->subvol_srcu);
 
-       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
+       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
                btrfs_free_log(NULL, root);
+               if (root->reloc_root) {
+                       free_extent_buffer(root->reloc_root->node);
+                       free_extent_buffer(root->reloc_root->commit_root);
+                       btrfs_put_fs_root(root->reloc_root);
+                       root->reloc_root = NULL;
+               }
+       }
 
        if (root->free_ino_pinned)
                __btrfs_remove_free_space_cache(root->free_ino_pinned);
@@ -3851,7 +3897,7 @@ void close_ctree(struct btrfs_root *root)
        smp_mb();
 
        /* wait for the qgroup rescan worker to stop */
-       btrfs_qgroup_wait_for_completion(fs_info);
+       btrfs_qgroup_wait_for_completion(fs_info, false);
 
        /* wait for the uuid_scan task to finish */
        down(&fs_info->uuid_tree_rescan_sem);
index b3207a0..f19a982 100644 (file)
@@ -68,6 +68,8 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
 struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
                                      struct btrfs_key *location);
 int btrfs_init_fs_root(struct btrfs_root *root);
+struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+                                       u64 root_id);
 int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
                         struct btrfs_root *root);
 void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
index 61b494e..0450dc4 100644 (file)
@@ -60,21 +60,6 @@ enum {
        CHUNK_ALLOC_FORCE = 2,
 };
 
-/*
- * Control how reservations are dealt with.
- *
- * RESERVE_FREE - freeing a reservation.
- * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
- *   ENOSPC accounting
- * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
- *   bytes_may_use as the ENOSPC accounting is done elsewhere
- */
-enum {
-       RESERVE_FREE = 0,
-       RESERVE_ALLOC = 1,
-       RESERVE_ALLOC_NO_ACCOUNT = 2,
-};
-
 static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root, u64 bytenr,
                              u64 num_bytes, int alloc);
@@ -104,9 +89,10 @@ static int find_next_key(struct btrfs_path *path, int level,
                         struct btrfs_key *key);
 static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
                            int dump_block_groups);
-static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
-                                      u64 num_bytes, int reserve,
-                                      int delalloc);
+static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
+                                   u64 ram_bytes, u64 num_bytes, int delalloc);
+static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
+                                    u64 num_bytes, int delalloc);
 static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
                               u64 num_bytes);
 int btrfs_pin_extent(struct btrfs_root *root,
@@ -3501,7 +3487,6 @@ again:
                dcs = BTRFS_DC_SETUP;
        else if (ret == -ENOSPC)
                set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
-       btrfs_free_reserved_data_space(inode, 0, num_pages);
 
 out_put:
        iput(inode);
@@ -4472,6 +4457,15 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
        }
 }
 
+/*
+ * If force is CHUNK_ALLOC_FORCE:
+ *    - return 1 if it successfully allocates a chunk,
+ *    - return errors including -ENOSPC otherwise.
+ * If force is NOT CHUNK_ALLOC_FORCE:
+ *    - return 0 if it doesn't need to allocate a new chunk,
+ *    - return 1 if it successfully allocates a chunk,
+ *    - return errors including -ENOSPC otherwise.
+ */
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                          struct btrfs_root *extent_root, u64 flags, int force)
 {
@@ -4882,7 +4876,7 @@ static int flush_space(struct btrfs_root *root,
                                     btrfs_get_alloc_profile(root, 0),
                                     CHUNK_ALLOC_NO_FORCE);
                btrfs_end_transaction(trans, root);
-               if (ret == -ENOSPC)
+               if (ret > 0 || ret == -ENOSPC)
                        ret = 0;
                break;
        case COMMIT_TRANS:
@@ -6497,19 +6491,15 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
 }
 
 /**
- * btrfs_update_reserved_bytes - update the block_group and space info counters
+ * btrfs_add_reserved_bytes - update the block_group and space info counters
  * @cache:     The cache we are manipulating
+ * @ram_bytes:  The number of bytes of file content; this is the same as
+ *              @num_bytes except for the compress path.
  * @num_bytes: The number of bytes in question
- * @reserve:   One of the reservation enums
  * @delalloc:   The blocks are allocated for the delalloc write
  *
- * This is called by the allocator when it reserves space, or by somebody who is
- * freeing space that was never actually used on disk.  For example if you
- * reserve some space for a new leaf in transaction A and before transaction A
- * commits you free that leaf, you call this with reserve set to 0 in order to
- * clear the reservation.
- *
- * Metadata reservations should be called with RESERVE_ALLOC so we do the proper
+ * This is called by the allocator when it reserves space.  On success
+ * @ram_bytes is subtracted from bytes_may_use so that we do the proper
  * ENOSPC accounting.  For data we handle the reservation through clearing the
  * delalloc bits in the io_tree.  We have to do this since we could end up
  * allocating less disk space for the amount of data we have reserved in the
@@ -6519,44 +6509,63 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
  * make the reservation and return -EAGAIN, otherwise this function always
  * succeeds.
  */
-static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
-                                      u64 num_bytes, int reserve, int delalloc)
+static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
+                                   u64 ram_bytes, u64 num_bytes, int delalloc)
 {
        struct btrfs_space_info *space_info = cache->space_info;
        int ret = 0;
 
        spin_lock(&space_info->lock);
        spin_lock(&cache->lock);
-       if (reserve != RESERVE_FREE) {
-               if (cache->ro) {
-                       ret = -EAGAIN;
-               } else {
-                       cache->reserved += num_bytes;
-                       space_info->bytes_reserved += num_bytes;
-                       if (reserve == RESERVE_ALLOC) {
-                               trace_btrfs_space_reservation(cache->fs_info,
-                                               "space_info", space_info->flags,
-                                               num_bytes, 0);
-                               space_info->bytes_may_use -= num_bytes;
-                       }
-
-                       if (delalloc)
-                               cache->delalloc_bytes += num_bytes;
-               }
+       if (cache->ro) {
+               ret = -EAGAIN;
        } else {
-               if (cache->ro)
-                       space_info->bytes_readonly += num_bytes;
-               cache->reserved -= num_bytes;
-               space_info->bytes_reserved -= num_bytes;
+               cache->reserved += num_bytes;
+               space_info->bytes_reserved += num_bytes;
 
+               trace_btrfs_space_reservation(cache->fs_info,
+                               "space_info", space_info->flags,
+                               ram_bytes, 0);
+               space_info->bytes_may_use -= ram_bytes;
                if (delalloc)
-                       cache->delalloc_bytes -= num_bytes;
+                       cache->delalloc_bytes += num_bytes;
        }
        spin_unlock(&cache->lock);
        spin_unlock(&space_info->lock);
        return ret;
 }
 
+/**
+ * btrfs_free_reserved_bytes - update the block_group and space info counters
+ * @cache:      The cache we are manipulating
+ * @num_bytes:  The number of bytes in question
+ * @delalloc:   The blocks are allocated for the delalloc write
+ *
+ * This is called by somebody who is freeing space that was never actually used
+ * on disk.  For example if you reserve some space for a new leaf in transaction
+ * A and before transaction A commits you free that leaf, you call this
+ * function in order to clear the reservation.
+ */
+
+static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
+                                    u64 num_bytes, int delalloc)
+{
+       struct btrfs_space_info *space_info = cache->space_info;
+       int ret = 0;
+
+       spin_lock(&space_info->lock);
+       spin_lock(&cache->lock);
+       if (cache->ro)
+               space_info->bytes_readonly += num_bytes;
+       cache->reserved -= num_bytes;
+       space_info->bytes_reserved -= num_bytes;
+
+       if (delalloc)
+               cache->delalloc_bytes -= num_bytes;
+       spin_unlock(&cache->lock);
+       spin_unlock(&space_info->lock);
+       return ret;
+}
 void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root)
 {
@@ -7191,7 +7200,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
                WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
 
                btrfs_add_free_space(cache, buf->start, buf->len);
-               btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
+               btrfs_free_reserved_bytes(cache, buf->len, 0);
                btrfs_put_block_group(cache);
                trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
                pin = 0;
@@ -7416,9 +7425,9 @@ btrfs_release_block_group(struct btrfs_block_group_cache *cache,
  * the free space extent currently.
  */
 static noinline int find_free_extent(struct btrfs_root *orig_root,
-                                    u64 num_bytes, u64 empty_size,
-                                    u64 hint_byte, struct btrfs_key *ins,
-                                    u64 flags, int delalloc)
+                               u64 ram_bytes, u64 num_bytes, u64 empty_size,
+                               u64 hint_byte, struct btrfs_key *ins,
+                               u64 flags, int delalloc)
 {
        int ret = 0;
        struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -7430,8 +7439,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
        struct btrfs_space_info *space_info;
        int loop = 0;
        int index = __get_raid_index(flags);
-       int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
-               RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
        bool failed_cluster_refill = false;
        bool failed_alloc = false;
        bool use_cluster = true;
@@ -7763,8 +7770,8 @@ checks:
                                             search_start - offset);
                BUG_ON(offset > search_start);
 
-               ret = btrfs_update_reserved_bytes(block_group, num_bytes,
-                                                 alloc_type, delalloc);
+               ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
+                               num_bytes, delalloc);
                if (ret == -EAGAIN) {
                        btrfs_add_free_space(block_group, offset, num_bytes);
                        goto loop;
@@ -7936,7 +7943,7 @@ again:
        up_read(&info->groups_sem);
 }
 
-int btrfs_reserve_extent(struct btrfs_root *root,
+int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
                         u64 num_bytes, u64 min_alloc_size,
                         u64 empty_size, u64 hint_byte,
                         struct btrfs_key *ins, int is_data, int delalloc)
@@ -7948,8 +7955,8 @@ int btrfs_reserve_extent(struct btrfs_root *root,
        flags = btrfs_get_alloc_profile(root, is_data);
 again:
        WARN_ON(num_bytes < root->sectorsize);
-       ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
-                              flags, delalloc);
+       ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
+                              hint_byte, ins, flags, delalloc);
        if (!ret && !is_data) {
                btrfs_dec_block_group_reservations(root->fs_info,
                                                   ins->objectid);
@@ -7958,6 +7965,7 @@ again:
                        num_bytes = min(num_bytes >> 1, ins->offset);
                        num_bytes = round_down(num_bytes, root->sectorsize);
                        num_bytes = max(num_bytes, min_alloc_size);
+                       ram_bytes = num_bytes;
                        if (num_bytes == min_alloc_size)
                                final_tried = true;
                        goto again;
@@ -7995,7 +8003,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
                if (btrfs_test_opt(root->fs_info, DISCARD))
                        ret = btrfs_discard_extent(root, start, len, NULL);
                btrfs_add_free_space(cache, start, len);
-               btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
+               btrfs_free_reserved_bytes(cache, len, delalloc);
                trace_btrfs_reserved_extent_free(root, start, len);
        }
 
@@ -8223,8 +8231,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
        if (!block_group)
                return -EINVAL;
 
-       ret = btrfs_update_reserved_bytes(block_group, ins->offset,
-                                         RESERVE_ALLOC_NO_ACCOUNT, 0);
+       ret = btrfs_add_reserved_bytes(block_group, ins->offset,
+                                      ins->offset, 0);
        BUG_ON(ret); /* logic error */
        ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
                                         0, owner, offset, ins, 1);
@@ -8368,7 +8376,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
        if (IS_ERR(block_rsv))
                return ERR_CAST(block_rsv);
 
-       ret = btrfs_reserve_extent(root, blocksize, blocksize,
+       ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
                                   empty_size, hint, &ins, 0, 0);
        if (ret)
                goto out_unuse;
@@ -8521,35 +8529,6 @@ reada:
        wc->reada_slot = slot;
 }
 
-/*
- * These may not be seen by the usual inc/dec ref code so we have to
- * add them here.
- */
-static int record_one_subtree_extent(struct btrfs_trans_handle *trans,
-                                    struct btrfs_root *root, u64 bytenr,
-                                    u64 num_bytes)
-{
-       struct btrfs_qgroup_extent_record *qrecord;
-       struct btrfs_delayed_ref_root *delayed_refs;
-
-       qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS);
-       if (!qrecord)
-               return -ENOMEM;
-
-       qrecord->bytenr = bytenr;
-       qrecord->num_bytes = num_bytes;
-       qrecord->old_roots = NULL;
-
-       delayed_refs = &trans->transaction->delayed_refs;
-       spin_lock(&delayed_refs->lock);
-       if (btrfs_qgroup_insert_dirty_extent(trans->fs_info,
-                                            delayed_refs, qrecord))
-               kfree(qrecord);
-       spin_unlock(&delayed_refs->lock);
-
-       return 0;
-}
-
 static int account_leaf_items(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct extent_buffer *eb)
@@ -8583,7 +8562,8 @@ static int account_leaf_items(struct btrfs_trans_handle *trans,
 
                num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
 
-               ret = record_one_subtree_extent(trans, root, bytenr, num_bytes);
+               ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info,
+                               bytenr, num_bytes, GFP_NOFS);
                if (ret)
                        return ret;
        }
@@ -8732,8 +8712,9 @@ walk_down:
                        btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
                        path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
 
-                       ret = record_one_subtree_extent(trans, root, child_bytenr,
-                                                       root->nodesize);
+                       ret = btrfs_qgroup_insert_dirty_extent(trans,
+                                       root->fs_info, child_bytenr,
+                                       root->nodesize, GFP_NOFS);
                        if (ret)
                                goto out;
                }
@@ -9906,6 +9887,7 @@ static int find_first_block_group(struct btrfs_root *root,
                        } else {
                                ret = 0;
                        }
+                       free_extent_map(em);
                        goto out;
                }
                path->slots[0]++;
@@ -9942,6 +9924,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
                block_group->iref = 0;
                block_group->inode = NULL;
                spin_unlock(&block_group->lock);
+               ASSERT(block_group->io_ctl.inode == NULL);
                iput(inode);
                last = block_group->key.objectid + block_group->key.offset;
                btrfs_put_block_group(block_group);
@@ -9999,6 +9982,10 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                        free_excluded_extents(info->extent_root, block_group);
 
                btrfs_remove_free_space_cache(block_group);
+               ASSERT(list_empty(&block_group->dirty_list));
+               ASSERT(list_empty(&block_group->io_list));
+               ASSERT(list_empty(&block_group->bg_list));
+               ASSERT(atomic_read(&block_group->count) == 1);
                btrfs_put_block_group(block_group);
 
                spin_lock(&info->block_group_cache_lock);
index bc2729a..28cd88f 100644 (file)
@@ -20,6 +20,7 @@
 #define EXTENT_DAMAGED         (1U << 14)
 #define EXTENT_NORESERVE       (1U << 15)
 #define EXTENT_QGROUP_RESERVED (1U << 16)
+#define EXTENT_CLEAR_DATA_RESV (1U << 17)
 #define EXTENT_IOBITS          (EXTENT_LOCKED | EXTENT_WRITEBACK)
 #define EXTENT_CTLBITS         (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
 
index 9404121..fea31a4 100644 (file)
@@ -2033,6 +2033,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
                 */
                clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
                          &BTRFS_I(inode)->runtime_flags);
+               /*
+                * An ordered extent might have started before and completed
+                * already with io errors, in which case the inode was not
+                * updated and we end up here. So check the inode's mapping
+                * flags for any errors that might have happened while doing
+                * writeback of file data.
+                */
+               ret = btrfs_inode_check_errors(inode);
                inode_unlock(inode);
                goto out;
        }
@@ -2062,7 +2070,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        }
        trans->sync = true;
 
-       btrfs_init_log_ctx(&ctx);
+       btrfs_init_log_ctx(&ctx, inode);
 
        ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
        if (ret < 0) {
@@ -2667,6 +2675,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 
        alloc_start = round_down(offset, blocksize);
        alloc_end = round_up(offset + len, blocksize);
+       cur_offset = alloc_start;
 
        /* Make sure we aren't being give some crap mode */
        if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -2759,7 +2768,6 @@ static long btrfs_fallocate(struct file *file, int mode,
 
        /* First, check if we exceed the qgroup limit */
        INIT_LIST_HEAD(&reserve_list);
-       cur_offset = alloc_start;
        while (1) {
                em = btrfs_get_extent(inode, NULL, 0, cur_offset,
                                      alloc_end - cur_offset, 0);
@@ -2786,6 +2794,14 @@ static long btrfs_fallocate(struct file *file, int mode,
                                        last_byte - cur_offset);
                        if (ret < 0)
                                break;
+               } else {
+                       /*
+                        * No unwritten extent needs to be reserved for this
+                        * range; free the reserved data space first, otherwise
+                        * a false ENOSPC error will result.
+                        */
+                       btrfs_free_reserved_data_space(inode, cur_offset,
+                               last_byte - cur_offset);
                }
                free_extent_map(em);
                cur_offset = last_byte;
@@ -2803,6 +2819,9 @@ static long btrfs_fallocate(struct file *file, int mode,
                                        range->start,
                                        range->len, 1 << inode->i_blkbits,
                                        offset + len, &alloc_hint);
+               else
+                       btrfs_free_reserved_data_space(inode, range->start,
+                                                      range->len);
                list_del(&range->list);
                kfree(range);
        }
@@ -2837,18 +2856,11 @@ out_unlock:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
                             &cached_state, GFP_KERNEL);
 out:
-       /*
-        * As we waited the extent range, the data_rsv_map must be empty
-        * in the range, as written data range will be released from it.
-        * And for prealloacted extent, it will also be released when
-        * its metadata is written.
-        * So this is completely used as cleanup.
-        */
-       btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start);
        inode_unlock(inode);
        /* Let go of our reservation. */
-       btrfs_free_reserved_data_space(inode, alloc_start,
-                                      alloc_end - alloc_start);
+       if (ret != 0)
+               btrfs_free_reserved_data_space(inode, alloc_start,
+                                      alloc_end - cur_offset);
        return ret;
 }
 
index aa6faba..359ee86 100644 (file)
@@ -495,10 +495,9 @@ again:
        ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
                                              prealloc, prealloc, &alloc_hint);
        if (ret) {
-               btrfs_delalloc_release_space(inode, 0, prealloc);
+               btrfs_delalloc_release_metadata(inode, prealloc);
                goto out_put;
        }
-       btrfs_free_reserved_data_space(inode, 0, prealloc);
 
        ret = btrfs_write_out_ino_cache(root, trans, path, inode);
 out_put:
index 2f59759..e6811c4 100644 (file)
@@ -566,6 +566,8 @@ cont:
                                                     PAGE_SET_WRITEBACK |
                                                     page_error_op |
                                                     PAGE_END_WRITEBACK);
+                       btrfs_free_reserved_data_space_noquota(inode, start,
+                                               end - start + 1);
                        goto free_pages_out;
                }
        }
@@ -742,7 +744,7 @@ retry:
                lock_extent(io_tree, async_extent->start,
                            async_extent->start + async_extent->ram_size - 1);
 
-               ret = btrfs_reserve_extent(root,
+               ret = btrfs_reserve_extent(root, async_extent->ram_size,
                                           async_extent->compressed_size,
                                           async_extent->compressed_size,
                                           0, alloc_hint, &ins, 1, 1);
@@ -969,7 +971,8 @@ static noinline int cow_file_range(struct inode *inode,
                                     EXTENT_DEFRAG, PAGE_UNLOCK |
                                     PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
                                     PAGE_END_WRITEBACK);
-
+                       btrfs_free_reserved_data_space_noquota(inode, start,
+                                               end - start + 1);
                        *nr_written = *nr_written +
                             (end - start + PAGE_SIZE) / PAGE_SIZE;
                        *page_started = 1;
@@ -989,7 +992,7 @@ static noinline int cow_file_range(struct inode *inode,
                unsigned long op;
 
                cur_alloc_size = disk_num_bytes;
-               ret = btrfs_reserve_extent(root, cur_alloc_size,
+               ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
                                           root->sectorsize, 0, alloc_hint,
                                           &ins, 1, 1);
                if (ret < 0)
@@ -1489,8 +1492,10 @@ out_check:
                extent_clear_unlock_delalloc(inode, cur_offset,
                                             cur_offset + num_bytes - 1,
                                             locked_page, EXTENT_LOCKED |
-                                            EXTENT_DELALLOC, PAGE_UNLOCK |
-                                            PAGE_SET_PRIVATE2);
+                                            EXTENT_DELALLOC |
+                                            EXTENT_CLEAR_DATA_RESV,
+                                            PAGE_UNLOCK | PAGE_SET_PRIVATE2);
+
                if (!nolock && nocow)
                        btrfs_end_write_no_snapshoting(root);
                cur_offset = extent_end;
@@ -1807,7 +1812,9 @@ static void btrfs_clear_bit_hook(struct inode *inode,
                        return;
 
                if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
-                   && do_list && !(state->state & EXTENT_NORESERVE))
+                   && do_list && !(state->state & EXTENT_NORESERVE)
+                   && (*bits & (EXTENT_DO_ACCOUNTING |
+                   EXTENT_CLEAR_DATA_RESV)))
                        btrfs_free_reserved_data_space_noquota(inode,
                                        state->start, len);
 
@@ -3435,10 +3442,10 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                found_key.offset = 0;
                inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
                ret = PTR_ERR_OR_ZERO(inode);
-               if (ret && ret != -ESTALE)
+               if (ret && ret != -ENOENT)
                        goto out;
 
-               if (ret == -ESTALE && root == root->fs_info->tree_root) {
+               if (ret == -ENOENT && root == root->fs_info->tree_root) {
                        struct btrfs_root *dead_root;
                        struct btrfs_fs_info *fs_info = root->fs_info;
                        int is_dead_root = 0;
@@ -3474,7 +3481,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                 * Inode is already gone but the orphan item is still there,
                 * kill the orphan item.
                 */
-               if (ret == -ESTALE) {
+               if (ret == -ENOENT) {
                        trans = btrfs_start_transaction(root, 1);
                        if (IS_ERR(trans)) {
                                ret = PTR_ERR(trans);
@@ -3633,7 +3640,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
 /*
  * read an inode from the btree into the in-memory inode
  */
-static void btrfs_read_locked_inode(struct inode *inode)
+static int btrfs_read_locked_inode(struct inode *inode)
 {
        struct btrfs_path *path;
        struct extent_buffer *leaf;
@@ -3652,14 +3659,19 @@ static void btrfs_read_locked_inode(struct inode *inode)
                filled = true;
 
        path = btrfs_alloc_path();
-       if (!path)
+       if (!path) {
+               ret = -ENOMEM;
                goto make_bad;
+       }
 
        memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
 
        ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
-       if (ret)
+       if (ret) {
+               if (ret > 0)
+                       ret = -ENOENT;
                goto make_bad;
+       }
 
        leaf = path->nodes[0];
 
@@ -3812,11 +3824,12 @@ cache_acl:
        }
 
        btrfs_update_iflags(inode);
-       return;
+       return 0;
 
 make_bad:
        btrfs_free_path(path);
        make_bad_inode(inode);
+       return ret;
 }
 
 /*
@@ -4204,6 +4217,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
        int err = 0;
        struct btrfs_root *root = BTRFS_I(dir)->root;
        struct btrfs_trans_handle *trans;
+       u64 last_unlink_trans;
 
        if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
                return -ENOTEMPTY;
@@ -4226,11 +4240,27 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
        if (err)
                goto out;
 
+       last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
+
        /* now the directory is empty */
        err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
                                 dentry->d_name.name, dentry->d_name.len);
-       if (!err)
+       if (!err) {
                btrfs_i_size_write(inode, 0);
+               /*
+                * Propagate the last_unlink_trans value of the deleted dir to
+                * its parent directory. This is to prevent an unrecoverable
+                * log tree in the case we do something like this:
+                * 1) create dir foo
+                * 2) create snapshot under dir foo
+                * 3) delete the snapshot
+                * 4) rmdir foo
+                * 5) mkdir foo
+                * 6) fsync foo or some file inside foo
+                */
+               if (last_unlink_trans >= trans->transid)
+                       BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
+       }
 out:
        btrfs_end_transaction(trans, root);
        btrfs_btree_balance_dirty(root);
@@ -5606,7 +5636,9 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
                return ERR_PTR(-ENOMEM);
 
        if (inode->i_state & I_NEW) {
-               btrfs_read_locked_inode(inode);
+               int ret;
+
+               ret = btrfs_read_locked_inode(inode);
                if (!is_bad_inode(inode)) {
                        inode_tree_add(inode);
                        unlock_new_inode(inode);
@@ -5615,7 +5647,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
                } else {
                        unlock_new_inode(inode);
                        iput(inode);
-                       inode = ERR_PTR(-ESTALE);
+                       ASSERT(ret < 0);
+                       inode = ERR_PTR(ret < 0 ? ret : -ESTALE);
                }
        }
 
@@ -7225,7 +7258,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
        int ret;
 
        alloc_hint = get_extent_allocation_hint(inode, start, len);
-       ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
+       ret = btrfs_reserve_extent(root, len, len, root->sectorsize, 0,
                                   alloc_hint, &ins, 1, 1);
        if (ret)
                return ERR_PTR(ret);
@@ -7725,6 +7758,13 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                                ret = PTR_ERR(em2);
                                goto unlock_err;
                        }
+                       /*
+                        * For inode marked NODATACOW or extent marked PREALLOC,
+                        * use the existing or preallocated extent, so does not
+                        * need to adjust btrfs_space_info's bytes_may_use.
+                        */
+                       btrfs_free_reserved_data_space_noquota(inode,
+                                       start, len);
                        goto unlock;
                }
        }
@@ -7759,7 +7799,6 @@ unlock:
                        i_size_write(inode, start + len);
 
                adjust_dio_outstanding_extents(inode, dio_data, len);
-               btrfs_free_reserved_data_space(inode, start, len);
                WARN_ON(dio_data->reserve < len);
                dio_data->reserve -= len;
                dio_data->unsubmitted_oe_range_end = start + len;
@@ -10280,6 +10319,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
        u64 last_alloc = (u64)-1;
        int ret = 0;
        bool own_trans = true;
+       u64 end = start + num_bytes - 1;
 
        if (trans)
                own_trans = false;
@@ -10301,8 +10341,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
                 * sized chunks.
                 */
                cur_bytes = min(cur_bytes, last_alloc);
-               ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
-                                          *alloc_hint, &ins, 1, 0);
+               ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes,
+                               min_size, 0, *alloc_hint, &ins, 1, 0);
                if (ret) {
                        if (own_trans)
                                btrfs_end_transaction(trans, root);
@@ -10388,6 +10428,9 @@ next:
                if (own_trans)
                        btrfs_end_transaction(trans, root);
        }
+       if (cur_offset < end)
+               btrfs_free_reserved_data_space(inode, cur_offset,
+                       end - cur_offset + 1);
        return ret;
 }
 
index 14ed1e9..b2a2da5 100644 (file)
@@ -5084,7 +5084,7 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       return btrfs_qgroup_wait_for_completion(root->fs_info);
+       return btrfs_qgroup_wait_for_completion(root->fs_info, true);
 }
 
 static long _btrfs_ioctl_set_received_subvol(struct file *file,
index 93ee1c1..8db2e29 100644 (file)
@@ -995,7 +995,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
                goto out;
        fs_info->quota_enabled = 0;
        fs_info->pending_quota_state = 0;
-       btrfs_qgroup_wait_for_completion(fs_info);
+       btrfs_qgroup_wait_for_completion(fs_info, false);
        spin_lock(&fs_info->qgroup_lock);
        quota_root = fs_info->quota_root;
        fs_info->quota_root = NULL;
@@ -1453,10 +1453,9 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
        return ret;
 }
 
-struct btrfs_qgroup_extent_record *
-btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info,
-                                struct btrfs_delayed_ref_root *delayed_refs,
-                                struct btrfs_qgroup_extent_record *record)
+int btrfs_qgroup_insert_dirty_extent_nolock(struct btrfs_fs_info *fs_info,
+                               struct btrfs_delayed_ref_root *delayed_refs,
+                               struct btrfs_qgroup_extent_record *record)
 {
        struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
        struct rb_node *parent_node = NULL;
@@ -1475,12 +1474,42 @@ btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info,
                else if (bytenr > entry->bytenr)
                        p = &(*p)->rb_right;
                else
-                       return entry;
+                       return 1;
        }
 
        rb_link_node(&record->node, parent_node, p);
        rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
-       return NULL;
+       return 0;
+}
+
+int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans,
+               struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
+               gfp_t gfp_flag)
+{
+       struct btrfs_qgroup_extent_record *record;
+       struct btrfs_delayed_ref_root *delayed_refs;
+       int ret;
+
+       if (!fs_info->quota_enabled || bytenr == 0 || num_bytes == 0)
+               return 0;
+       if (WARN_ON(trans == NULL))
+               return -EINVAL;
+       record = kmalloc(sizeof(*record), gfp_flag);
+       if (!record)
+               return -ENOMEM;
+
+       delayed_refs = &trans->transaction->delayed_refs;
+       record->bytenr = bytenr;
+       record->num_bytes = num_bytes;
+       record->old_roots = NULL;
+
+       spin_lock(&delayed_refs->lock);
+       ret = btrfs_qgroup_insert_dirty_extent_nolock(fs_info, delayed_refs,
+                                                     record);
+       spin_unlock(&delayed_refs->lock);
+       if (ret > 0)
+               kfree(record);
+       return 0;
 }
 
 #define UPDATE_NEW     0
@@ -2303,6 +2332,10 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
        int err = -ENOMEM;
        int ret = 0;
 
+       mutex_lock(&fs_info->qgroup_rescan_lock);
+       fs_info->qgroup_rescan_running = true;
+       mutex_unlock(&fs_info->qgroup_rescan_lock);
+
        path = btrfs_alloc_path();
        if (!path)
                goto out;
@@ -2369,6 +2402,9 @@ out:
        }
 
 done:
+       mutex_lock(&fs_info->qgroup_rescan_lock);
+       fs_info->qgroup_rescan_running = false;
+       mutex_unlock(&fs_info->qgroup_rescan_lock);
        complete_all(&fs_info->qgroup_rescan_completion);
 }
 
@@ -2487,20 +2523,26 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
        return 0;
 }
 
-int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
+int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
+                                    bool interruptible)
 {
        int running;
        int ret = 0;
 
        mutex_lock(&fs_info->qgroup_rescan_lock);
        spin_lock(&fs_info->qgroup_lock);
-       running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+       running = fs_info->qgroup_rescan_running;
        spin_unlock(&fs_info->qgroup_lock);
        mutex_unlock(&fs_info->qgroup_rescan_lock);
 
-       if (running)
+       if (!running)
+               return 0;
+
+       if (interruptible)
                ret = wait_for_completion_interruptible(
                                        &fs_info->qgroup_rescan_completion);
+       else
+               wait_for_completion(&fs_info->qgroup_rescan_completion);
 
        return ret;
 }
index 710887c..1bc64c8 100644 (file)
@@ -46,7 +46,8 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
                        struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
 void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
-int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
+int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
+                                    bool interruptible);
 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info, u64 src, u64 dst);
 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
@@ -63,10 +64,35 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
 struct btrfs_delayed_extent_op;
 int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
                                         struct btrfs_fs_info *fs_info);
-struct btrfs_qgroup_extent_record *
-btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info,
-                                struct btrfs_delayed_ref_root *delayed_refs,
-                                struct btrfs_qgroup_extent_record *record);
+/*
+ * Insert one dirty extent record into @delayed_refs, informing qgroup to
+ * account that extent at commit trans time.
+ *
+ * No lock version, caller must acquire delayed ref lock and allocate memory.
+ *
+ * Return 0 on successful insert.
+ * Return >0 if a record for the same bytenr already exists; the caller can
+ * free @record safely.  No error return is possible.
+ */
+int btrfs_qgroup_insert_dirty_extent_nolock(
+               struct btrfs_fs_info *fs_info,
+               struct btrfs_delayed_ref_root *delayed_refs,
+               struct btrfs_qgroup_extent_record *record);
+
+/*
+ * Insert one dirty extent record into @delayed_refs, informing qgroup to
+ * account that extent at commit trans time.
+ *
+ * Better encapsulated version.
+ *
+ * Return 0 if the operation is done.
+ * Return <0 for error, like memory allocation failure or invalid parameter
+ * (NULL trans)
+ */
+int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans,
+               struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
+               gfp_t gfp_flag);
+
 int
 btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
                            struct btrfs_fs_info *fs_info,
index b26a5ae..8a2c2a0 100644 (file)
@@ -31,6 +31,7 @@
 #include "async-thread.h"
 #include "free-space-cache.h"
 #include "inode-map.h"
+#include "qgroup.h"
 
 /*
  * backref_node, mapping_node and tree_block start with this
@@ -3037,15 +3038,19 @@ int prealloc_file_extent_cluster(struct inode *inode,
        u64 num_bytes;
        int nr = 0;
        int ret = 0;
+       u64 prealloc_start = cluster->start - offset;
+       u64 prealloc_end = cluster->end - offset;
+       u64 cur_offset;
 
        BUG_ON(cluster->start != cluster->boundary[0]);
        inode_lock(inode);
 
-       ret = btrfs_check_data_free_space(inode, cluster->start,
-                                         cluster->end + 1 - cluster->start);
+       ret = btrfs_check_data_free_space(inode, prealloc_start,
+                                         prealloc_end + 1 - prealloc_start);
        if (ret)
                goto out;
 
+       cur_offset = prealloc_start;
        while (nr < cluster->nr) {
                start = cluster->boundary[nr] - offset;
                if (nr + 1 < cluster->nr)
@@ -3055,16 +3060,21 @@ int prealloc_file_extent_cluster(struct inode *inode,
 
                lock_extent(&BTRFS_I(inode)->io_tree, start, end);
                num_bytes = end + 1 - start;
+               if (cur_offset < start)
+                       btrfs_free_reserved_data_space(inode, cur_offset,
+                                       start - cur_offset);
                ret = btrfs_prealloc_file_range(inode, 0, start,
                                                num_bytes, num_bytes,
                                                end + 1, &alloc_hint);
+               cur_offset = end + 1;
                unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
                if (ret)
                        break;
                nr++;
        }
-       btrfs_free_reserved_data_space(inode, cluster->start,
-                                      cluster->end + 1 - cluster->start);
+       if (cur_offset < prealloc_end)
+               btrfs_free_reserved_data_space(inode, cur_offset,
+                                      prealloc_end + 1 - cur_offset);
 out:
        inode_unlock(inode);
        return ret;
@@ -3916,6 +3926,90 @@ int prepare_to_relocate(struct reloc_control *rc)
        return 0;
 }
 
+/*
+ * Qgroup fixer for data chunk relocation.
+ * The data relocation is done in the following steps
+ * 1) Copy data extents into data reloc tree
+ * 2) Create tree reloc tree(special snapshot) for related subvolumes
+ * 3) Modify file extents in tree reloc tree
+ * 4) Merge tree reloc tree with original fs tree, by swapping tree blocks
+ *
+ * The problem is that the data and tree reloc trees are not accounted to
+ * qgroup, and 4) will only inform qgroup to track tree block changes, not
+ * the file extents in those tree blocks.
+ *
+ * The good news is that the related data extents are all in the data reloc
+ * tree, so we only need to inform qgroup to track all file extents in the
+ * data reloc tree before committing the transaction.
+ */
+static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans,
+                                            struct reloc_control *rc)
+{
+       struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
+       struct inode *inode = rc->data_inode;
+       struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root;
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       int ret = 0;
+
+       if (!fs_info->quota_enabled)
+               return 0;
+
+       /*
+        * The qgroup fix is only valid for the stage where we update data
+        * pointers.
+        * For the MOVING_DATA stage, we would miss the timing of swapping
+        * tree blocks, so we don't fix it there.
+        */
+       if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found))
+               return 0;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+       key.objectid = btrfs_ino(inode);
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       key.offset = 0;
+
+       ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0);
+       if (ret < 0)
+               goto out;
+
+       lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1);
+       while (1) {
+               struct btrfs_file_extent_item *fi;
+
+               btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+               if (key.objectid > btrfs_ino(inode))
+                       break;
+               if (key.type != BTRFS_EXTENT_DATA_KEY)
+                       goto next;
+               fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                                   struct btrfs_file_extent_item);
+               if (btrfs_file_extent_type(path->nodes[0], fi) !=
+                               BTRFS_FILE_EXTENT_REG)
+                       goto next;
+               ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info,
+                       btrfs_file_extent_disk_bytenr(path->nodes[0], fi),
+                       btrfs_file_extent_disk_num_bytes(path->nodes[0], fi),
+                       GFP_NOFS);
+               if (ret < 0)
+                       break;
+next:
+               ret = btrfs_next_item(data_reloc_root, path);
+               if (ret < 0)
+                       break;
+               if (ret > 0) {
+                       ret = 0;
+                       break;
+               }
+       }
+       unlock_extent(&BTRFS_I(inode)->io_tree, 0 , (u64)-1);
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
 static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 {
        struct rb_root blocks = RB_ROOT;
@@ -4102,10 +4196,16 @@ restart:
 
        /* get rid of pinned extents */
        trans = btrfs_join_transaction(rc->extent_root);
-       if (IS_ERR(trans))
+       if (IS_ERR(trans)) {
                err = PTR_ERR(trans);
-       else
-               btrfs_commit_transaction(trans, rc->extent_root);
+               goto out_free;
+       }
+       err = qgroup_fix_relocated_data_extents(trans, rc);
+       if (err < 0) {
+               btrfs_abort_transaction(trans, err);
+               goto out_free;
+       }
+       btrfs_commit_transaction(trans, rc->extent_root);
 out_free:
        btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
        btrfs_free_path(path);
@@ -4468,10 +4568,16 @@ int btrfs_recover_relocation(struct btrfs_root *root)
        unset_reloc_control(rc);
 
        trans = btrfs_join_transaction(rc->extent_root);
-       if (IS_ERR(trans))
+       if (IS_ERR(trans)) {
                err = PTR_ERR(trans);
-       else
-               err = btrfs_commit_transaction(trans, rc->extent_root);
+               goto out_free;
+       }
+       err = qgroup_fix_relocated_data_extents(trans, rc);
+       if (err < 0) {
+               btrfs_abort_transaction(trans, err);
+               goto out_free;
+       }
+       err = btrfs_commit_transaction(trans, rc->extent_root);
 out_free:
        kfree(rc);
 out:
index 7fd7e18..0912960 100644 (file)
@@ -272,6 +272,23 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
                root_key.objectid = key.offset;
                key.offset++;
 
+               /*
+                * The root might have been inserted already, as before we look
+                * for orphan roots, log replay might have happened, which
+                * triggers a transaction commit and qgroup accounting, which
+                * in turn reads and inserts fs roots while doing backref
+                * walking.
+                */
+               root = btrfs_lookup_fs_root(tree_root->fs_info,
+                                           root_key.objectid);
+               if (root) {
+                       WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
+                                         &root->state));
+                       if (btrfs_root_refs(&root->root_item) == 0)
+                               btrfs_add_dead_root(root);
+                       continue;
+               }
+
                root = btrfs_read_fs_root(tree_root, &root_key);
                err = PTR_ERR_OR_ZERO(root);
                if (err && err != -ENOENT) {
@@ -310,16 +327,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
                set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
 
                err = btrfs_insert_fs_root(root->fs_info, root);
-               /*
-                * The root might have been inserted already, as before we look
-                * for orphan roots, log replay might have happened, which
-                * triggers a transaction commit and qgroup accounting, which
-                * in turn reads and inserts fs roots while doing backref
-                * walking.
-                */
-               if (err == -EEXIST)
-                       err = 0;
                if (err) {
+                       BUG_ON(err == -EEXIST);
                        btrfs_free_fs_root(root);
                        break;
                }
index b71dd29..efe129f 100644 (file)
@@ -231,7 +231,6 @@ struct pending_dir_move {
        u64 parent_ino;
        u64 ino;
        u64 gen;
-       bool is_orphan;
        struct list_head update_refs;
 };
 
@@ -274,6 +273,39 @@ struct name_cache_entry {
        char name[];
 };
 
+static void inconsistent_snapshot_error(struct send_ctx *sctx,
+                                       enum btrfs_compare_tree_result result,
+                                       const char *what)
+{
+       const char *result_string;
+
+       switch (result) {
+       case BTRFS_COMPARE_TREE_NEW:
+               result_string = "new";
+               break;
+       case BTRFS_COMPARE_TREE_DELETED:
+               result_string = "deleted";
+               break;
+       case BTRFS_COMPARE_TREE_CHANGED:
+               result_string = "updated";
+               break;
+       case BTRFS_COMPARE_TREE_SAME:
+               ASSERT(0);
+               result_string = "unchanged";
+               break;
+       default:
+               ASSERT(0);
+               result_string = "unexpected";
+       }
+
+       btrfs_err(sctx->send_root->fs_info,
+                 "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
+                 result_string, what, sctx->cmp_key->objectid,
+                 sctx->send_root->root_key.objectid,
+                 (sctx->parent_root ?
+                  sctx->parent_root->root_key.objectid : 0));
+}
+
 static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
 
 static struct waiting_dir_move *
@@ -1861,7 +1893,8 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
         * was already unlinked/moved, so we can safely assume that we will not
         * overwrite anything at this point in time.
         */
-       if (other_inode > sctx->send_progress) {
+       if (other_inode > sctx->send_progress ||
+           is_waiting_for_move(sctx, other_inode)) {
                ret = get_inode_info(sctx->parent_root, other_inode, NULL,
                                who_gen, NULL, NULL, NULL, NULL);
                if (ret < 0)
@@ -2502,6 +2535,8 @@ verbose_printk("btrfs: send_utimes %llu\n", ino);
        key.type = BTRFS_INODE_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
+       if (ret > 0)
+               ret = -ENOENT;
        if (ret < 0)
                goto out;
 
@@ -2947,6 +2982,10 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
                }
 
                if (loc.objectid > send_progress) {
+                       struct orphan_dir_info *odi;
+
+                       odi = get_orphan_dir_info(sctx, dir);
+                       free_orphan_dir_info(sctx, odi);
                        ret = 0;
                        goto out;
                }
@@ -3047,7 +3086,6 @@ static int add_pending_dir_move(struct send_ctx *sctx,
        pm->parent_ino = parent_ino;
        pm->ino = ino;
        pm->gen = ino_gen;
-       pm->is_orphan = is_orphan;
        INIT_LIST_HEAD(&pm->list);
        INIT_LIST_HEAD(&pm->update_refs);
        RB_CLEAR_NODE(&pm->node);
@@ -3113,6 +3151,48 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
        return NULL;
 }
 
+static int path_loop(struct send_ctx *sctx, struct fs_path *name,
+                    u64 ino, u64 gen, u64 *ancestor_ino)
+{
+       int ret = 0;
+       u64 parent_inode = 0;
+       u64 parent_gen = 0;
+       u64 start_ino = ino;
+
+       *ancestor_ino = 0;
+       while (ino != BTRFS_FIRST_FREE_OBJECTID) {
+               fs_path_reset(name);
+
+               if (is_waiting_for_rm(sctx, ino))
+                       break;
+               if (is_waiting_for_move(sctx, ino)) {
+                       if (*ancestor_ino == 0)
+                               *ancestor_ino = ino;
+                       ret = get_first_ref(sctx->parent_root, ino,
+                                           &parent_inode, &parent_gen, name);
+               } else {
+                       ret = __get_cur_name_and_parent(sctx, ino, gen,
+                                                       &parent_inode,
+                                                       &parent_gen, name);
+                       if (ret > 0) {
+                               ret = 0;
+                               break;
+                       }
+               }
+               if (ret < 0)
+                       break;
+               if (parent_inode == start_ino) {
+                       ret = 1;
+                       if (*ancestor_ino == 0)
+                               *ancestor_ino = ino;
+                       break;
+               }
+               ino = parent_inode;
+               gen = parent_gen;
+       }
+       return ret;
+}
+
 static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
 {
        struct fs_path *from_path = NULL;
@@ -3123,6 +3203,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
        u64 parent_ino, parent_gen;
        struct waiting_dir_move *dm = NULL;
        u64 rmdir_ino = 0;
+       u64 ancestor;
+       bool is_orphan;
        int ret;
 
        name = fs_path_alloc();
@@ -3135,9 +3217,10 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
        dm = get_waiting_dir_move(sctx, pm->ino);
        ASSERT(dm);
        rmdir_ino = dm->rmdir_ino;
+       is_orphan = dm->orphanized;
        free_waiting_dir_move(sctx, dm);
 
-       if (pm->is_orphan) {
+       if (is_orphan) {
                ret = gen_unique_name(sctx, pm->ino,
                                      pm->gen, from_path);
        } else {
@@ -3155,6 +3238,24 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
                goto out;
 
        sctx->send_progress = sctx->cur_ino + 1;
+       ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
+       if (ret < 0)
+               goto out;
+       if (ret) {
+               LIST_HEAD(deleted_refs);
+               ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
+               ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
+                                          &pm->update_refs, &deleted_refs,
+                                          is_orphan);
+               if (ret < 0)
+                       goto out;
+               if (rmdir_ino) {
+                       dm = get_waiting_dir_move(sctx, pm->ino);
+                       ASSERT(dm);
+                       dm->rmdir_ino = rmdir_ino;
+               }
+               goto out;
+       }
        fs_path_reset(name);
        to_path = name;
        name = NULL;
@@ -3174,7 +3275,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
                        /* already deleted */
                        goto finish;
                }
-               ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1);
+               ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino);
                if (ret < 0)
                        goto out;
                if (!ret)
@@ -3204,8 +3305,18 @@ finish:
         * and old parent(s).
         */
        list_for_each_entry(cur, &pm->update_refs, list) {
-               if (cur->dir == rmdir_ino)
+               /*
+                * The parent inode might have been deleted in the send snapshot
+                */
+               ret = get_inode_info(sctx->send_root, cur->dir, NULL,
+                                    NULL, NULL, NULL, NULL, NULL);
+               if (ret == -ENOENT) {
+                       ret = 0;
                        continue;
+               }
+               if (ret < 0)
+                       goto out;
+
                ret = send_utimes(sctx, cur->dir, cur->dir_gen);
                if (ret < 0)
                        goto out;
@@ -3325,6 +3436,7 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
        u64 left_gen;
        u64 right_gen;
        int ret = 0;
+       struct waiting_dir_move *wdm;
 
        if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
                return 0;
@@ -3383,7 +3495,8 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
                goto out;
        }
 
-       if (is_waiting_for_move(sctx, di_key.objectid)) {
+       wdm = get_waiting_dir_move(sctx, di_key.objectid);
+       if (wdm && !wdm->orphanized) {
                ret = add_pending_dir_move(sctx,
                                           sctx->cur_ino,
                                           sctx->cur_inode_gen,
@@ -3470,7 +3583,8 @@ static int wait_for_parent_move(struct send_ctx *sctx,
                        ret = is_ancestor(sctx->parent_root,
                                          sctx->cur_ino, sctx->cur_inode_gen,
                                          ino, path_before);
-                       break;
+                       if (ret)
+                               break;
                }
 
                fs_path_reset(path_before);
@@ -3643,11 +3757,26 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                                goto out;
                        if (ret) {
                                struct name_cache_entry *nce;
+                               struct waiting_dir_move *wdm;
 
                                ret = orphanize_inode(sctx, ow_inode, ow_gen,
                                                cur->full_path);
                                if (ret < 0)
                                        goto out;
+
+                               /*
+                                * If ow_inode has its rename operation delayed,
+                                * make sure that its orphanized name is used in
+                                * the source path when performing its rename
+                                * operation.
+                                */
+                               if (is_waiting_for_move(sctx, ow_inode)) {
+                                       wdm = get_waiting_dir_move(sctx,
+                                                                  ow_inode);
+                                       ASSERT(wdm);
+                                       wdm->orphanized = true;
+                               }
+
                                /*
                                 * Make sure we clear our orphanized inode's
                                 * name from the name cache. This is because the
@@ -3663,6 +3792,19 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                                        name_cache_delete(sctx, nce);
                                        kfree(nce);
                                }
+
+                               /*
+                                * ow_inode might currently be an ancestor of
+                                * cur_ino, therefore compute valid_path (the
+                                * current path of cur_ino) again because it
+                                * might contain the pre-orphanization name of
+                                * ow_inode, which is no longer valid.
+                                */
+                               fs_path_reset(valid_path);
+                               ret = get_cur_path(sctx, sctx->cur_ino,
+                                          sctx->cur_inode_gen, valid_path);
+                               if (ret < 0)
+                                       goto out;
                        } else {
                                ret = send_unlink(sctx, cur->full_path);
                                if (ret < 0)
@@ -5602,7 +5744,10 @@ static int changed_ref(struct send_ctx *sctx,
 {
        int ret = 0;
 
-       BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+       if (sctx->cur_ino != sctx->cmp_key->objectid) {
+               inconsistent_snapshot_error(sctx, result, "reference");
+               return -EIO;
+       }
 
        if (!sctx->cur_inode_new_gen &&
            sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
@@ -5627,7 +5772,10 @@ static int changed_xattr(struct send_ctx *sctx,
 {
        int ret = 0;
 
-       BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+       if (sctx->cur_ino != sctx->cmp_key->objectid) {
+               inconsistent_snapshot_error(sctx, result, "xattr");
+               return -EIO;
+       }
 
        if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
                if (result == BTRFS_COMPARE_TREE_NEW)
@@ -5651,7 +5799,10 @@ static int changed_extent(struct send_ctx *sctx,
 {
        int ret = 0;
 
-       BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+       if (sctx->cur_ino != sctx->cmp_key->objectid) {
+               inconsistent_snapshot_error(sctx, result, "extent");
+               return -EIO;
+       }
 
        if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
                if (result != BTRFS_COMPARE_TREE_DELETED)
index 864ce33..4071fe2 100644 (file)
@@ -2241,6 +2241,13 @@ static int btrfs_freeze(struct super_block *sb)
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = btrfs_sb(sb)->tree_root;
 
+       root->fs_info->fs_frozen = 1;
+       /*
+        * We don't need a barrier here, we'll wait for any transaction that
+        * could be in progress on other threads (and do delayed iputs that
+        * we want to avoid on a frozen filesystem), or do the commit
+        * ourselves.
+        */
        trans = btrfs_attach_transaction_barrier(root);
        if (IS_ERR(trans)) {
                /* no transaction, don't bother */
@@ -2251,6 +2258,14 @@ static int btrfs_freeze(struct super_block *sb)
        return btrfs_commit_transaction(trans, root);
 }
 
+static int btrfs_unfreeze(struct super_block *sb)
+{
+       struct btrfs_root *root = btrfs_sb(sb)->tree_root;
+
+       root->fs_info->fs_frozen = 0;
+       return 0;
+}
+
 static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
@@ -2299,6 +2314,7 @@ static const struct super_operations btrfs_super_ops = {
        .statfs         = btrfs_statfs,
        .remount_fs     = btrfs_remount,
        .freeze_fs      = btrfs_freeze,
+       .unfreeze_fs    = btrfs_unfreeze,
 };
 
 static const struct file_operations btrfs_ctl_fops = {
index 9cca0a7..95d4191 100644 (file)
@@ -2278,8 +2278,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
        kmem_cache_free(btrfs_trans_handle_cachep, trans);
 
+       /*
+        * If the fs has been frozen, we cannot run delayed iputs; doing so
+        * would result in a deadlock on SB_FREEZE_FS.
+        */
        if (current != root->fs_info->transaction_kthread &&
-           current != root->fs_info->cleaner_kthread)
+           current != root->fs_info->cleaner_kthread &&
+           !root->fs_info->fs_frozen)
                btrfs_run_delayed_iputs(root);
 
        return ret;
index d31a0c4..e935035 100644 (file)
@@ -27,6 +27,7 @@
 #include "backref.h"
 #include "hash.h"
 #include "compression.h"
+#include "qgroup.h"
 
 /* magic values for the inode_only field in btrfs_log_inode:
  *
@@ -680,6 +681,21 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
                ins.type = BTRFS_EXTENT_ITEM_KEY;
                offset = key->offset - btrfs_file_extent_offset(eb, item);
 
+               /*
+                * Manually record the dirty extent: here we do a shallow
+                * copy of the file extent item and skip the normal backref
+                * update, modifying the extent tree all by ourselves.
+                * So we need to manually record the dirty extent for qgroup,
+                * as the owner of the file extent changes from the log tree
+                * (doesn't affect qgroup) to the fs/file tree (affects qgroup).
+                */
+               ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info,
+                               btrfs_file_extent_disk_bytenr(eb, item),
+                               btrfs_file_extent_disk_num_bytes(eb, item),
+                               GFP_NOFS);
+               if (ret < 0)
+                       goto out;
+
                if (ins.objectid > 0) {
                        u64 csum_start;
                        u64 csum_end;
@@ -2807,7 +2823,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
         */
        mutex_unlock(&root->log_mutex);
 
-       btrfs_init_log_ctx(&root_log_ctx);
+       btrfs_init_log_ctx(&root_log_ctx, NULL);
 
        mutex_lock(&log_root_tree->log_mutex);
        atomic_inc(&log_root_tree->log_batch);
@@ -4469,7 +4485,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
 static int btrfs_check_ref_name_override(struct extent_buffer *eb,
                                         const int slot,
                                         const struct btrfs_key *key,
-                                        struct inode *inode)
+                                        struct inode *inode,
+                                        u64 *other_ino)
 {
        int ret;
        struct btrfs_path *search_path;
@@ -4528,7 +4545,16 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
                                           search_path, parent,
                                           name, this_name_len, 0);
                if (di && !IS_ERR(di)) {
-                       ret = 1;
+                       struct btrfs_key di_key;
+
+                       btrfs_dir_item_key_to_cpu(search_path->nodes[0],
+                                                 di, &di_key);
+                       if (di_key.type == BTRFS_INODE_ITEM_KEY) {
+                               ret = 1;
+                               *other_ino = di_key.objectid;
+                       } else {
+                               ret = -EAGAIN;
+                       }
                        goto out;
                } else if (IS_ERR(di)) {
                        ret = PTR_ERR(di);
@@ -4722,16 +4748,72 @@ again:
                if ((min_key.type == BTRFS_INODE_REF_KEY ||
                     min_key.type == BTRFS_INODE_EXTREF_KEY) &&
                    BTRFS_I(inode)->generation == trans->transid) {
+                       u64 other_ino = 0;
+
                        ret = btrfs_check_ref_name_override(path->nodes[0],
                                                            path->slots[0],
-                                                           &min_key, inode);
+                                                           &min_key, inode,
+                                                           &other_ino);
                        if (ret < 0) {
                                err = ret;
                                goto out_unlock;
-                       } else if (ret > 0) {
-                               err = 1;
-                               btrfs_set_log_full_commit(root->fs_info, trans);
-                               goto out_unlock;
+                       } else if (ret > 0 && ctx &&
+                                  other_ino != btrfs_ino(ctx->inode)) {
+                               struct btrfs_key inode_key;
+                               struct inode *other_inode;
+
+                               if (ins_nr > 0) {
+                                       ins_nr++;
+                               } else {
+                                       ins_nr = 1;
+                                       ins_start_slot = path->slots[0];
+                               }
+                               ret = copy_items(trans, inode, dst_path, path,
+                                                &last_extent, ins_start_slot,
+                                                ins_nr, inode_only,
+                                                logged_isize);
+                               if (ret < 0) {
+                                       err = ret;
+                                       goto out_unlock;
+                               }
+                               ins_nr = 0;
+                               btrfs_release_path(path);
+                               inode_key.objectid = other_ino;
+                               inode_key.type = BTRFS_INODE_ITEM_KEY;
+                               inode_key.offset = 0;
+                               other_inode = btrfs_iget(root->fs_info->sb,
+                                                        &inode_key, root,
+                                                        NULL);
+                               /*
+                                * If the other inode that had a conflicting dir
+                                * entry was deleted in the current transaction,
+                                * we don't need to do more work nor fallback to
+                                * a transaction commit.
+                                */
+                               if (IS_ERR(other_inode) &&
+                                   PTR_ERR(other_inode) == -ENOENT) {
+                                       goto next_key;
+                               } else if (IS_ERR(other_inode)) {
+                                       err = PTR_ERR(other_inode);
+                                       goto out_unlock;
+                               }
+                               /*
+                                * We are safe logging the other inode without
+                                * acquiring its i_mutex as long as we log with
+                                * the LOG_INODE_EXISTS mode. We're safe against
+                                * concurrent renames of the other inode as well
+                                * because during a rename we pin the log and
+                                * update the log with the new name before we
+                                * unpin it.
+                                */
+                               err = btrfs_log_inode(trans, root, other_inode,
+                                                     LOG_INODE_EXISTS,
+                                                     0, LLONG_MAX, ctx);
+                               iput(other_inode);
+                               if (err)
+                                       goto out_unlock;
+                               else
+                                       goto next_key;
                        }
                }
 
@@ -4799,7 +4881,7 @@ next_slot:
                        ins_nr = 0;
                }
                btrfs_release_path(path);
-
+next_key:
                if (min_key.offset < (u64)-1) {
                        min_key.offset++;
                } else if (min_key.type < max_key.type) {
@@ -4993,8 +5075,12 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
                if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
                        break;
 
-               if (IS_ROOT(parent))
+               if (IS_ROOT(parent)) {
+                       inode = d_inode(parent);
+                       if (btrfs_must_commit_transaction(trans, inode))
+                               ret = 1;
                        break;
+               }
 
                parent = dget_parent(parent);
                dput(old_parent);
index a9f1b75..ab858e3 100644 (file)
@@ -30,15 +30,18 @@ struct btrfs_log_ctx {
        int log_transid;
        int io_err;
        bool log_new_dentries;
+       struct inode *inode;
        struct list_head list;
 };
 
-static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx)
+static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
+                                     struct inode *inode)
 {
        ctx->log_ret = 0;
        ctx->log_transid = 0;
        ctx->io_err = 0;
        ctx->log_new_dentries = false;
+       ctx->inode = inode;
        INIT_LIST_HEAD(&ctx->list);
 }
 
index 51f1255..035efce 100644 (file)
@@ -834,10 +834,6 @@ static void __free_device(struct work_struct *work)
        struct btrfs_device *device;
 
        device = container_of(work, struct btrfs_device, rcu_work);
-
-       if (device->bdev)
-               blkdev_put(device->bdev, device->mode);
-
        rcu_string_free(device->name);
        kfree(device);
 }
@@ -852,6 +848,17 @@ static void free_device(struct rcu_head *head)
        schedule_work(&device->rcu_work);
 }
 
+static void btrfs_close_bdev(struct btrfs_device *device)
+{
+       if (device->bdev && device->writeable) {
+               sync_blockdev(device->bdev);
+               invalidate_bdev(device->bdev);
+       }
+
+       if (device->bdev)
+               blkdev_put(device->bdev, device->mode);
+}
+
 static void btrfs_close_one_device(struct btrfs_device *device)
 {
        struct btrfs_fs_devices *fs_devices = device->fs_devices;
@@ -870,10 +877,7 @@ static void btrfs_close_one_device(struct btrfs_device *device)
        if (device->missing)
                fs_devices->missing_devices--;
 
-       if (device->bdev && device->writeable) {
-               sync_blockdev(device->bdev);
-               invalidate_bdev(device->bdev);
-       }
+       btrfs_close_bdev(device);
 
        new_device = btrfs_alloc_device(NULL, &device->devid,
                                        device->uuid);
@@ -1932,6 +1936,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid)
                btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device);
        }
 
+       btrfs_close_bdev(device);
+
        call_rcu(&device->rcu, free_device);
 
        num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
@@ -2025,6 +2031,9 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
                /* zero out the old super if it is writable */
                btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
        }
+
+       btrfs_close_bdev(srcdev);
+
        call_rcu(&srcdev->rcu, free_device);
 
        /*
@@ -2080,6 +2089,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
         * the device_list_mutex lock.
         */
        btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
+
+       btrfs_close_bdev(tgtdev);
        call_rcu(&tgtdev->rcu, free_device);
 }
 
index 99115ca..16e6ded 100644 (file)
@@ -1347,9 +1347,12 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
 {
        struct inode *inode = &ci->vfs_inode;
        struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
-       struct ceph_mds_session *session = *psession;
+       struct ceph_mds_session *session = NULL;
        int mds;
+
        dout("ceph_flush_snaps %p\n", inode);
+       if (psession)
+               session = *psession;
 retry:
        spin_lock(&ci->i_ceph_lock);
        if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) {
index fa59a85..f72d4ae 100644 (file)
@@ -2759,6 +2759,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
        } else {
                path = NULL;
                pathlen = 0;
+               pathbase = 0;
        }
 
        spin_lock(&ci->i_ceph_lock);
index eea6491..466f7d6 100644 (file)
@@ -607,20 +607,54 @@ static const struct file_operations format2_fops;
 static const struct file_operations format3_fops;
 static const struct file_operations format4_fops;
 
-static int table_open(struct inode *inode, struct file *file)
+static int table_open1(struct inode *inode, struct file *file)
 {
        struct seq_file *seq;
-       int ret = -1;
+       int ret;
 
-       if (file->f_op == &format1_fops)
-               ret = seq_open(file, &format1_seq_ops);
-       else if (file->f_op == &format2_fops)
-               ret = seq_open(file, &format2_seq_ops);
-       else if (file->f_op == &format3_fops)
-               ret = seq_open(file, &format3_seq_ops);
-       else if (file->f_op == &format4_fops)
-               ret = seq_open(file, &format4_seq_ops);
+       ret = seq_open(file, &format1_seq_ops);
+       if (ret)
+               return ret;
+
+       seq = file->private_data;
+       seq->private = inode->i_private; /* the dlm_ls */
+       return 0;
+}
+
+static int table_open2(struct inode *inode, struct file *file)
+{
+       struct seq_file *seq;
+       int ret;
+
+       ret = seq_open(file, &format2_seq_ops);
+       if (ret)
+               return ret;
+
+       seq = file->private_data;
+       seq->private = inode->i_private; /* the dlm_ls */
+       return 0;
+}
+
+static int table_open3(struct inode *inode, struct file *file)
+{
+       struct seq_file *seq;
+       int ret;
+
+       ret = seq_open(file, &format3_seq_ops);
+       if (ret)
+               return ret;
+
+       seq = file->private_data;
+       seq->private = inode->i_private; /* the dlm_ls */
+       return 0;
+}
+
+static int table_open4(struct inode *inode, struct file *file)
+{
+       struct seq_file *seq;
+       int ret;
 
+       ret = seq_open(file, &format4_seq_ops);
        if (ret)
                return ret;
 
@@ -631,7 +665,7 @@ static int table_open(struct inode *inode, struct file *file)
 
 static const struct file_operations format1_fops = {
        .owner   = THIS_MODULE,
-       .open    = table_open,
+       .open    = table_open1,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release
@@ -639,7 +673,7 @@ static const struct file_operations format1_fops = {
 
 static const struct file_operations format2_fops = {
        .owner   = THIS_MODULE,
-       .open    = table_open,
+       .open    = table_open2,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release
@@ -647,7 +681,7 @@ static const struct file_operations format2_fops = {
 
 static const struct file_operations format3_fops = {
        .owner   = THIS_MODULE,
-       .open    = table_open,
+       .open    = table_open3,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release
@@ -655,7 +689,7 @@ static const struct file_operations format3_fops = {
 
 static const struct file_operations format4_fops = {
        .owner   = THIS_MODULE,
-       .open    = table_open,
+       .open    = table_open4,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release
index 3131747..c6ea25a 100644 (file)
@@ -5466,8 +5466,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
                                                      sbi->s_want_extra_isize,
                                                      iloc, handle);
                        if (ret) {
-                               ext4_set_inode_state(inode,
-                                                    EXT4_STATE_NO_EXPAND);
                                if (mnt_count !=
                                        le16_to_cpu(sbi->s_es->s_mnt_count)) {
                                        ext4_warning(inode->i_sb,
index 1c593aa..3ec8708 100644 (file)
@@ -2211,6 +2211,7 @@ void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
 
 /* Called at mount-time, super-block is locked */
 static int ext4_check_descriptors(struct super_block *sb,
+                                 ext4_fsblk_t sb_block,
                                  ext4_group_t *first_not_zeroed)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2241,6 +2242,11 @@ static int ext4_check_descriptors(struct super_block *sb,
                        grp = i;
 
                block_bitmap = ext4_block_bitmap(sb, gdp);
+               if (block_bitmap == sb_block) {
+                       ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+                                "Block bitmap for group %u overlaps "
+                                "superblock", i);
+               }
                if (block_bitmap < first_block || block_bitmap > last_block) {
                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
                               "Block bitmap for group %u not in group "
@@ -2248,6 +2254,11 @@ static int ext4_check_descriptors(struct super_block *sb,
                        return 0;
                }
                inode_bitmap = ext4_inode_bitmap(sb, gdp);
+               if (inode_bitmap == sb_block) {
+                       ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+                                "Inode bitmap for group %u overlaps "
+                                "superblock", i);
+               }
                if (inode_bitmap < first_block || inode_bitmap > last_block) {
                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
                               "Inode bitmap for group %u not in group "
@@ -2255,6 +2266,11 @@ static int ext4_check_descriptors(struct super_block *sb,
                        return 0;
                }
                inode_table = ext4_inode_table(sb, gdp);
+               if (inode_table == sb_block) {
+                       ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+                                "Inode table for group %u overlaps "
+                                "superblock", i);
+               }
                if (inode_table < first_block ||
                    inode_table + sbi->s_itb_per_group - 1 > last_block) {
                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -3757,7 +3773,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                        goto failed_mount2;
                }
        }
-       if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
+       if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
                ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
                ret = -EFSCORRUPTED;
                goto failed_mount2;
index 39e9cfb..2eb935c 100644 (file)
@@ -1353,15 +1353,19 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
        size_t min_offs, free;
        int total_ino;
        void *base, *start, *end;
-       int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
+       int error = 0, tried_min_extra_isize = 0;
        int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
+       int isize_diff; /* How much do we need to grow i_extra_isize */
 
        down_write(&EXT4_I(inode)->xattr_sem);
+       /*
+        * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty
+        */
+       ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
 retry:
-       if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) {
-               up_write(&EXT4_I(inode)->xattr_sem);
-               return 0;
-       }
+       isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize;
+       if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
+               goto out;
 
        header = IHDR(inode, raw_inode);
        entry = IFIRST(header);
@@ -1382,7 +1386,7 @@ retry:
                goto cleanup;
 
        free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
-       if (free >= new_extra_isize) {
+       if (free >= isize_diff) {
                entry = IFIRST(header);
                ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize
                                - new_extra_isize, (void *)raw_inode +
@@ -1390,8 +1394,7 @@ retry:
                                (void *)header, total_ino,
                                inode->i_sb->s_blocksize);
                EXT4_I(inode)->i_extra_isize = new_extra_isize;
-               error = 0;
-               goto cleanup;
+               goto out;
        }
 
        /*
@@ -1414,7 +1417,7 @@ retry:
                end = bh->b_data + bh->b_size;
                min_offs = end - base;
                free = ext4_xattr_free_space(first, &min_offs, base, NULL);
-               if (free < new_extra_isize) {
+               if (free < isize_diff) {
                        if (!tried_min_extra_isize && s_min_extra_isize) {
                                tried_min_extra_isize++;
                                new_extra_isize = s_min_extra_isize;
@@ -1428,7 +1431,7 @@ retry:
                free = inode->i_sb->s_blocksize;
        }
 
-       while (new_extra_isize > 0) {
+       while (isize_diff > 0) {
                size_t offs, size, entry_size;
                struct ext4_xattr_entry *small_entry = NULL;
                struct ext4_xattr_info i = {
@@ -1459,7 +1462,7 @@ retry:
                        EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
                                        EXT4_XATTR_LEN(last->e_name_len);
                        if (total_size <= free && total_size < min_total_size) {
-                               if (total_size < new_extra_isize) {
+                               if (total_size < isize_diff) {
                                        small_entry = last;
                                } else {
                                        entry = last;
@@ -1514,22 +1517,22 @@ retry:
                error = ext4_xattr_ibody_set(handle, inode, &i, is);
                if (error)
                        goto cleanup;
+               total_ino -= entry_size;
 
                entry = IFIRST(header);
-               if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize)
-                       shift_bytes = new_extra_isize;
+               if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff)
+                       shift_bytes = isize_diff;
                else
-                       shift_bytes = entry_size + size;
+                       shift_bytes = entry_size + EXT4_XATTR_SIZE(size);
                /* Adjust the offsets and shift the remaining entries ahead */
-               ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize -
-                       shift_bytes, (void *)raw_inode +
-                       EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes,
-                       (void *)header, total_ino - entry_size,
-                       inode->i_sb->s_blocksize);
+               ext4_xattr_shift_entries(entry, -shift_bytes,
+                       (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
+                       EXT4_I(inode)->i_extra_isize + shift_bytes,
+                       (void *)header, total_ino, inode->i_sb->s_blocksize);
 
-               extra_isize += shift_bytes;
-               new_extra_isize -= shift_bytes;
-               EXT4_I(inode)->i_extra_isize = extra_isize;
+               isize_diff -= shift_bytes;
+               EXT4_I(inode)->i_extra_isize += shift_bytes;
+               header = IHDR(inode, raw_inode);
 
                i.name = b_entry_name;
                i.value = buffer;
@@ -1551,6 +1554,8 @@ retry:
                kfree(bs);
        }
        brelse(bh);
+out:
+       ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
        up_write(&EXT4_I(inode)->xattr_sem);
        return 0;
 
@@ -1562,6 +1567,10 @@ cleanup:
        kfree(is);
        kfree(bs);
        brelse(bh);
+       /*
+        * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode
+        * size expansion failed.
+        */
        up_write(&EXT4_I(inode)->xattr_sem);
        return error;
 }
index 69dd3e6..a92e783 100644 (file)
@@ -24,6 +24,7 @@
 #define EXT4_XATTR_INDEX_SYSTEM                        7
 #define EXT4_XATTR_INDEX_RICHACL               8
 #define EXT4_XATTR_INDEX_ENCRYPTION            9
+#define EXT4_XATTR_INDEX_HURD                  10 /* Reserved for Hurd */
 
 struct ext4_xattr_header {
        __le32  h_magic;        /* magic number for identification */
index d64d2a5..ccb401e 100644 (file)
@@ -1699,11 +1699,11 @@ static int f2fs_write_end(struct file *file,
        trace_f2fs_write_end(inode, pos, len, copied);
 
        set_page_dirty(page);
-       f2fs_put_page(page, 1);
 
        if (pos + copied > i_size_read(inode))
                f2fs_i_size_write(inode, pos + copied);
 
+       f2fs_put_page(page, 1);
        f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
        return copied;
 }
index 675fa79..14f5fe2 100644 (file)
@@ -538,7 +538,7 @@ struct f2fs_nm_info {
        /* NAT cache management */
        struct radix_tree_root nat_root;/* root of the nat entry cache */
        struct radix_tree_root nat_set_root;/* root of the nat set cache */
-       struct percpu_rw_semaphore nat_tree_lock;       /* protect nat_tree_lock */
+       struct rw_semaphore nat_tree_lock;      /* protect the nat entry cache */
        struct list_head nat_entries;   /* cached nat entry list (clean) */
        unsigned int nat_cnt;           /* the # of cached nat entries */
        unsigned int dirty_nat_cnt;     /* total num of nat entries in set */
@@ -787,7 +787,7 @@ struct f2fs_sb_info {
        struct f2fs_checkpoint *ckpt;           /* raw checkpoint pointer */
        struct inode *meta_inode;               /* cache meta blocks */
        struct mutex cp_mutex;                  /* checkpoint procedure lock */
-       struct percpu_rw_semaphore cp_rwsem;            /* blocking FS operations */
+       struct rw_semaphore cp_rwsem;           /* blocking FS operations */
        struct rw_semaphore node_write;         /* locking node writes */
        wait_queue_head_t cp_wait;
        unsigned long last_time[MAX_TIME];      /* to store time in jiffies */
@@ -1074,22 +1074,22 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
 
 static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
 {
-       percpu_down_read(&sbi->cp_rwsem);
+       down_read(&sbi->cp_rwsem);
 }
 
 static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
 {
-       percpu_up_read(&sbi->cp_rwsem);
+       up_read(&sbi->cp_rwsem);
 }
 
 static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
 {
-       percpu_down_write(&sbi->cp_rwsem);
+       down_write(&sbi->cp_rwsem);
 }
 
 static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
 {
-       percpu_up_write(&sbi->cp_rwsem);
+       up_write(&sbi->cp_rwsem);
 }
 
 static inline int __get_cp_reason(struct f2fs_sb_info *sbi)
index 0e493f6..47abb96 100644 (file)
@@ -2086,15 +2086,19 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
        if (unlikely(f2fs_readonly(src->i_sb)))
                return -EROFS;
 
-       if (S_ISDIR(src->i_mode) || S_ISDIR(dst->i_mode))
-               return -EISDIR;
+       if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode))
+               return -EINVAL;
 
        if (f2fs_encrypted_inode(src) || f2fs_encrypted_inode(dst))
                return -EOPNOTSUPP;
 
        inode_lock(src);
-       if (src != dst)
-               inode_lock(dst);
+       if (src != dst) {
+               if (!inode_trylock(dst)) {
+                       ret = -EBUSY;
+                       goto out;
+               }
+       }
 
        ret = -EINVAL;
        if (pos_in + len > src->i_size || pos_in + len < pos_in)
@@ -2152,6 +2156,7 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
 out_unlock:
        if (src != dst)
                inode_unlock(dst);
+out:
        inode_unlock(src);
        return ret;
 }
index b2fa4b6..f75d197 100644 (file)
@@ -206,14 +206,14 @@ int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
        struct nat_entry *e;
        bool need = false;
 
-       percpu_down_read(&nm_i->nat_tree_lock);
+       down_read(&nm_i->nat_tree_lock);
        e = __lookup_nat_cache(nm_i, nid);
        if (e) {
                if (!get_nat_flag(e, IS_CHECKPOINTED) &&
                                !get_nat_flag(e, HAS_FSYNCED_INODE))
                        need = true;
        }
-       percpu_up_read(&nm_i->nat_tree_lock);
+       up_read(&nm_i->nat_tree_lock);
        return need;
 }
 
@@ -223,11 +223,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
        struct nat_entry *e;
        bool is_cp = true;
 
-       percpu_down_read(&nm_i->nat_tree_lock);
+       down_read(&nm_i->nat_tree_lock);
        e = __lookup_nat_cache(nm_i, nid);
        if (e && !get_nat_flag(e, IS_CHECKPOINTED))
                is_cp = false;
-       percpu_up_read(&nm_i->nat_tree_lock);
+       up_read(&nm_i->nat_tree_lock);
        return is_cp;
 }
 
@@ -237,13 +237,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
        struct nat_entry *e;
        bool need_update = true;
 
-       percpu_down_read(&nm_i->nat_tree_lock);
+       down_read(&nm_i->nat_tree_lock);
        e = __lookup_nat_cache(nm_i, ino);
        if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
                        (get_nat_flag(e, IS_CHECKPOINTED) ||
                         get_nat_flag(e, HAS_FSYNCED_INODE)))
                need_update = false;
-       percpu_up_read(&nm_i->nat_tree_lock);
+       up_read(&nm_i->nat_tree_lock);
        return need_update;
 }
 
@@ -284,7 +284,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct nat_entry *e;
 
-       percpu_down_write(&nm_i->nat_tree_lock);
+       down_write(&nm_i->nat_tree_lock);
        e = __lookup_nat_cache(nm_i, ni->nid);
        if (!e) {
                e = grab_nat_entry(nm_i, ni->nid);
@@ -334,7 +334,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
                        set_nat_flag(e, HAS_FSYNCED_INODE, true);
                set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
        }
-       percpu_up_write(&nm_i->nat_tree_lock);
+       up_write(&nm_i->nat_tree_lock);
 }
 
 int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -342,7 +342,8 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        int nr = nr_shrink;
 
-       percpu_down_write(&nm_i->nat_tree_lock);
+       if (!down_write_trylock(&nm_i->nat_tree_lock))
+               return 0;
 
        while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
                struct nat_entry *ne;
@@ -351,7 +352,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
                __del_from_nat_cache(nm_i, ne);
                nr_shrink--;
        }
-       percpu_up_write(&nm_i->nat_tree_lock);
+       up_write(&nm_i->nat_tree_lock);
        return nr - nr_shrink;
 }
 
@@ -373,13 +374,13 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
        ni->nid = nid;
 
        /* Check nat cache */
-       percpu_down_read(&nm_i->nat_tree_lock);
+       down_read(&nm_i->nat_tree_lock);
        e = __lookup_nat_cache(nm_i, nid);
        if (e) {
                ni->ino = nat_get_ino(e);
                ni->blk_addr = nat_get_blkaddr(e);
                ni->version = nat_get_version(e);
-               percpu_up_read(&nm_i->nat_tree_lock);
+               up_read(&nm_i->nat_tree_lock);
                return;
        }
 
@@ -403,11 +404,11 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
        node_info_from_raw_nat(ni, &ne);
        f2fs_put_page(page, 1);
 cache:
-       percpu_up_read(&nm_i->nat_tree_lock);
+       up_read(&nm_i->nat_tree_lock);
        /* cache nat entry */
-       percpu_down_write(&nm_i->nat_tree_lock);
+       down_write(&nm_i->nat_tree_lock);
        cache_nat_entry(sbi, nid, &ne);
-       percpu_up_write(&nm_i->nat_tree_lock);
+       up_write(&nm_i->nat_tree_lock);
 }
 
 /*
@@ -1788,7 +1789,7 @@ void build_free_nids(struct f2fs_sb_info *sbi)
        ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
                                                        META_NAT, true);
 
-       percpu_down_read(&nm_i->nat_tree_lock);
+       down_read(&nm_i->nat_tree_lock);
 
        while (1) {
                struct page *page = get_current_nat_page(sbi, nid);
@@ -1820,7 +1821,7 @@ void build_free_nids(struct f2fs_sb_info *sbi)
                        remove_free_nid(nm_i, nid);
        }
        up_read(&curseg->journal_rwsem);
-       percpu_up_read(&nm_i->nat_tree_lock);
+       up_read(&nm_i->nat_tree_lock);
 
        ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
                                        nm_i->ra_nid_pages, META_NAT, false);
@@ -2209,7 +2210,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
        if (!nm_i->dirty_nat_cnt)
                return;
 
-       percpu_down_write(&nm_i->nat_tree_lock);
+       down_write(&nm_i->nat_tree_lock);
 
        /*
         * if there are no enough space in journal to store dirty nat
@@ -2232,7 +2233,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
        list_for_each_entry_safe(set, tmp, &sets, set_list)
                __flush_nat_entry_set(sbi, set);
 
-       percpu_up_write(&nm_i->nat_tree_lock);
+       up_write(&nm_i->nat_tree_lock);
 
        f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
 }
@@ -2268,8 +2269,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 
        mutex_init(&nm_i->build_lock);
        spin_lock_init(&nm_i->free_nid_list_lock);
-       if (percpu_init_rwsem(&nm_i->nat_tree_lock))
-               return -ENOMEM;
+       init_rwsem(&nm_i->nat_tree_lock);
 
        nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
        nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
@@ -2326,7 +2326,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
        spin_unlock(&nm_i->free_nid_list_lock);
 
        /* destroy nat cache */
-       percpu_down_write(&nm_i->nat_tree_lock);
+       down_write(&nm_i->nat_tree_lock);
        while ((found = __gang_lookup_nat_cache(nm_i,
                                        nid, NATVEC_SIZE, natvec))) {
                unsigned idx;
@@ -2351,9 +2351,8 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
                        kmem_cache_free(nat_entry_set_slab, setvec[idx]);
                }
        }
-       percpu_up_write(&nm_i->nat_tree_lock);
+       up_write(&nm_i->nat_tree_lock);
 
-       percpu_free_rwsem(&nm_i->nat_tree_lock);
        kfree(nm_i->nat_bitmap);
        sbi->nm_info = NULL;
        kfree(nm_i);
index 1b86d3f..7f863a6 100644 (file)
@@ -706,8 +706,6 @@ static void destroy_percpu_info(struct f2fs_sb_info *sbi)
                percpu_counter_destroy(&sbi->nr_pages[i]);
        percpu_counter_destroy(&sbi->alloc_valid_block_count);
        percpu_counter_destroy(&sbi->total_valid_inode_count);
-
-       percpu_free_rwsem(&sbi->cp_rwsem);
 }
 
 static void f2fs_put_super(struct super_block *sb)
@@ -1483,9 +1481,6 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
 {
        int i, err;
 
-       if (percpu_init_rwsem(&sbi->cp_rwsem))
-               return -ENOMEM;
-
        for (i = 0; i < NR_COUNT_TYPE; i++) {
                err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL);
                if (err)
@@ -1686,6 +1681,7 @@ try_onemore:
                sbi->write_io[i].bio = NULL;
        }
 
+       init_rwsem(&sbi->cp_rwsem);
        init_waitqueue_head(&sbi->cp_wait);
        init_sb_info(sbi);
 
index 4d09d44..05713a5 100644 (file)
@@ -1949,6 +1949,12 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
 {
        struct backing_dev_info *bdi;
 
+       /*
+        * If we are expecting writeback progress we must submit plugged IO.
+        */
+       if (blk_needs_flush_plug(current))
+               blk_schedule_flush_plug(current);
+
        if (!nr_pages)
                nr_pages = get_nr_dirty_pages();
 
index 48141b8..0342254 100644 (file)
@@ -84,8 +84,11 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
         * Now the data has been copied, commit the range we've copied.  This
         * should not fail unless the filesystem has had a fatal error.
         */
-       ret = ops->iomap_end(inode, pos, length, written > 0 ? written : 0,
-                       flags, &iomap);
+       if (ops->iomap_end) {
+               ret = ops->iomap_end(inode, pos, length,
+                                    written > 0 ? written : 0,
+                                    flags, &iomap);
+       }
 
        return written ? written : ret;
 }
@@ -194,12 +197,9 @@ again:
                if (mapping_writably_mapped(inode->i_mapping))
                        flush_dcache_page(page);
 
-               pagefault_disable();
                copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
-               pagefault_enable();
 
                flush_dcache_page(page);
-               mark_page_accessed(page);
 
                status = iomap_write_end(inode, pos, bytes, copied, page);
                if (unlikely(status < 0))
@@ -470,13 +470,18 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
        if (ret)
                return ret;
 
-       ret = filemap_write_and_wait(inode->i_mapping);
-       if (ret)
-               return ret;
+       if (fi->fi_flags & FIEMAP_FLAG_SYNC) {
+               ret = filemap_write_and_wait(inode->i_mapping);
+               if (ret)
+                       return ret;
+       }
 
        while (len > 0) {
                ret = iomap_apply(inode, start, len, 0, ops, &ctx,
                                iomap_fiemap_actor);
+               /* an inode with no (attribute) mapping yields -ENOENT; stop iterating */
+               if (ret == -ENOENT)
+                       break;
                if (ret < 0)
                        return ret;
                if (ret == 0)
index 33da841..6f47527 100644 (file)
@@ -338,6 +338,8 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
        case 0:
                break;
        case -NFS4ERR_EXPIRED:
+       case -NFS4ERR_ADMIN_REVOKED:
+       case -NFS4ERR_DELEG_REVOKED:
        case -NFS4ERR_STALE_STATEID:
        case -NFS4ERR_OLD_STATEID:
        case -NFS4ERR_BAD_STATEID:
index 324bfdc..9bf64ea 100644 (file)
@@ -396,6 +396,10 @@ extern void nfs4_schedule_state_renewal(struct nfs_client *);
 extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
 extern void nfs4_kill_renewd(struct nfs_client *);
 extern void nfs4_renew_state(struct work_struct *);
+extern void nfs4_set_lease_period(struct nfs_client *clp,
+               unsigned long lease,
+               unsigned long lastrenewed);
+
 
 /* nfs4state.c */
 struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp);
index a036e93..1949bbd 100644 (file)
@@ -4237,12 +4237,9 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str
                err = _nfs4_do_fsinfo(server, fhandle, fsinfo);
                trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err);
                if (err == 0) {
-                       struct nfs_client *clp = server->nfs_client;
-
-                       spin_lock(&clp->cl_lock);
-                       clp->cl_lease_time = fsinfo->lease_time * HZ;
-                       clp->cl_last_renewal = now;
-                       spin_unlock(&clp->cl_lock);
+                       nfs4_set_lease_period(server->nfs_client,
+                                       fsinfo->lease_time * HZ,
+                                       now);
                        break;
                }
                err = nfs4_handle_exception(server, err, &exception);
index e1ba58c..82e7719 100644 (file)
@@ -136,6 +136,26 @@ nfs4_kill_renewd(struct nfs_client *clp)
        cancel_delayed_work_sync(&clp->cl_renewd);
 }
 
+/**
+ * nfs4_set_lease_period - Sets the lease period on a nfs_client
+ *
+ * @clp: pointer to nfs_client
+ * @lease: new value for lease period
+ * @lastrenewed: time at which lease was last renewed
+ */
+void nfs4_set_lease_period(struct nfs_client *clp,
+               unsigned long lease,
+               unsigned long lastrenewed)
+{
+       spin_lock(&clp->cl_lock);
+       clp->cl_lease_time = lease;
+       clp->cl_last_renewal = lastrenewed;
+       spin_unlock(&clp->cl_lock);
+
+       /* Cap maximum reconnect timeout at 1/2 lease period */
+       rpc_cap_max_reconnect_timeout(clp->cl_rpcclient, lease >> 1);
+}
+
 /*
  * Local variables:
  *   c-basic-offset: 8
index 834b875..cada00a 100644 (file)
@@ -277,20 +277,17 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
 {
        int status;
        struct nfs_fsinfo fsinfo;
+       unsigned long now;
 
        if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
                nfs4_schedule_state_renewal(clp);
                return 0;
        }
 
+       now = jiffies;
        status = nfs4_proc_get_lease_time(clp, &fsinfo);
        if (status == 0) {
-               /* Update lease time and schedule renewal */
-               spin_lock(&clp->cl_lock);
-               clp->cl_lease_time = fsinfo.lease_time * HZ;
-               clp->cl_last_renewal = jiffies;
-               spin_unlock(&clp->cl_lock);
-
+               nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now);
                nfs4_schedule_state_renewal(clp);
        }
 
index 8410ca2..a204d7e 100644 (file)
@@ -4903,6 +4903,32 @@ nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        return nfs_ok;
 }
 
+static __be32
+nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s)
+{
+       struct nfs4_ol_stateid *stp = openlockstateid(s);
+       __be32 ret;
+
+       mutex_lock(&stp->st_mutex);
+
+       ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
+       if (ret)
+               goto out;
+
+       ret = nfserr_locks_held;
+       if (check_for_locks(stp->st_stid.sc_file,
+                           lockowner(stp->st_stateowner)))
+               goto out;
+
+       release_lock_stateid(stp);
+       ret = nfs_ok;
+
+out:
+       mutex_unlock(&stp->st_mutex);
+       nfs4_put_stid(s);
+       return ret;
+}
+
 __be32
 nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                   struct nfsd4_free_stateid *free_stateid)
@@ -4910,7 +4936,6 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        stateid_t *stateid = &free_stateid->fr_stateid;
        struct nfs4_stid *s;
        struct nfs4_delegation *dp;
-       struct nfs4_ol_stateid *stp;
        struct nfs4_client *cl = cstate->session->se_client;
        __be32 ret = nfserr_bad_stateid;
 
@@ -4929,18 +4954,9 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                ret = nfserr_locks_held;
                break;
        case NFS4_LOCK_STID:
-               ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
-               if (ret)
-                       break;
-               stp = openlockstateid(s);
-               ret = nfserr_locks_held;
-               if (check_for_locks(stp->st_stid.sc_file,
-                                   lockowner(stp->st_stateowner)))
-                       break;
-               WARN_ON(!unhash_lock_stateid(stp));
+               atomic_inc(&s->sc_count);
                spin_unlock(&cl->cl_lock);
-               nfs4_put_stid(s);
-               ret = nfs_ok;
+               ret = nfsd4_free_lock_stateid(stateid, s);
                goto out;
        case NFS4_REVOKED_DELEG_STID:
                dp = delegstateid(s);
@@ -5507,7 +5523,7 @@ static __be32
 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
                            struct nfs4_ol_stateid *ost,
                            struct nfsd4_lock *lock,
-                           struct nfs4_ol_stateid **lst, bool *new)
+                           struct nfs4_ol_stateid **plst, bool *new)
 {
        __be32 status;
        struct nfs4_file *fi = ost->st_stid.sc_file;
@@ -5515,7 +5531,9 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
        struct nfs4_client *cl = oo->oo_owner.so_client;
        struct inode *inode = d_inode(cstate->current_fh.fh_dentry);
        struct nfs4_lockowner *lo;
+       struct nfs4_ol_stateid *lst;
        unsigned int strhashval;
+       bool hashed;
 
        lo = find_lockowner_str(cl, &lock->lk_new_owner);
        if (!lo) {
@@ -5531,12 +5549,27 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
                        goto out;
        }
 
-       *lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
-       if (*lst == NULL) {
+retry:
+       lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
+       if (lst == NULL) {
                status = nfserr_jukebox;
                goto out;
        }
+
+       mutex_lock(&lst->st_mutex);
+
+       /* See if it's still hashed to avoid race with FREE_STATEID */
+       spin_lock(&cl->cl_lock);
+       hashed = !list_empty(&lst->st_perfile);
+       spin_unlock(&cl->cl_lock);
+
+       if (!hashed) {
+               mutex_unlock(&lst->st_mutex);
+               nfs4_put_stid(&lst->st_stid);
+               goto retry;
+       }
        status = nfs_ok;
+       *plst = lst;
 out:
        nfs4_put_stateowner(&lo->lo_owner);
        return status;
@@ -5603,8 +5636,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                        goto out;
                status = lookup_or_create_lock_state(cstate, open_stp, lock,
                                                        &lock_stp, &new);
-               if (status == nfs_ok)
-                       mutex_lock(&lock_stp->st_mutex);
        } else {
                status = nfs4_preprocess_seqid_op(cstate,
                                       lock->lk_old_lock_seqid,
index ba94412..ff476e6 100644 (file)
@@ -1252,10 +1252,13 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
        if (IS_ERR(dchild))
                return nfserrno(host_err);
        err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
-       if (err) {
-               dput(dchild);
+       /*
+        * We unconditionally drop our ref to dchild as fh_compose will have
+        * already grabbed its own ref for it.
+        */
+       dput(dchild);
+       if (err)
                return err;
-       }
        return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type,
                                        rdev, resfhp);
 }
index 4b32928..4ebe6b2 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -144,10 +144,8 @@ static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
        struct page *page = buf->page;
 
        if (page_count(page) == 1) {
-               if (memcg_kmem_enabled()) {
+               if (memcg_kmem_enabled())
                        memcg_kmem_uncharge(page, 0);
-                       __ClearPageKmemcg(page);
-               }
                __SetPageLocked(page);
                return 0;
        }
index 09e18fd..b9a8c81 100644 (file)
@@ -46,7 +46,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                cached = 0;
 
        for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
-               pages[lru] = global_page_state(NR_LRU_BASE + lru);
+               pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
 
        available = si_mem_available();
 
index 19f532e..6dc4296 100644 (file)
@@ -223,8 +223,10 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
                size -= n;
                buf += n;
                copied += n;
-               if (!m->count)
+               if (!m->count) {
+                       m->from = 0;
                        m->index++;
+               }
                if (!size)
                        goto Done;
        }
index b45345d..51157da 100644 (file)
@@ -370,7 +370,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
 
        p = c->gap_lebs;
        do {
-               ubifs_assert(p < c->gap_lebs + sizeof(int) * c->lst.idx_lebs);
+               ubifs_assert(p < c->gap_lebs + c->lst.idx_lebs);
                written = layout_leb_in_gaps(c, p);
                if (written < 0) {
                        err = written;
index e237811..11a0041 100644 (file)
@@ -575,7 +575,8 @@ static int ubifs_xattr_get(const struct xattr_handler *handler,
        dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name,
                inode->i_ino, dentry, size);
 
-       return  __ubifs_getxattr(inode, name, buffer, size);
+       name = xattr_full_name(handler, name);
+       return __ubifs_getxattr(inode, name, buffer, size);
 }
 
 static int ubifs_xattr_set(const struct xattr_handler *handler,
@@ -586,6 +587,8 @@ static int ubifs_xattr_set(const struct xattr_handler *handler,
        dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd",
                name, inode->i_ino, dentry, size);
 
+       name = xattr_full_name(handler, name);
+
        if (value)
                return __ubifs_setxattr(inode, name, value, size, flags);
        else
index 776ae2f..3dd8f1d 100644 (file)
@@ -1582,6 +1582,7 @@ xfs_alloc_ag_vextent_small(
        xfs_extlen_t    *flenp, /* result length */
        int             *stat)  /* status: 0-freelist, 1-normal/none */
 {
+       struct xfs_owner_info   oinfo;
        int             error;
        xfs_agblock_t   fbno;
        xfs_extlen_t    flen;
@@ -1624,6 +1625,18 @@ xfs_alloc_ag_vextent_small(
                                error0);
                        args->wasfromfl = 1;
                        trace_xfs_alloc_small_freelist(args);
+
+                       /*
+                        * If we're feeding an AGFL block to something that
+                        * doesn't live in the free space, we need to clear
+                        * out the OWN_AG rmap.
+                        */
+                       xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
+                       error = xfs_rmap_free(args->tp, args->agbp, args->agno,
+                                       fbno, 1, &oinfo);
+                       if (error)
+                               goto error0;
+
                        *stat = 0;
                        return 0;
                }
@@ -2264,6 +2277,7 @@ xfs_alloc_log_agf(
                offsetof(xfs_agf_t, agf_longest),
                offsetof(xfs_agf_t, agf_btreeblks),
                offsetof(xfs_agf_t, agf_uuid),
+               offsetof(xfs_agf_t, agf_rmap_blocks),
                sizeof(xfs_agf_t)
        };
 
index f814d42..e6a8bea 100644 (file)
@@ -640,12 +640,15 @@ typedef struct xfs_agf {
        __be32          agf_btreeblks;  /* # of blocks held in AGF btrees */
        uuid_t          agf_uuid;       /* uuid of filesystem */
 
+       __be32          agf_rmap_blocks;        /* rmapbt blocks used */
+       __be32          agf_padding;            /* padding */
+
        /*
         * reserve some contiguous space for future logged fields before we add
         * the unlogged fields. This makes the range logging via flags and
         * structure offsets much simpler.
         */
-       __be64          agf_spare64[16];
+       __be64          agf_spare64[15];
 
        /* unlogged fields, written during buffer writeback. */
        __be64          agf_lsn;        /* last write sequence */
@@ -670,7 +673,8 @@ typedef struct xfs_agf {
 #define        XFS_AGF_LONGEST         0x00000400
 #define        XFS_AGF_BTREEBLKS       0x00000800
 #define        XFS_AGF_UUID            0x00001000
-#define        XFS_AGF_NUM_BITS        13
+#define        XFS_AGF_RMAP_BLOCKS     0x00002000
+#define        XFS_AGF_NUM_BITS        14
 #define        XFS_AGF_ALL_BITS        ((1 << XFS_AGF_NUM_BITS) - 1)
 
 #define XFS_AGF_FLAGS \
@@ -686,7 +690,8 @@ typedef struct xfs_agf {
        { XFS_AGF_FREEBLKS,     "FREEBLKS" }, \
        { XFS_AGF_LONGEST,      "LONGEST" }, \
        { XFS_AGF_BTREEBLKS,    "BTREEBLKS" }, \
-       { XFS_AGF_UUID,         "UUID" }
+       { XFS_AGF_UUID,         "UUID" }, \
+       { XFS_AGF_RMAP_BLOCKS,  "RMAP_BLOCKS" }
 
 /* disk block (xfs_daddr_t) in the AG */
 #define XFS_AGF_DADDR(mp)      ((xfs_daddr_t)(1 << (mp)->m_sectbb_log))
index bc1faeb..17b8eeb 100644 (file)
@@ -98,6 +98,8 @@ xfs_rmapbt_alloc_block(
        union xfs_btree_ptr     *new,
        int                     *stat)
 {
+       struct xfs_buf          *agbp = cur->bc_private.a.agbp;
+       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
        int                     error;
        xfs_agblock_t           bno;
 
@@ -124,6 +126,8 @@ xfs_rmapbt_alloc_block(
 
        xfs_trans_agbtree_delta(cur->bc_tp, 1);
        new->s = cpu_to_be32(bno);
+       be32_add_cpu(&agf->agf_rmap_blocks, 1);
+       xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
 
        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
        *stat = 1;
@@ -143,6 +147,8 @@ xfs_rmapbt_free_block(
        bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
        trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno,
                        bno, 1);
+       be32_add_cpu(&agf->agf_rmap_blocks, -1);
+       xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
        error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
        if (error)
                return error;
index 47a318c..607cc29 100644 (file)
@@ -115,7 +115,6 @@ xfs_buf_ioacct_dec(
        if (!(bp->b_flags & _XBF_IN_FLIGHT))
                return;
 
-       ASSERT(bp->b_flags & XBF_ASYNC);
        bp->b_flags &= ~_XBF_IN_FLIGHT;
        percpu_counter_dec(&bp->b_target->bt_io_count);
 }
index ed95e5b..e612a02 100644 (file)
@@ -741,9 +741,20 @@ xfs_file_dax_write(
         * page is inserted into the pagecache when we have to serve a write
         * fault on a hole.  It should never be dirtied and can simply be
         * dropped from the pagecache once we get real data for the page.
+        *
+        * XXX: This is racy against mmap, and there's nothing we can do about
+        * it. dax_do_io() should really do this invalidation internally as
+        * it will know if we've allocated over a holei for this specific IO and
+        * if so it needs to update the mapping tree and invalidate existing
+        * PTEs over the newly allocated range. Remove this invalidation when
+        * dax_do_io() is fixed up.
         */
        if (mapping->nrpages) {
-               ret = invalidate_inode_pages2(mapping);
+               loff_t end = iocb->ki_pos + iov_iter_count(from) - 1;
+
+               ret = invalidate_inode_pages2_range(mapping,
+                                                   iocb->ki_pos >> PAGE_SHIFT,
+                                                   end >> PAGE_SHIFT);
                WARN_ON_ONCE(ret);
        }
 
index 0f96847..0b7f986 100644 (file)
@@ -248,6 +248,7 @@ xfs_growfs_data_private(
                        agf->agf_roots[XFS_BTNUM_RMAPi] =
                                                cpu_to_be32(XFS_RMAP_BLOCK(mp));
                        agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
+                       agf->agf_rmap_blocks = cpu_to_be32(1);
                }
 
                agf->agf_flfirst = cpu_to_be32(1);
index 2114d53..2af0dda 100644 (file)
@@ -715,12 +715,16 @@ xfs_iomap_write_allocate(
                 * is in the delayed allocation extent on which we sit
                 * but before our buffer starts.
                 */
-
                nimaps = 0;
                while (nimaps == 0) {
                        nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
-
-                       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, nres,
+                       /*
+                        * We have already reserved space for the extent and any
+                        * indirect blocks when creating the delalloc extent,
+                        * there is no need to reserve space in this transaction
+                        * again.
+                        */
+                       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0,
                                        0, XFS_TRANS_RESERVE, &tp);
                        if (error)
                                return error;
@@ -1037,20 +1041,14 @@ xfs_file_iomap_begin(
                        return error;
 
                trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
-               xfs_bmbt_to_iomap(ip, iomap, &imap);
-       } else if (nimaps) {
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-               trace_xfs_iomap_found(ip, offset, length, 0, &imap);
-               xfs_bmbt_to_iomap(ip, iomap, &imap);
        } else {
+               ASSERT(nimaps);
+
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
-               trace_xfs_iomap_not_found(ip, offset, length, 0, &imap);
-               iomap->blkno = IOMAP_NULL_BLOCK;
-               iomap->type = IOMAP_HOLE;
-               iomap->offset = offset;
-               iomap->length = length;
+               trace_xfs_iomap_found(ip, offset, length, 0, &imap);
        }
 
+       xfs_bmbt_to_iomap(ip, iomap, &imap);
        return 0;
 }
 
@@ -1112,3 +1110,55 @@ struct iomap_ops xfs_iomap_ops = {
        .iomap_begin            = xfs_file_iomap_begin,
        .iomap_end              = xfs_file_iomap_end,
 };
+
+/*
+ * Map a byte range of the inode's attribute fork (used for FIEMAP_FLAG_XATTR).
+ *
+ * Returns -EIO when XFS_FORCED_SHUTDOWN() is set for the mount, -ENOENT when
+ * the inode has no attribute fork or the fork holds no extents, otherwise the
+ * result of xfs_bmapi_read(); on success the mapping is stored in @iomap.
+ */
+static int
+xfs_xattr_iomap_begin(
+       struct inode            *inode,
+       loff_t                  offset,
+       loff_t                  length,
+       unsigned                flags,
+       struct iomap            *iomap)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       xfs_fileoff_t           offset_fsb = XFS_B_TO_FSBT(mp, offset);
+       xfs_fileoff_t           end_fsb = XFS_B_TO_FSB(mp, offset + length);
+       struct xfs_bmbt_irec    imap;
+       int                     nimaps = 1, error = 0;
+       unsigned                lockmode;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
+       lockmode = xfs_ilock_data_map_shared(ip);
+
+       /* if there is no attribute fork, or it has no extents, return ENOENT */
+       if (!XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) {
+               error = -ENOENT;
+               goto out_unlock;
+       }
+
+       ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL);
+       error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+                              &nimaps, XFS_BMAPI_ENTIRE | XFS_BMAPI_ATTRFORK);
+out_unlock:
+       xfs_iunlock(ip, lockmode);
+
+       if (!error) {
+               ASSERT(nimaps);
+               xfs_bmbt_to_iomap(ip, iomap, &imap);
+       }
+
+       return error;
+}
+
+struct iomap_ops xfs_xattr_iomap_ops = {
+       .iomap_begin            = xfs_xattr_iomap_begin,
+};
index e066d04..fb8aca3 100644 (file)
@@ -35,5 +35,6 @@ void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
                struct xfs_bmbt_irec *);
 
 extern struct iomap_ops xfs_iomap_ops;
+extern struct iomap_ops xfs_xattr_iomap_ops;
 
 #endif /* __XFS_IOMAP_H__*/
index ab820f8..b24c310 100644 (file)
@@ -1009,7 +1009,14 @@ xfs_vn_fiemap(
        int                     error;
 
        xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED);
-       error = iomap_fiemap(inode, fieinfo, start, length, &xfs_iomap_ops);
+       if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
+               fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR;
+               error = iomap_fiemap(inode, fieinfo, start, length,
+                               &xfs_xattr_iomap_ops);
+       } else {
+               error = iomap_fiemap(inode, fieinfo, start, length,
+                               &xfs_iomap_ops);
+       }
        xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED);
 
        return error;
index 551b7e2..7e88bec 100644 (file)
@@ -1298,7 +1298,6 @@ DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
 DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
 DEFINE_IOMAP_EVENT(xfs_iomap_found);
-DEFINE_IOMAP_EVENT(xfs_iomap_not_found);
 
 DECLARE_EVENT_CLASS(xfs_simple_io_class,
        TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
index 54a8e65..7d026bf 100644 (file)
 #include <asm-generic/qrwlock_types.h>
 
 /*
- * Writer states & reader shift and bias
+ * Writer states & reader shift and bias.
+ *
+ *       | +0 | +1 | +2 | +3 |
+ *   ----+----+----+----+----+
+ *    LE | 78 | 56 | 34 | 12 | 0x12345678
+ *   ----+----+----+----+----+
+ *       | wr |      rd      |
+ *       +----+----+----+----+
+ *
+ *   ----+----+----+----+----+
+ *    BE | 12 | 34 | 56 | 78 | 0x12345678
+ *   ----+----+----+----+----+
+ *       |      rd      | wr |
+ *       +----+----+----+----+
  */
 #define        _QW_WAITING     1               /* A writer is waiting     */
 #define        _QW_LOCKED      0xff            /* A writer holds the lock */
@@ -133,13 +146,23 @@ static inline void queued_read_unlock(struct qrwlock *lock)
        (void)atomic_sub_return_release(_QR_BIAS, &lock->cnts);
 }
 
+/**
+ * __qrwlock_write_byte - retrieve the write byte address of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ * Return: the write byte address of a queue rwlock
+ */
+static inline u8 *__qrwlock_write_byte(struct qrwlock *lock)
+{
+       return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN);
+}
+
 /**
  * queued_write_unlock - release write lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  */
 static inline void queued_write_unlock(struct qrwlock *lock)
 {
-       smp_store_release((u8 *)&lock->cnts, 0);
+       smp_store_release(__qrwlock_write_byte(lock), 0);
 }
 
 /*
index 4348d6d..99c6d01 100644 (file)
@@ -962,6 +962,7 @@ void ttm_mem_io_free(struct ttm_bo_device *bdev,
  *
  * @bo: A pointer to a struct ttm_buffer_object.
  * @evict: 1: This is an eviction. Don't try to pipeline.
+ * @interruptible: Sleep interruptible if waiting.
  * @no_wait_gpu: Return immediately if the GPU is busy.
  * @new_mem: struct ttm_mem_reg indicating where to move.
  *
@@ -976,7 +977,7 @@ void ttm_mem_io_free(struct ttm_bo_device *bdev,
  */
 
 extern int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
-                          bool evict, bool no_wait_gpu,
+                          bool evict, bool interruptible, bool no_wait_gpu,
                           struct ttm_mem_reg *new_mem);
 
 /**
index 59ffaa6..23ddf4b 100644 (file)
@@ -71,7 +71,8 @@ static inline bool bio_has_data(struct bio *bio)
 {
        if (bio &&
            bio->bi_iter.bi_size &&
-           bio_op(bio) != REQ_OP_DISCARD)
+           bio_op(bio) != REQ_OP_DISCARD &&
+           bio_op(bio) != REQ_OP_SECURE_ERASE)
                return true;
 
        return false;
@@ -79,7 +80,9 @@ static inline bool bio_has_data(struct bio *bio)
 
 static inline bool bio_no_advance_iter(struct bio *bio)
 {
-       return bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_WRITE_SAME;
+       return bio_op(bio) == REQ_OP_DISCARD ||
+              bio_op(bio) == REQ_OP_SECURE_ERASE ||
+              bio_op(bio) == REQ_OP_WRITE_SAME;
 }
 
 static inline bool bio_is_rw(struct bio *bio)
@@ -199,6 +202,9 @@ static inline unsigned bio_segments(struct bio *bio)
        if (bio_op(bio) == REQ_OP_DISCARD)
                return 1;
 
+       if (bio_op(bio) == REQ_OP_SECURE_ERASE)
+               return 1;
+
        if (bio_op(bio) == REQ_OP_WRITE_SAME)
                return 1;
 
index 2c210b6..e79055c 100644 (file)
@@ -882,7 +882,7 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
                                                     int op)
 {
-       if (unlikely(op == REQ_OP_DISCARD))
+       if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
                return min(q->limits.max_discard_sectors, UINT_MAX >> 9);
 
        if (unlikely(op == REQ_OP_WRITE_SAME))
@@ -913,7 +913,9 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
        if (unlikely(rq->cmd_type != REQ_TYPE_FS))
                return q->limits.max_hw_sectors;
 
-       if (!q->limits.chunk_sectors || (req_op(rq) == REQ_OP_DISCARD))
+       if (!q->limits.chunk_sectors ||
+           req_op(rq) == REQ_OP_DISCARD ||
+           req_op(rq) == REQ_OP_SECURE_ERASE)
                return blk_queue_get_max_sectors(q, req_op(rq));
 
        return min(blk_max_size_offset(q, offset),
index 701b64a..89b65b8 100644 (file)
@@ -74,7 +74,8 @@ static inline void bvec_iter_advance(const struct bio_vec *bv,
                  "Attempted to advance past end of bvec iter\n");
 
        while (bytes) {
-               unsigned len = min(bytes, bvec_iter_len(bv, *iter));
+               unsigned iter_len = bvec_iter_len(bv, *iter);
+               unsigned len = min(bytes, iter_len);
 
                bytes -= len;
                iter->bi_size -= len;
index e294939..8dbc892 100644 (file)
  */
 #define asm_volatile_goto(x...)        do { asm goto(x); asm (""); } while (0)
 
-#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
+/*
+ * sparse (__CHECKER__) pretends to be gcc, but can't do constant
+ * folding in __builtin_bswap*() (yet), so don't set these for it.
+ */
+#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) && !defined(__CHECKER__)
 #if GCC_VERSION >= 40400
 #define __HAVE_BUILTIN_BSWAP32__
 #define __HAVE_BUILTIN_BSWAP64__
 #if GCC_VERSION >= 40800
 #define __HAVE_BUILTIN_BSWAP16__
 #endif
-#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */
+#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP && !__CHECKER__ */
 
 #if GCC_VERSION >= 50000
 #define KASAN_ABI_VERSION 4
index 1bb9548..436aa4e 100644 (file)
@@ -527,13 +527,13 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
  * object's lifetime is managed by something other than RCU.  That
  * "something other" might be reference counting or simple immortality.
  *
- * The seemingly unused void * variable is to validate @p is indeed a pointer
- * type. All pointer types silently cast to void *.
+ * The seemingly unused size_t variable is to validate @p is indeed a pointer
+ * type by making sure it can be dereferenced.
  */
 #define lockless_dereference(p) \
 ({ \
        typeof(p) _________p1 = READ_ONCE(p); \
-       __maybe_unused const void * const _________p2 = _________p1; \
+       size_t __maybe_unused __size_of_ptr = sizeof(*(p)); \
        smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
        (_________p1); \
 })
index d2ba7d3..1ffbf2a 100644 (file)
@@ -304,6 +304,8 @@ struct tegra_mipi_device;
 
 struct tegra_mipi_device *tegra_mipi_request(struct device *device);
 void tegra_mipi_free(struct tegra_mipi_device *device);
+int tegra_mipi_enable(struct tegra_mipi_device *device);
+int tegra_mipi_disable(struct tegra_mipi_device *device);
 int tegra_mipi_calibrate(struct tegra_mipi_device *device);
 
 #endif
index 56b0b7e..99ac022 100644 (file)
  */
 #define E_ITS_MOVI_UNMAPPED_INTERRUPT          0x010107
 #define E_ITS_MOVI_UNMAPPED_COLLECTION         0x010109
+#define E_ITS_INT_UNMAPPED_INTERRUPT           0x010307
 #define E_ITS_CLEAR_UNMAPPED_INTERRUPT         0x010507
 #define E_ITS_MAPD_DEVICE_OOR                  0x010801
 #define E_ITS_MAPC_PROCNUM_OOR                 0x010902
index 01e908a..9c28b4d 100644 (file)
@@ -1113,8 +1113,20 @@ struct kvm_device {
 /* create, destroy, and name are mandatory */
 struct kvm_device_ops {
        const char *name;
+
+       /*
+        * create is called holding kvm->lock and any operations not suitable
+        * to do while holding the lock should be deferred to init (see
+        * below).
+        */
        int (*create)(struct kvm_device *dev, u32 type);
 
+       /*
+        * init is called after create if create is successful and is called
+        * outside of holding kvm->lock.
+        */
+       void (*init)(struct kvm_device *dev);
+
        /*
         * Destroy is responsible for freeing dev.
         *
index f2e4e90..d572b78 100644 (file)
@@ -68,8 +68,10 @@ extern char * const migratetype_names[MIGRATE_TYPES];
 
 #ifdef CONFIG_CMA
 #  define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
+#  define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA)
 #else
 #  define is_migrate_cma(migratetype) false
+#  define is_migrate_cma_page(_page) false
 #endif
 
 #define for_each_migratetype_order(order, type) \
index 4f0bfe5..e8c81fb 100644 (file)
@@ -270,6 +270,8 @@ enum {
        MSI_FLAG_MULTI_PCI_MSI          = (1 << 2),
        /* Support PCI MSIX interrupts */
        MSI_FLAG_PCI_MSIX               = (1 << 3),
+       /* Needs early activate, required for PCI */
+       MSI_FLAG_ACTIVATE_EARLY         = (1 << 4),
 };
 
 int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask,
index 076df53..3a788bf 100644 (file)
@@ -3891,8 +3891,7 @@ void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
 extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly;
 void netdev_rss_key_fill(void *buffer, size_t len);
 
-int dev_get_nest_level(struct net_device *dev,
-                      bool (*type_check)(const struct net_device *dev));
+int dev_get_nest_level(struct net_device *dev);
 int skb_checksum_help(struct sk_buff *skb);
 struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
                                  netdev_features_t features, bool tx_path);
index 80ca889..664da00 100644 (file)
@@ -15,6 +15,6 @@ struct nf_acct;
 struct nf_acct *nfnl_acct_find_get(struct net *net, const char *filter_name);
 void nfnl_acct_put(struct nf_acct *acct);
 void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
-extern int nfnl_acct_overquota(const struct sk_buff *skb,
-                             struct nf_acct *nfacct);
+int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb,
+                       struct nf_acct *nfacct);
 #endif /* _NFNL_ACCT_H */
index 2599a98..fbc1fa6 100644 (file)
@@ -1251,10 +1251,12 @@ resource_size_t pcibios_iov_resource_alignment(struct pci_dev *dev, int resno);
 int pci_set_vga_state(struct pci_dev *pdev, bool decode,
                      unsigned int command_bits, u32 flags);
 
-#define PCI_IRQ_NOLEGACY       (1 << 0) /* don't use legacy interrupts */
-#define PCI_IRQ_NOMSI          (1 << 1) /* don't use MSI interrupts */
-#define PCI_IRQ_NOMSIX         (1 << 2) /* don't use MSI-X interrupts */
-#define PCI_IRQ_NOAFFINITY     (1 << 3) /* don't auto-assign affinity */
+#define PCI_IRQ_LEGACY         (1 << 0) /* allow legacy interrupts */
+#define PCI_IRQ_MSI            (1 << 1) /* allow MSI interrupts */
+#define PCI_IRQ_MSIX           (1 << 2) /* allow MSI-X interrupts */
+#define PCI_IRQ_AFFINITY       (1 << 3) /* auto-assign affinity */
+#define PCI_IRQ_ALL_TYPES \
+       (PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX)
 
 /* kmem_cache style wrapper around pci_alloc_consistent() */
 
index 8ed4326..2b6b43c 100644 (file)
@@ -743,7 +743,9 @@ struct perf_event_context {
        u64                             parent_gen;
        u64                             generation;
        int                             pin_count;
+#ifdef CONFIG_CGROUP_PERF
        int                             nr_cgroups;      /* cgroup evts */
+#endif
        void                            *task_ctx_data; /* pmu specific data */
        struct rcu_head                 rcu_head;
 };
@@ -769,7 +771,9 @@ struct perf_cpu_context {
        unsigned int                    hrtimer_active;
 
        struct pmu                      *unique_pmu;
+#ifdef CONFIG_CGROUP_PERF
        struct perf_cgroup              *cgrp;
+#endif
 };
 
 struct perf_output_handle {
index 8dc155d..696a56b 100644 (file)
@@ -266,39 +266,21 @@ extern asmlinkage void dump_stack(void) __cold;
  * and other debug macros are compiled out unless either DEBUG is defined
  * or CONFIG_DYNAMIC_DEBUG is set.
  */
-
-#ifdef CONFIG_PRINTK
-
-asmlinkage __printf(1, 2) __cold void __pr_emerg(const char *fmt, ...);
-asmlinkage __printf(1, 2) __cold void __pr_alert(const char *fmt, ...);
-asmlinkage __printf(1, 2) __cold void __pr_crit(const char *fmt, ...);
-asmlinkage __printf(1, 2) __cold void __pr_err(const char *fmt, ...);
-asmlinkage __printf(1, 2) __cold void __pr_warn(const char *fmt, ...);
-asmlinkage __printf(1, 2) __cold void __pr_notice(const char *fmt, ...);
-asmlinkage __printf(1, 2) __cold void __pr_info(const char *fmt, ...);
-
-#define pr_emerg(fmt, ...)     __pr_emerg(pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_alert(fmt, ...)     __pr_alert(pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_crit(fmt, ...)      __pr_crit(pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_err(fmt, ...)       __pr_err(pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_warn(fmt, ...)      __pr_warn(pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_notice(fmt, ...)    __pr_notice(pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_info(fmt, ...)      __pr_info(pr_fmt(fmt), ##__VA_ARGS__)
-
-#else
-
-#define pr_emerg(fmt, ...)     printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_alert(fmt, ...)     printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_crit(fmt, ...)      printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_err(fmt, ...)       printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_warn(fmt, ...)      printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_notice(fmt, ...)    printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_info(fmt, ...)      printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
-
-#endif
-
-#define pr_warning pr_warn
-
+/*
+ * Each pr_<level>() macro expands to a printk() call whose format string
+ * is the matching KERN_<LEVEL> prefix followed by pr_fmt(fmt).
+ */
+#define pr_emerg(fmt, ...) \
+       printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_alert(fmt, ...) \
+       printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_crit(fmt, ...) \
+       printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_err(fmt, ...) \
+       printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warning(fmt, ...) \
+       printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+/* pr_warn is a plain alias for pr_warning */
+#define pr_warn pr_warning
+#define pr_notice(fmt, ...) \
+       printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info(fmt, ...) \
+       printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 /*
  * Like KERN_CONT, pr_cont() should only be used when continuing
  * a line with no newline ('\n') enclosed. Otherwise it defaults
index b1e3c57..d6c4177 100644 (file)
@@ -70,8 +70,16 @@ struct qed_dbcx_pfc_params {
        u8 max_tc;
 };
 
+/*
+ * Type of the sf_ieee member of struct qed_app_entry.
+ * NOTE(review): the names suggest this selects whether an app entry is
+ * matched by Ethertype or by TCP/UDP port - confirm against the DCBX code.
+ */
+enum qed_dcbx_sf_ieee_type {
+       QED_DCBX_SF_IEEE_ETHTYPE,
+       QED_DCBX_SF_IEEE_TCP_PORT,
+       QED_DCBX_SF_IEEE_UDP_PORT,
+       QED_DCBX_SF_IEEE_TCP_UDP_PORT
+};
+
 struct qed_app_entry {
        bool ethtype;
+       enum qed_dcbx_sf_ieee_type sf_ieee;
        bool enabled;
        u8 prio;
        u16 proto_id;
index de1f643..fcb4c36 100644 (file)
@@ -705,70 +705,6 @@ typedef struct sctp_auth_chunk {
        sctp_authhdr_t auth_hdr;
 } __packed sctp_auth_chunk_t;
 
-struct sctp_info {
-       __u32   sctpi_tag;
-       __u32   sctpi_state;
-       __u32   sctpi_rwnd;
-       __u16   sctpi_unackdata;
-       __u16   sctpi_penddata;
-       __u16   sctpi_instrms;
-       __u16   sctpi_outstrms;
-       __u32   sctpi_fragmentation_point;
-       __u32   sctpi_inqueue;
-       __u32   sctpi_outqueue;
-       __u32   sctpi_overall_error;
-       __u32   sctpi_max_burst;
-       __u32   sctpi_maxseg;
-       __u32   sctpi_peer_rwnd;
-       __u32   sctpi_peer_tag;
-       __u8    sctpi_peer_capable;
-       __u8    sctpi_peer_sack;
-       __u16   __reserved1;
-
-       /* assoc status info */
-       __u64   sctpi_isacks;
-       __u64   sctpi_osacks;
-       __u64   sctpi_opackets;
-       __u64   sctpi_ipackets;
-       __u64   sctpi_rtxchunks;
-       __u64   sctpi_outofseqtsns;
-       __u64   sctpi_idupchunks;
-       __u64   sctpi_gapcnt;
-       __u64   sctpi_ouodchunks;
-       __u64   sctpi_iuodchunks;
-       __u64   sctpi_oodchunks;
-       __u64   sctpi_iodchunks;
-       __u64   sctpi_octrlchunks;
-       __u64   sctpi_ictrlchunks;
-
-       /* primary transport info */
-       struct sockaddr_storage sctpi_p_address;
-       __s32   sctpi_p_state;
-       __u32   sctpi_p_cwnd;
-       __u32   sctpi_p_srtt;
-       __u32   sctpi_p_rto;
-       __u32   sctpi_p_hbinterval;
-       __u32   sctpi_p_pathmaxrxt;
-       __u32   sctpi_p_sackdelay;
-       __u32   sctpi_p_sackfreq;
-       __u32   sctpi_p_ssthresh;
-       __u32   sctpi_p_partial_bytes_acked;
-       __u32   sctpi_p_flight_size;
-       __u16   sctpi_p_error;
-       __u16   __reserved2;
-
-       /* sctp sock info */
-       __u32   sctpi_s_autoclose;
-       __u32   sctpi_s_adaptation_ind;
-       __u32   sctpi_s_pd_point;
-       __u8    sctpi_s_nodelay;
-       __u8    sctpi_s_disable_fragments;
-       __u8    sctpi_s_v4mapped;
-       __u8    sctpi_s_frag_interleave;
-       __u32   sctpi_s_type;
-       __u32   __reserved3;
-};
-
 struct sctp_infox {
        struct sctp_info *sctpinfo;
        struct sctp_association *asoc;
index 6f0b3e0..0f665cb 100644 (file)
@@ -2847,6 +2847,18 @@ static inline int skb_linearize_cow(struct sk_buff *skb)
               __skb_linearize(skb) : 0;
 }
 
+/*
+ * Adjust the checksum state of @skb for the @len bytes at @start.
+ *
+ * CHECKSUM_COMPLETE: the partial checksum of those bytes is taken out of
+ * skb->csum with csum_block_sub(), @off being passed through as the block
+ * offset.
+ * CHECKSUM_PARTIAL with a negative checksum start offset: ip_summed is
+ * reset to CHECKSUM_NONE.
+ * Any other state is left untouched.
+ */
+static __always_inline void
+__skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len,
+                    unsigned int off)
+{
+       if (skb->ip_summed == CHECKSUM_COMPLETE)
+               skb->csum = csum_block_sub(skb->csum,
+                                          csum_partial(start, len, 0), off);
+       else if (skb->ip_summed == CHECKSUM_PARTIAL &&
+                skb_checksum_start_offset(skb) < 0)
+               skb->ip_summed = CHECKSUM_NONE;
+}
+
 /**
  *     skb_postpull_rcsum - update checksum for received skb after pull
  *     @skb: buffer to update
@@ -2857,36 +2869,38 @@ static inline int skb_linearize_cow(struct sk_buff *skb)
  *     update the CHECKSUM_COMPLETE checksum, or set ip_summed to
  *     CHECKSUM_NONE so that it can be recomputed from scratch.
  */
-
 static inline void skb_postpull_rcsum(struct sk_buff *skb,
                                      const void *start, unsigned int len)
 {
-       if (skb->ip_summed == CHECKSUM_COMPLETE)
-               skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0));
-       else if (skb->ip_summed == CHECKSUM_PARTIAL &&
-                skb_checksum_start_offset(skb) < 0)
-               skb->ip_summed = CHECKSUM_NONE;
+       __skb_postpull_rcsum(skb, start, len, 0);
 }
 
-unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
+/*
+ * When @skb is CHECKSUM_COMPLETE, fold the partial checksum of the @len
+ * bytes at @start into skb->csum with csum_block_add(), passing @off
+ * through as the block offset. Other ip_summed states are left untouched.
+ */
+static __always_inline void
+__skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len,
+                    unsigned int off)
+{
+       if (skb->ip_summed == CHECKSUM_COMPLETE)
+               skb->csum = csum_block_add(skb->csum,
+                                          csum_partial(start, len, 0), off);
+}
 
+/**
+ *     skb_postpush_rcsum - update checksum for received skb after push
+ *     @skb: buffer to update
+ *     @start: start of data after push
+ *     @len: length of data pushed
+ *
+ *     After doing a push on a received packet, you need to call this to
+ *     update the CHECKSUM_COMPLETE checksum.
+ */
 static inline void skb_postpush_rcsum(struct sk_buff *skb,
                                      const void *start, unsigned int len)
 {
-       /* For performing the reverse operation to skb_postpull_rcsum(),
-        * we can instead of ...
-        *
-        *   skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
-        *
-        * ... just use this equivalent version here to save a few
-        * instructions. Feeding csum of 0 in csum_partial() and later
-        * on adding skb->csum is equivalent to feed skb->csum in the
-        * first place.
-        */
-       if (skb->ip_summed == CHECKSUM_COMPLETE)
-               skb->csum = csum_partial(start, len, skb->csum);
+       __skb_postpush_rcsum(skb, start, len, 0);
 }
 
+unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
+
 /**
  *     skb_push_rcsum - push skb and update receive checksum
  *     @skb: buffer to update
index 1a4ea55..4293808 100644 (file)
@@ -155,6 +155,18 @@ void kfree(const void *);
 void kzfree(const void *);
 size_t ksize(const void *);
 
+/*
+ * With CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR only the prototype of
+ * __check_heap_object() is given here. Without it, the inline stub below
+ * ignores its arguments and always returns NULL.
+ * NOTE(review): NULL presumably means "nothing to report" - confirm
+ * against the caller.
+ */
+#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
+const char *__check_heap_object(const void *ptr, unsigned long n,
+                               struct page *page);
+#else
+static inline const char *__check_heap_object(const void *ptr,
+                                             unsigned long n,
+                                             struct page *page)
+{
+       return NULL;
+}
+#endif
+
 /*
  * Some archs want to perform DMA into kmalloc caches and need a guaranteed
  * alignment larger than the alignment of a 64-bit integer.
index 76199b7..e302c44 100644 (file)
@@ -1,6 +1,16 @@
 #ifndef __SMC91X_H__
 #define __SMC91X_H__
 
+/*
+ * These bits define which access sizes a platform can support, rather
+ * than the maximal access size.  So, if your platform can do 16-bit
+ * and 32-bit accesses to the SMC91x device, but not 8-bit, set both
+ * SMC91X_USE_16BIT and SMC91X_USE_32BIT.
+ *
+ * The SMC91x driver requires at least one of SMC91X_USE_8BIT or
+ * SMC91X_USE_16BIT to be supported - just setting SMC91X_USE_32BIT is
+ * an invalid configuration.
+ */
 #define SMC91X_USE_8BIT (1 << 0)
 #define SMC91X_USE_16BIT (1 << 1)
 #define SMC91X_USE_32BIT (1 << 2)
index b6810c9..5c02b06 100644 (file)
@@ -195,6 +195,8 @@ int         rpc_clnt_add_xprt(struct rpc_clnt *, struct xprt_create *,
                                struct rpc_xprt *,
                                void *),
                        void *data);
+void           rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt,
+                       unsigned long timeo);
 
 const char *rpc_proc_name(const struct rpc_task *task);
 #endif /* __KERNEL__ */
index 5e3e1b6..a16070d 100644 (file)
@@ -218,7 +218,8 @@ struct rpc_xprt {
        struct work_struct      task_cleanup;
        struct timer_list       timer;
        unsigned long           last_used,
-                               idle_timeout;
+                               idle_timeout,
+                               max_reconnect_timeout;
 
        /*
         * Send stuff
index 697e160..a4f7203 100644 (file)
@@ -42,6 +42,8 @@ extern int proc_dostring(struct ctl_table *, int,
                         void __user *, size_t *, loff_t *);
 extern int proc_dointvec(struct ctl_table *, int,
                         void __user *, size_t *, loff_t *);
+extern int proc_douintvec(struct ctl_table *, int,
+                        void __user *, size_t *, loff_t *);
 extern int proc_dointvec_minmax(struct ctl_table *, int,
                                void __user *, size_t *, loff_t *);
 extern int proc_dointvec_jiffies(struct ctl_table *, int,
index 352b154..cbd8990 100644 (file)
@@ -105,6 +105,30 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
 
 #define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
 
+/*
+ * Generic fallback, compiled only when
+ * CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES is not set: all arguments are
+ * ignored and 0 is returned.
+ * NOTE(review): 0 presumably means "cannot tell" rather than "bad" -
+ * confirm against the caller.
+ */
+#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
+static inline int arch_within_stack_frames(const void * const stack,
+                                          const void * const stackend,
+                                          const void *obj, unsigned long len)
+{
+       return 0;
+}
+#endif
+
+/*
+ * check_object_size() forwards its arguments to __check_object_size()
+ * when CONFIG_HARDENED_USERCOPY is enabled and is an empty inline
+ * otherwise, so it can be called without an #ifdef at the call site.
+ */
+#ifdef CONFIG_HARDENED_USERCOPY
+extern void __check_object_size(const void *ptr, unsigned long n,
+                                       bool to_user);
+
+static inline void check_object_size(const void *ptr, unsigned long n,
+                                    bool to_user)
+{
+       __check_object_size(ptr, n, to_user);
+}
+#else
+static inline void check_object_size(const void *ptr, unsigned long n,
+                                    bool to_user)
+{ }
+#endif /* CONFIG_HARDENED_USERCOPY */
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_THREAD_INFO_H */
index 3495578..f30c187 100644 (file)
@@ -114,8 +114,8 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
 #ifndef user_access_begin
 #define user_access_begin() do { } while (0)
 #define user_access_end() do { } while (0)
-#define unsafe_get_user(x, ptr) __get_user(x, ptr)
-#define unsafe_put_user(x, ptr) __put_user(x, ptr)
+#define unsafe_get_user(x, ptr, err) do { if (unlikely(__get_user(x, ptr))) goto err; } while (0)
+#define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0)
 #endif
 
 #endif         /* __LINUX_UACCESS_H__ */
index 41e6a24..82f3c91 100644 (file)
@@ -176,8 +176,8 @@ int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops);
 int tcf_unregister_action(struct tc_action_ops *a,
                          struct pernet_operations *ops);
 int tcf_action_destroy(struct list_head *actions, int bind);
-int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
-                   struct tcf_result *res);
+int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
+                   int nr_actions, struct tcf_result *res);
 int tcf_action_init(struct net *net, struct nlattr *nla,
                                  struct nlattr *est, char *n, int ovr,
                                  int bind, struct list_head *);
@@ -189,30 +189,17 @@ int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
 
-#define tc_no_actions(_exts) \
-       (list_empty(&(_exts)->actions))
-
-#define tc_for_each_action(_a, _exts) \
-       list_for_each_entry(a, &(_exts)->actions, list)
-
-#define tc_single_action(_exts) \
-       (list_is_singular(&(_exts)->actions))
+#endif /* CONFIG_NET_CLS_ACT */
 
 static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
                                           u64 packets, u64 lastuse)
 {
+#ifdef CONFIG_NET_CLS_ACT
        if (!a->ops->stats_update)
                return;
 
        a->ops->stats_update(a, bytes, packets, lastuse);
+#endif
 }
 
-#else /* CONFIG_NET_CLS_ACT */
-
-#define tc_no_actions(_exts) true
-#define tc_for_each_action(_a, _exts) while ((void)(_a), 0)
-#define tc_single_action(_exts) false
-#define tcf_action_stats_update(a, bytes, packets, lastuse)
-
-#endif /* CONFIG_NET_CLS_ACT */
 #endif
index ac1bc3c..7b0f886 100644 (file)
@@ -40,12 +40,12 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
                                           unsigned long,
                                           gfp_t);
 int rxrpc_kernel_send_data(struct rxrpc_call *, struct msghdr *, size_t);
+void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *);
 void rxrpc_kernel_abort_call(struct rxrpc_call *, u32);
 void rxrpc_kernel_end_call(struct rxrpc_call *);
 bool rxrpc_kernel_is_data_last(struct sk_buff *);
 u32 rxrpc_kernel_get_abort_code(struct sk_buff *);
 int rxrpc_kernel_get_error_number(struct sk_buff *);
-void rxrpc_kernel_data_delivered(struct sk_buff *);
 void rxrpc_kernel_free_skb(struct sk_buff *);
 struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long);
 int rxrpc_kernel_reject_call(struct socket *);
index 7a54a31..73ea256 100644 (file)
@@ -104,6 +104,7 @@ static inline void gre_build_header(struct sk_buff *skb, int hdr_len,
 
        skb_push(skb, hdr_len);
 
+       skb_set_inner_protocol(skb, proto);
        skb_reset_transport_header(skb);
        greh = (struct gre_base_hdr *)skb->data;
        greh->flags = gre_tnl_flags_to_gre_flags(flags);
index 0dc0a51..dce2d58 100644 (file)
@@ -128,7 +128,8 @@ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph)
        to = from | htonl(INET_ECN_CE << 20);
        *(__be32 *)iph = to;
        if (skb->ip_summed == CHECKSUM_COMPLETE)
-               skb->csum = csum_add(csum_sub(skb->csum, from), to);
+               skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
+                                    (__force __wsum)to);
        return 1;
 }
 
index b4faadb..cca510a 100644 (file)
@@ -3620,7 +3620,8 @@ struct ieee80211_ops {
 
        int (*join_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
        void (*leave_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
-       u32 (*get_expected_throughput)(struct ieee80211_sta *sta);
+       u32 (*get_expected_throughput)(struct ieee80211_hw *hw,
+                                      struct ieee80211_sta *sta);
        int (*get_txpower)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
                           int *dbm);
 
index 6f8d653..c99508d 100644 (file)
@@ -59,7 +59,8 @@ tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r)
 struct tcf_exts {
 #ifdef CONFIG_NET_CLS_ACT
        __u32   type; /* for backward compat(TCA_OLD_COMPAT) */
-       struct list_head actions;
+       int nr_actions;
+       struct tc_action **actions;
 #endif
        /* Map to export classifier specific extension TLV types to the
         * generic extensions API. Unsupported extensions must be set to 0.
@@ -72,7 +73,10 @@ static inline void tcf_exts_init(struct tcf_exts *exts, int action, int police)
 {
 #ifdef CONFIG_NET_CLS_ACT
        exts->type = 0;
-       INIT_LIST_HEAD(&exts->actions);
+       exts->nr_actions = 0;
+       exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
+                               GFP_KERNEL);
+       WARN_ON(!exts->actions); /* TODO: propagate the error to callers */
 #endif
        exts->action = action;
        exts->police = police;
@@ -89,7 +93,7 @@ static inline int
 tcf_exts_is_predicative(struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
-       return !list_empty(&exts->actions);
+       return exts->nr_actions;
 #else
        return 0;
 #endif
@@ -108,6 +112,20 @@ tcf_exts_is_available(struct tcf_exts *exts)
        return tcf_exts_is_predicative(exts);
 }
 
+/*
+ * Link each of the exts->nr_actions entries of exts->actions[] onto the
+ * caller-provided @actions list through the action's ->list member.
+ * Does nothing when CONFIG_NET_CLS_ACT is not set.
+ *
+ * NOTE(review): entries are passed to list_add() in ascending array
+ * order; if list_add() inserts at the list head, @actions ends up in
+ * reverse array order - confirm that no caller depends on the order.
+ */
+static inline void tcf_exts_to_list(const struct tcf_exts *exts,
+                                   struct list_head *actions)
+{
+#ifdef CONFIG_NET_CLS_ACT
+       int i;
+
+       for (i = 0; i < exts->nr_actions; i++) {
+               struct tc_action *a = exts->actions[i];
+
+               list_add(&a->list, actions);
+       }
+#endif
+}
+
 /**
  * tcf_exts_exec - execute tc filter extensions
  * @skb: socket buffer
@@ -124,12 +142,25 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
               struct tcf_result *res)
 {
 #ifdef CONFIG_NET_CLS_ACT
-       if (!list_empty(&exts->actions))
-               return tcf_action_exec(skb, &exts->actions, res);
+       if (exts->nr_actions)
+               return tcf_action_exec(skb, exts->actions, exts->nr_actions,
+                                      res);
 #endif
        return 0;
 }
 
+/*
+ * Predicates on the number of actions held by a struct tcf_exts.
+ * The nr_actions member only exists under CONFIG_NET_CLS_ACT; without it
+ * the macros collapse to the constants true and false.
+ */
+#ifdef CONFIG_NET_CLS_ACT
+
+#define tc_no_actions(_exts)  ((_exts)->nr_actions == 0)
+#define tc_single_action(_exts) ((_exts)->nr_actions == 1)
+
+#else /* CONFIG_NET_CLS_ACT */
+
+#define tc_no_actions(_exts) true
+#define tc_single_action(_exts) false
+
+#endif /* CONFIG_NET_CLS_ACT */
+
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
                      struct nlattr **tb, struct nlattr *rate_tlv,
                      struct tcf_exts *exts, bool ovr);
index c00e7d5..7717302 100644 (file)
@@ -1523,6 +1523,8 @@ static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unli
 {
        if (sk->sk_send_head == skb_unlinked)
                sk->sk_send_head = NULL;
+       if (tcp_sk(sk)->highest_sack == skb_unlinked)
+               tcp_sk(sk)->highest_sack = NULL;
 }
 
 static inline void tcp_init_send_head(struct sock *sk)
index 8e90dd2..e1f9673 100644 (file)
@@ -2115,22 +2115,17 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata,
                                       size_t len)
 {
        const void __user *p = udata->inbuf + offset;
-       bool ret = false;
+       bool ret;
        u8 *buf;
 
        if (len > USHRT_MAX)
                return false;
 
-       buf = kmalloc(len, GFP_KERNEL);
-       if (!buf)
+       buf = memdup_user(p, len);
+       if (IS_ERR(buf))
                return false;
 
-       if (copy_from_user(buf, p, len))
-               goto free;
-
        ret = !memchr_inv(buf, 0, len);
-
-free:
        kfree(buf);
        return ret;
 }
index 5144013..28c5da6 100644 (file)
@@ -330,24 +330,32 @@ TRACE_EVENT(itimer_expire,
 #ifdef CONFIG_NO_HZ_COMMON
 
 #define TICK_DEP_NAMES                                 \
-               tick_dep_name(NONE)                     \
+               tick_dep_mask_name(NONE)                \
                tick_dep_name(POSIX_TIMER)              \
                tick_dep_name(PERF_EVENTS)              \
                tick_dep_name(SCHED)                    \
                tick_dep_name_end(CLOCK_UNSTABLE)
 
 #undef tick_dep_name
+#undef tick_dep_mask_name
 #undef tick_dep_name_end
 
-#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
-#define tick_dep_name_end(sdep)  TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+/* The MASK will convert to their bits and they need to be processed too */
+#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_BIT_##sdep); \
+       TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+#define tick_dep_name_end(sdep)  TRACE_DEFINE_ENUM(TICK_DEP_BIT_##sdep); \
+       TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+/* NONE only has a mask defined for it */
+#define tick_dep_mask_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
 
 TICK_DEP_NAMES
 
 #undef tick_dep_name
+#undef tick_dep_mask_name
 #undef tick_dep_name_end
 
 #define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
+#define tick_dep_mask_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
 #define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }
 
 #define show_tick_dep_name(val)                                \
index 9c9c6ad..5cd4d4d 100644 (file)
@@ -14,6 +14,7 @@
 
 #include <linux/atmapi.h>
 #include <linux/atmioc.h>
+#include <linux/time.h>
 
 #define ZATM_GETPOOL   _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc)
                                                /* get pool statistics */
index da218fe..9e5fc16 100644 (file)
@@ -339,7 +339,7 @@ enum bpf_func_id {
        BPF_FUNC_skb_change_type,
 
        /**
-        * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+        * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb
         * @skb: pointer to skb
         * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
         * @index: index of the cgroup in the bpf_map
@@ -348,7 +348,7 @@ enum bpf_func_id {
         *   == 1 skb succeeded the cgroup2 descendant test
         *    < 0 error
         */
-       BPF_FUNC_skb_in_cgroup,
+       BPF_FUNC_skb_under_cgroup,
 
        /**
         * bpf_get_hash_recalc(skb)
index 163e8ad..4bd1f55 100644 (file)
@@ -16,7 +16,8 @@
 #define _UAPI__LINUX_IF_PPPOL2TP_H
 
 #include <linux/types.h>
-
+#include <linux/in.h>
+#include <linux/in6.h>
 
 /* Structure used to connect() the socket to a particular tunnel UDP
  * socket over IPv4.
index e128769..d37bbb1 100644 (file)
 #include <asm/byteorder.h>
 
 #include <linux/socket.h>
+#include <linux/if.h>
 #include <linux/if_ether.h>
 #include <linux/if_pppol2tp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
 
 /* For user-space programs to pick up these definitions
  * which they wouldn't get otherwise without defining __KERNEL__
index 1046f55..777b6cd 100644 (file)
@@ -2,6 +2,9 @@
 #define _UAPI_IF_TUNNEL_H_
 
 #include <linux/types.h>
+#include <linux/if.h>
+#include <linux/ip.h>
+#include <linux/in6.h>
 #include <asm/byteorder.h>
 
 
index 3d48014..30f031d 100644 (file)
@@ -1,11 +1,13 @@
 #ifndef _IPX_H_
 #define _IPX_H_
+#include <linux/libc-compat.h> /* for compatibility with glibc netipx/ipx.h */
 #include <linux/types.h>
 #include <linux/sockios.h>
 #include <linux/socket.h>
 #define IPX_NODE_LEN   6
 #define IPX_MTU                576
 
+#if __UAPI_DEF_SOCKADDR_IPX
 struct sockaddr_ipx {
        __kernel_sa_family_t sipx_family;
        __be16          sipx_port;
@@ -14,6 +16,7 @@ struct sockaddr_ipx {
        __u8            sipx_type;
        unsigned char   sipx_zero;      /* 16 byte fill */
 };
+#endif /* __UAPI_DEF_SOCKADDR_IPX */
 
 /*
  * So we can fit the extra info for SIOCSIFADDR into the address nicely
@@ -23,12 +26,15 @@ struct sockaddr_ipx {
 #define IPX_DLTITF     0
 #define IPX_CRTITF     1
 
+#if __UAPI_DEF_IPX_ROUTE_DEFINITION
 struct ipx_route_definition {
        __be32        ipx_network;
        __be32        ipx_router_network;
        unsigned char ipx_router_node[IPX_NODE_LEN];
 };
+#endif /* __UAPI_DEF_IPX_ROUTE_DEFINITION */
 
+#if __UAPI_DEF_IPX_INTERFACE_DEFINITION
 struct ipx_interface_definition {
        __be32        ipx_network;
        unsigned char ipx_device[16];
@@ -45,16 +51,20 @@ struct ipx_interface_definition {
 #define IPX_INTERNAL           2
        unsigned char ipx_node[IPX_NODE_LEN];
 };
-       
+#endif /* __UAPI_DEF_IPX_INTERFACE_DEFINITION */
+
+#if __UAPI_DEF_IPX_CONFIG_DATA
 struct ipx_config_data {
        unsigned char   ipxcfg_auto_select_primary;
        unsigned char   ipxcfg_auto_create_interfaces;
 };
+#endif /* __UAPI_DEF_IPX_CONFIG_DATA */
 
 /*
  * OLD Route Definition for backward compatibility.
  */
 
+#if __UAPI_DEF_IPX_ROUTE_DEF
 struct ipx_route_def {
        __be32          ipx_network;
        __be32          ipx_router_network;
@@ -67,6 +77,7 @@ struct ipx_route_def {
 #define IPX_RT_BLUEBOOK                2
 #define IPX_RT_ROUTED          1
 };
+#endif /* __UAPI_DEF_IPX_ROUTE_DEF */
 
 #define SIOCAIPXITFCRT         (SIOCPROTOPRIVATE)
 #define SIOCAIPXPRISLT         (SIOCPROTOPRIVATE + 1)
index e4f048e..44b8a6b 100644 (file)
 
 #endif /* _NETINET_IN_H */
 
+/* Coordinate with glibc netipx/ipx.h header. */
+#if defined(__NETIPX_IPX_H)
+
+#define __UAPI_DEF_SOCKADDR_IPX                        0
+#define __UAPI_DEF_IPX_ROUTE_DEFINITION                0
+#define __UAPI_DEF_IPX_INTERFACE_DEFINITION    0
+#define __UAPI_DEF_IPX_CONFIG_DATA             0
+#define __UAPI_DEF_IPX_ROUTE_DEF               0
+
+#else /* defined(__NETIPX_IPX_H) */
+
+#define __UAPI_DEF_SOCKADDR_IPX                        1
+#define __UAPI_DEF_IPX_ROUTE_DEFINITION                1
+#define __UAPI_DEF_IPX_INTERFACE_DEFINITION    1
+#define __UAPI_DEF_IPX_CONFIG_DATA             1
+#define __UAPI_DEF_IPX_ROUTE_DEF               1
+
+#endif /* defined(__NETIPX_IPX_H) */
+
 /* Definitions for xattr.h */
 #if defined(_SYS_XATTR_H)
 #define __UAPI_DEF_XATTR               0
 #define __UAPI_DEF_IN6_PKTINFO         1
 #define __UAPI_DEF_IP6_MTUINFO         1
 
+/* Definitions for ipx.h */
+#define __UAPI_DEF_SOCKADDR_IPX                        1
+#define __UAPI_DEF_IPX_ROUTE_DEFINITION                1
+#define __UAPI_DEF_IPX_INTERFACE_DEFINITION    1
+#define __UAPI_DEF_IPX_CONFIG_DATA             1
+#define __UAPI_DEF_IPX_ROUTE_DEF               1
+
 /* Definitions for xattr.h */
 #define __UAPI_DEF_XATTR               1
 
index 01751fa..c674ba2 100644 (file)
@@ -24,7 +24,7 @@ enum nft_registers {
        __NFT_REG_MAX,
 
        NFT_REG32_00    = 8,
-       MFT_REG32_01,
+       NFT_REG32_01,
        NFT_REG32_02,
        NFT_REG32_03,
        NFT_REG32_04,
index d95a301..54c3b4f 100644 (file)
@@ -583,7 +583,7 @@ enum ovs_userspace_attr {
 #define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
 
 struct ovs_action_trunc {
-       uint32_t max_len; /* Max packet size in bytes. */
+       __u32 max_len; /* Max packet size in bytes. */
 };
 
 /**
@@ -632,8 +632,8 @@ enum ovs_hash_alg {
  * @hash_basis: basis used for computing hash.
  */
 struct ovs_action_hash {
-       uint32_t  hash_alg;     /* One of ovs_hash_alg. */
-       uint32_t  hash_basis;
+       __u32  hash_alg;     /* One of ovs_hash_alg. */
+       __u32  hash_basis;
 };
 
 /**
index d304f4c..a406adc 100644 (file)
@@ -944,4 +944,68 @@ struct sctp_default_prinfo {
        __u16 pr_policy;
 };
 
+struct sctp_info {
+       __u32   sctpi_tag;
+       __u32   sctpi_state;
+       __u32   sctpi_rwnd;
+       __u16   sctpi_unackdata;
+       __u16   sctpi_penddata;
+       __u16   sctpi_instrms;
+       __u16   sctpi_outstrms;
+       __u32   sctpi_fragmentation_point;
+       __u32   sctpi_inqueue;
+       __u32   sctpi_outqueue;
+       __u32   sctpi_overall_error;
+       __u32   sctpi_max_burst;
+       __u32   sctpi_maxseg;
+       __u32   sctpi_peer_rwnd;
+       __u32   sctpi_peer_tag;
+       __u8    sctpi_peer_capable;
+       __u8    sctpi_peer_sack;
+       __u16   __reserved1;
+
+       /* assoc status info */
+       __u64   sctpi_isacks;
+       __u64   sctpi_osacks;
+       __u64   sctpi_opackets;
+       __u64   sctpi_ipackets;
+       __u64   sctpi_rtxchunks;
+       __u64   sctpi_outofseqtsns;
+       __u64   sctpi_idupchunks;
+       __u64   sctpi_gapcnt;
+       __u64   sctpi_ouodchunks;
+       __u64   sctpi_iuodchunks;
+       __u64   sctpi_oodchunks;
+       __u64   sctpi_iodchunks;
+       __u64   sctpi_octrlchunks;
+       __u64   sctpi_ictrlchunks;
+
+       /* primary transport info */
+       struct sockaddr_storage sctpi_p_address;
+       __s32   sctpi_p_state;
+       __u32   sctpi_p_cwnd;
+       __u32   sctpi_p_srtt;
+       __u32   sctpi_p_rto;
+       __u32   sctpi_p_hbinterval;
+       __u32   sctpi_p_pathmaxrxt;
+       __u32   sctpi_p_sackdelay;
+       __u32   sctpi_p_sackfreq;
+       __u32   sctpi_p_ssthresh;
+       __u32   sctpi_p_partial_bytes_acked;
+       __u32   sctpi_p_flight_size;
+       __u16   sctpi_p_error;
+       __u16   __reserved2;
+
+       /* sctp sock info */
+       __u32   sctpi_s_autoclose;
+       __u32   sctpi_s_adaptation_ind;
+       __u32   sctpi_s_pd_point;
+       __u8    sctpi_s_nodelay;
+       __u8    sctpi_s_disable_fragments;
+       __u8    sctpi_s_v4mapped;
+       __u8    sctpi_s_frag_interleave;
+       __u32   sctpi_s_type;
+       __u32   __reserved3;
+};
+
 #endif /* _UAPI_SCTP_H */
index 6b011c1..1d57ed3 100644 (file)
@@ -32,7 +32,7 @@
  */
 
 #ifndef _UAPI_LINUX_VIRTIO_VSOCK_H
-#define _UAPI_LINUX_VIRTIO_VOSCK_H
+#define _UAPI_LINUX_VIRTIO_VSOCK_H
 
 #include <linux/types.h>
 #include <linux/virtio_ids.h>
index cbae529..180d526 100644 (file)
@@ -136,8 +136,8 @@ struct cxl_event_afu_driver_reserved {
         *
         * Of course the contents will be ABI, but that's up the AFU driver.
         */
-       size_t data_size;
-       u8 data[];
+       __u32 data_size;
+       __u8 data[];
 };
 
 struct cxl_event {
index 9a37c54..b5486e6 100644 (file)
@@ -9,8 +9,8 @@
 
 DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
 
-DECLARE_PER_CPU(int, xen_vcpu_id);
-static inline int xen_vcpu_nr(int cpu)
+DECLARE_PER_CPU(uint32_t, xen_vcpu_id);
+static inline uint32_t xen_vcpu_nr(int cpu)
 {
        return per_cpu(xen_vcpu_id, cpu);
 }
index 6988649..cac3f09 100644 (file)
@@ -1761,6 +1761,7 @@ choice
 
 config SLAB
        bool "SLAB"
+       select HAVE_HARDENED_USERCOPY_ALLOCATOR
        help
          The regular slab allocator that is established and known to work
          well in all environments. It organizes cache hot objects in
@@ -1768,6 +1769,7 @@ config SLAB
 
 config SLUB
        bool "SLUB (Unqueued Allocator)"
+       select HAVE_HARDENED_USERCOPY_ALLOCATOR
        help
           SLUB is a slab allocator that minimizes cache line usage
           instead of managing queues of cached objects (SLAB approach).
index fff3650..570eeca 100644 (file)
@@ -26,11 +26,18 @@ struct bpf_htab {
        struct bucket *buckets;
        void *elems;
        struct pcpu_freelist freelist;
+       void __percpu *extra_elems;
        atomic_t count; /* number of elements in this hashtable */
        u32 n_buckets;  /* number of hash buckets */
        u32 elem_size;  /* size of each element in bytes */
 };
 
+enum extra_elem_state {
+       HTAB_NOT_AN_EXTRA_ELEM = 0, /* elem came from the freelist or kmalloc */
+       HTAB_EXTRA_ELEM_FREE, /* per-cpu extra elem, available for use */
+       HTAB_EXTRA_ELEM_USED /* per-cpu extra elem, currently handed out */
+};
+
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
        union {
@@ -38,7 +45,10 @@ struct htab_elem {
                struct bpf_htab *htab;
                struct pcpu_freelist_node fnode;
        };
-       struct rcu_head rcu;
+       union {
+               struct rcu_head rcu;
+               enum extra_elem_state state;
+       };
        u32 hash;
        char key[0] __aligned(8);
 };
@@ -113,6 +123,23 @@ free_elems:
        return err;
 }
 
+static int alloc_extra_elems(struct bpf_htab *htab) /* 0 or -ENOMEM */
+{
+       void __percpu *pptr; /* one elem_size-sized slot per possible CPU */
+       int cpu;
+
+       pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+       if (!pptr)
+               return -ENOMEM;
+
+       for_each_possible_cpu(cpu) { /* every slot starts out as FREE */
+               ((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
+                       HTAB_EXTRA_ELEM_FREE;
+       }
+       htab->extra_elems = pptr; /* stored only after all slots are set up */
+       return 0;
+}
+
 /* Called from syscall */
 static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 {
@@ -185,6 +212,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
        if (percpu)
                cost += (u64) round_up(htab->map.value_size, 8) *
                        num_possible_cpus() * htab->map.max_entries;
+       else
+              cost += (u64) htab->elem_size * num_possible_cpus();
 
        if (cost >= U32_MAX - PAGE_SIZE)
                /* make sure page count doesn't overflow */
@@ -212,14 +241,22 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
                raw_spin_lock_init(&htab->buckets[i].lock);
        }
 
+       if (!percpu) {
+               err = alloc_extra_elems(htab);
+               if (err)
+                       goto free_buckets;
+       }
+
        if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
                err = prealloc_elems_and_freelist(htab);
                if (err)
-                       goto free_buckets;
+                       goto free_extra_elems;
        }
 
        return &htab->map;
 
+free_extra_elems:
+       free_percpu(htab->extra_elems);
 free_buckets:
        kvfree(htab->buckets);
 free_htab:
@@ -349,7 +386,6 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
        if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
                free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
        kfree(l);
-
 }
 
 static void htab_elem_free_rcu(struct rcu_head *head)
@@ -370,6 +406,11 @@ static void htab_elem_free_rcu(struct rcu_head *head)
 
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
+       if (l->state == HTAB_EXTRA_ELEM_USED) {
+               l->state = HTAB_EXTRA_ELEM_FREE;
+               return;
+       }
+
        if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
                pcpu_freelist_push(&htab->freelist, &l->fnode);
        } else {
@@ -381,25 +422,44 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
                                         void *value, u32 key_size, u32 hash,
-                                        bool percpu, bool onallcpus)
+                                        bool percpu, bool onallcpus,
+                                        bool old_elem_exists)
 {
        u32 size = htab->map.value_size;
        bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
        struct htab_elem *l_new;
        void __percpu *pptr;
+       int err = 0;
 
        if (prealloc) {
                l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
                if (!l_new)
-                       return ERR_PTR(-E2BIG);
+                       err = -E2BIG;
        } else {
                if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
                        atomic_dec(&htab->count);
-                       return ERR_PTR(-E2BIG);
+                       err = -E2BIG;
+               } else {
+                       l_new = kmalloc(htab->elem_size,
+                                       GFP_ATOMIC | __GFP_NOWARN);
+                       if (!l_new)
+                               return ERR_PTR(-ENOMEM);
                }
-               l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
-               if (!l_new)
-                       return ERR_PTR(-ENOMEM);
+       }
+
+       if (err) {
+               if (!old_elem_exists)
+                       return ERR_PTR(err);
+
+               /* if we're updating the existing element and the hash table
+                * is full, use per-cpu extra elems
+                */
+               l_new = this_cpu_ptr(htab->extra_elems);
+               if (l_new->state != HTAB_EXTRA_ELEM_FREE)
+                       return ERR_PTR(-E2BIG);
+               l_new->state = HTAB_EXTRA_ELEM_USED;
+       } else {
+               l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
        }
 
        memcpy(l_new->key, key, key_size);
@@ -489,7 +549,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
        if (ret)
                goto err;
 
-       l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
+       l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
+                               !!l_old);
        if (IS_ERR(l_new)) {
                /* all pre-allocated elements are in use or memory exhausted */
                ret = PTR_ERR(l_new);
@@ -563,7 +624,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
                }
        } else {
                l_new = alloc_htab_elem(htab, key, value, key_size,
-                                       hash, true, onallcpus);
+                                       hash, true, onallcpus, false);
                if (IS_ERR(l_new)) {
                        ret = PTR_ERR(l_new);
                        goto err;
@@ -652,6 +713,7 @@ static void htab_map_free(struct bpf_map *map)
                htab_free_elems(htab);
                pcpu_freelist_destroy(&htab->freelist);
        }
+       free_percpu(htab->extra_elems);
        kvfree(htab->buckets);
        kfree(htab);
 }
index f72f23b..daea765 100644 (file)
@@ -194,6 +194,7 @@ struct verifier_env {
        struct verifier_state_list **explored_states; /* search pruning optimization */
        struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
        u32 used_map_cnt;               /* number of used maps */
+       u32 id_gen;                     /* used to generate unique reg IDs */
        bool allow_ptr_leaks;
 };
 
@@ -1052,7 +1053,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
                        goto error;
                break;
        case BPF_MAP_TYPE_CGROUP_ARRAY:
-               if (func_id != BPF_FUNC_skb_in_cgroup)
+               if (func_id != BPF_FUNC_skb_under_cgroup)
                        goto error;
                break;
        default:
@@ -1074,7 +1075,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
                if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
                        goto error;
                break;
-       case BPF_FUNC_skb_in_cgroup:
+       case BPF_FUNC_skb_under_cgroup:
                if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
                        goto error;
                break;
@@ -1301,7 +1302,7 @@ add_imm:
                /* dst_reg stays as pkt_ptr type and since some positive
                 * integer value was added to the pointer, increment its 'id'
                 */
-               dst_reg->id++;
+               dst_reg->id = ++env->id_gen;
 
                /* something was added to pkt_ptr, set range and off to zero */
                dst_reg->off = 0;
index a19550d..3cfabdf 100644 (file)
@@ -242,18 +242,6 @@ unlock:
        return ret;
 }
 
-static void event_function_local(struct perf_event *event, event_f func, void *data)
-{
-       struct event_function_struct efs = {
-               .event = event,
-               .func = func,
-               .data = data,
-       };
-
-       int ret = event_function(&efs);
-       WARN_ON_ONCE(ret);
-}
-
 static void event_function_call(struct perf_event *event, event_f func, void *data)
 {
        struct perf_event_context *ctx = event->ctx;
@@ -303,6 +291,54 @@ again:
        raw_spin_unlock_irq(&ctx->lock);
 }
 
+/*
+ * Similar to event_function_call() + event_function(), but hard assumes IRQs
+ * are already disabled and we're on the right CPU.
+ *
+ * Calls @func(@event, cpuctx, ctx, @data) between perf_ctx_lock() and
+ * perf_ctx_unlock(), where ctx is @event->ctx and cpuctx comes from
+ * __get_cpu_context(ctx).
+ *
+ * @func is skipped when ctx->task is TASK_TOMBSTONE (tested before locking
+ * and again afterwards), and when a task context has is_active set but its
+ * task is not 'current' or cpuctx->task_ctx is not this ctx; both of those
+ * mismatches go through WARN_ON_ONCE(). For a context without a task, a
+ * mismatch with &cpuctx->ctx only warns and @func is still called.
+ */
+static void event_function_local(struct perf_event *event, event_f func, void *data)
+{
+       struct perf_event_context *ctx = event->ctx;
+       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+       struct task_struct *task = READ_ONCE(ctx->task);
+       struct perf_event_context *task_ctx = NULL;
+
+       WARN_ON_ONCE(!irqs_disabled());
+
+       if (task) {
+               if (task == TASK_TOMBSTONE)
+                       return;
+
+               task_ctx = ctx; /* task context: hand it to perf_ctx_lock() too */
+       }
+
+       perf_ctx_lock(cpuctx, task_ctx);
+
+       task = ctx->task; /* re-read after perf_ctx_lock() */
+       if (task == TASK_TOMBSTONE)
+               goto unlock;
+
+       if (task) {
+               /*
+                * We must be either inactive or active and the right task,
+                * otherwise we're screwed, since we cannot IPI to somewhere
+                * else.
+                */
+               if (ctx->is_active) {
+                       if (WARN_ON_ONCE(task != current))
+                               goto unlock;
+
+                       if (WARN_ON_ONCE(cpuctx->task_ctx != ctx))
+                               goto unlock;
+               }
+       } else {
+               WARN_ON_ONCE(&cpuctx->ctx != ctx);
+       }
+
+       func(event, cpuctx, ctx, data);
+unlock:
+       perf_ctx_unlock(cpuctx, task_ctx);
+}
+
 #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
                       PERF_FLAG_FD_OUTPUT  |\
                       PERF_FLAG_PID_CGROUP |\
@@ -843,6 +879,32 @@ perf_cgroup_mark_enabled(struct perf_event *event,
                }
        }
 }
+
+/*
+ * Update cpuctx->cgrp so that it is set when first cgroup event is added and
+ * cleared when last cgroup event is removed.
+ *
+ * @add selects the direction: true increments ctx->nr_cgroups, false
+ * decrements it. Events for which is_cgroup_event() is false are ignored.
+ * cpuctx->cgrp is written only when ctx->nr_cgroups goes 0 -> 1 (set to
+ * @event->cgrp) or reaches 0 (set to NULL); all other calls just adjust
+ * the counter.
+ */
+static inline void
+list_update_cgroup_event(struct perf_event *event,
+                        struct perf_event_context *ctx, bool add)
+{
+       struct perf_cpu_context *cpuctx;
+
+       if (!is_cgroup_event(event))
+               return;
+
+       if (add && ctx->nr_cgroups++)
+               return;
+       else if (!add && --ctx->nr_cgroups)
+               return;
+       /*
+        * Because cgroup events are always per-cpu events,
+        * this will always be called from the right CPU.
+        */
+       cpuctx = __get_cpu_context(ctx);
+       cpuctx->cgrp = add ? event->cgrp : NULL;
+}
+
 #else /* !CONFIG_CGROUP_PERF */
 
 static inline bool
@@ -920,6 +982,13 @@ perf_cgroup_mark_enabled(struct perf_event *event,
                         struct perf_event_context *ctx)
 {
 }
+
+static inline void
+list_update_cgroup_event(struct perf_event *event,
+                        struct perf_event_context *ctx, bool add)
+{
+}
+
 #endif
 
 /*
@@ -1392,6 +1461,7 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
 static void
 list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 {
+
        lockdep_assert_held(&ctx->lock);
 
        WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
@@ -1412,8 +1482,7 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
                list_add_tail(&event->group_entry, list);
        }
 
-       if (is_cgroup_event(event))
-               ctx->nr_cgroups++;
+       list_update_cgroup_event(event, ctx, true);
 
        list_add_rcu(&event->event_entry, &ctx->event_list);
        ctx->nr_events++;
@@ -1581,8 +1650,6 @@ static void perf_group_attach(struct perf_event *event)
 static void
 list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 {
-       struct perf_cpu_context *cpuctx;
-
        WARN_ON_ONCE(event->ctx != ctx);
        lockdep_assert_held(&ctx->lock);
 
@@ -1594,20 +1661,7 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 
        event->attach_state &= ~PERF_ATTACH_CONTEXT;
 
-       if (is_cgroup_event(event)) {
-               ctx->nr_cgroups--;
-               /*
-                * Because cgroup events are always per-cpu events, this will
-                * always be called from the right CPU.
-                */
-               cpuctx = __get_cpu_context(ctx);
-               /*
-                * If there are no more cgroup events then clear cgrp to avoid
-                * stale pointer in update_cgrp_time_from_cpuctx().
-                */
-               if (!ctx->nr_cgroups)
-                       cpuctx->cgrp = NULL;
-       }
+       list_update_cgroup_event(event, ctx, false);
 
        ctx->nr_events--;
        if (event->attr.inherit_stat)
@@ -1716,8 +1770,8 @@ static inline int pmu_filter_match(struct perf_event *event)
 static inline int
 event_filter_match(struct perf_event *event)
 {
-       return (event->cpu == -1 || event->cpu == smp_processor_id())
-           && perf_cgroup_match(event) && pmu_filter_match(event);
+       return (event->cpu == -1 || event->cpu == smp_processor_id()) &&
+              perf_cgroup_match(event) && pmu_filter_match(event);
 }
 
 static void
@@ -1737,8 +1791,8 @@ event_sched_out(struct perf_event *event,
         * maintained, otherwise bogus information is return
         * via read() for time_enabled, time_running:
         */
-       if (event->state == PERF_EVENT_STATE_INACTIVE
-           && !event_filter_match(event)) {
+       if (event->state == PERF_EVENT_STATE_INACTIVE &&
+           !event_filter_match(event)) {
                delta = tstamp - event->tstamp_stopped;
                event->tstamp_running += delta;
                event->tstamp_stopped = tstamp;
@@ -2236,10 +2290,15 @@ perf_install_in_context(struct perf_event_context *ctx,
 
        lockdep_assert_held(&ctx->mutex);
 
-       event->ctx = ctx;
        if (event->cpu != -1)
                event->cpu = cpu;
 
+       /*
+        * Ensures that if we can observe event->ctx, both the event and ctx
+        * will be 'complete'. See perf_iterate_sb_cpu().
+        */
+       smp_store_release(&event->ctx, ctx);
+
        if (!task) {
                cpu_function_call(cpu, __perf_install_in_context, event);
                return;
@@ -3490,9 +3549,10 @@ static int perf_event_read(struct perf_event *event, bool group)
                        .group = group,
                        .ret = 0,
                };
-               smp_call_function_single(event->oncpu,
-                                        __perf_event_read, &data, 1);
-               ret = data.ret;
+               ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
+               /* The event must have been read from an online CPU: */
+               WARN_ON_ONCE(ret);
+               ret = ret ? : data.ret;
        } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
                struct perf_event_context *ctx = event->ctx;
                unsigned long flags;
@@ -5969,6 +6029,14 @@ static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
        struct perf_event *event;
 
        list_for_each_entry_rcu(event, &pel->list, sb_list) {
+               /*
+                * Skip events that are not fully formed yet; ensure that
+                * if we observe event->ctx, both event and ctx will be
+                * complete enough. See perf_install_in_context().
+                */
+               if (!smp_load_acquire(&event->ctx))
+                       continue;
+
                if (event->state < PERF_EVENT_STATE_INACTIVE)
                        continue;
                if (!event_filter_match(event))
@@ -6098,7 +6166,7 @@ static int __perf_pmu_output_stop(void *info)
 {
        struct perf_event *event = info;
        struct pmu *pmu = event->pmu;
-       struct perf_cpu_context *cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+       struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
        struct remote_output ro = {
                .rb     = event->rb,
        };
@@ -6552,15 +6620,6 @@ got_name:
        kfree(buf);
 }
 
-/*
- * Whether this @filter depends on a dynamic object which is not loaded
- * yet or its load addresses are not known.
- */
-static bool perf_addr_filter_needs_mmap(struct perf_addr_filter *filter)
-{
-       return filter->filter && filter->inode;
-}
-
 /*
  * Check whether inode and address range match filter criteria.
  */
@@ -6622,6 +6681,13 @@ static void perf_addr_filters_adjust(struct vm_area_struct *vma)
        struct perf_event_context *ctx;
        int ctxn;
 
+       /*
+        * Data tracing isn't supported yet and as such there is no need
+        * to keep track of anything that isn't related to executable code:
+        */
+       if (!(vma->vm_flags & VM_EXEC))
+               return;
+
        rcu_read_lock();
        for_each_task_context_nr(ctxn) {
                ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
@@ -7774,7 +7840,11 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
        list_for_each_entry(filter, &ifh->list, entry) {
                event->addr_filters_offs[count] = 0;
 
-               if (perf_addr_filter_needs_mmap(filter))
+               /*
+                * Adjust base offset if the filter is associated to a binary
+                * that needs to be mapped:
+                */
+               if (filter->inode)
                        event->addr_filters_offs[count] =
                                perf_addr_filter_apply(filter, mm);
 
@@ -7905,8 +7975,10 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
                                        goto fail;
                        }
 
-                       if (token == IF_SRC_FILE) {
-                               filename = match_strdup(&args[2]);
+                       if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
+                               int fpos = filter->range ? 2 : 1;
+
+                               filename = match_strdup(&args[fpos]);
                                if (!filename) {
                                        ret = -ENOMEM;
                                        goto fail;
index b7a525a..8c50276 100644 (file)
@@ -172,8 +172,10 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
        err = -EAGAIN;
        ptep = page_check_address(page, mm, addr, &ptl, 0);
-       if (!ptep)
+       if (!ptep) {
+               mem_cgroup_cancel_charge(kpage, memcg, false);
                goto unlock;
+       }
 
        get_page(kpage);
        page_add_new_anon_rmap(kpage, vma, addr, false);
@@ -200,7 +202,6 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
        err = 0;
  unlock:
-       mem_cgroup_cancel_charge(kpage, memcg, false);
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
        unlock_page(page);
        return err;
index 33664f7..46cb3a3 100644 (file)
@@ -179,7 +179,15 @@ int __read_mostly futex_cmpxchg_enabled;
  * Futex flags used to encode options to functions and preserve them across
  * restarts.
  */
-#define FLAGS_SHARED           0x01
+#ifdef CONFIG_MMU
+# define FLAGS_SHARED          0x01
+#else
+/*
+ * NOMMU does not have per process address space. Let the compiler optimize
+ * code away.
+ */
+# define FLAGS_SHARED          0x00
+#endif
 #define FLAGS_CLOCKRT          0x02
 #define FLAGS_HAS_TIMEOUT      0x04
 
@@ -405,6 +413,16 @@ static void get_futex_key_refs(union futex_key *key)
        if (!key->both.ptr)
                return;
 
+       /*
+        * On MMU less systems futexes are always "private" as there is no per
+        * process address space. We need the smp wmb nevertheless - yes,
+        * arch/blackfin has MMU less SMP ...
+        */
+       if (!IS_ENABLED(CONFIG_MMU)) {
+               smp_mb(); /* explicit smp_mb(); (B) */
+               return;
+       }
+
        switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
        case FUT_OFF_INODE:
                ihold(key->shared.inode); /* implies smp_mb(); (B) */
@@ -436,6 +454,9 @@ static void drop_futex_key_refs(union futex_key *key)
                return;
        }
 
+       if (!IS_ENABLED(CONFIG_MMU))
+               return;
+
        switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
        case FUT_OFF_INODE:
                iput(key->shared.inode);
index f689593..32f6cfc 100644 (file)
@@ -39,6 +39,7 @@ struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs)
                return NULL;
        }
 
+       get_online_cpus();
        if (max_vecs >= num_online_cpus()) {
                cpumask_copy(affinity_mask, cpu_online_mask);
                *nr_vecs = num_online_cpus();
@@ -56,6 +57,7 @@ struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs)
                }
                *nr_vecs = vecs;
        }
+       put_online_cpus();
 
        return affinity_mask;
 }
index b4c1bc7..6373890 100644 (file)
@@ -820,6 +820,17 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
        desc->name = name;
 
        if (handle != handle_bad_irq && is_chained) {
+               /*
+                * We're about to start this interrupt immediately,
+                * hence the need to set the trigger configuration.
+                * But the .set_type callback may have overridden the
+                * flow handler, ignoring that we're dealing with a
+                * chained interrupt. Reset it immediately because we
+                * do know better.
+                */
+               __irq_set_trigger(desc, irqd_get_trigger_type(&desc->irq_data));
+               desc->handle_irq = handle;
+
                irq_settings_set_noprobe(desc);
                irq_settings_set_norequest(desc);
                irq_settings_set_nothread(desc);
index 73a2b78..9530fcd 100644 (file)
@@ -1681,8 +1681,10 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
        action->dev_id = dev_id;
 
        retval = irq_chip_pm_get(&desc->irq_data);
-       if (retval < 0)
+       if (retval < 0) {
+               kfree(action);
                return retval;
+       }
 
        chip_bus_lock(desc);
        retval = __setup_irq(irq, desc, action);
@@ -1985,8 +1987,10 @@ int request_percpu_irq(unsigned int irq, irq_handler_t handler,
        action->percpu_dev_id = dev_id;
 
        retval = irq_chip_pm_get(&desc->irq_data);
-       if (retval < 0)
+       if (retval < 0) {
+               kfree(action);
                return retval;
+       }
 
        chip_bus_lock(desc);
        retval = __setup_irq(irq, desc, action);
index 5499935..19e9dfb 100644 (file)
@@ -359,6 +359,17 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
                else
                        dev_dbg(dev, "irq [%d-%d] for MSI\n",
                                virq, virq + desc->nvec_used - 1);
+               /*
+                * This flag is set by the PCI layer as we need to activate
+                * the MSI entries before the PCI layer enables MSI in the
+                * card. Otherwise the card latches a random msi message.
+                */
+               if (info->flags & MSI_FLAG_ACTIVATE_EARLY) {
+                       struct irq_data *irq_data;
+
+                       irq_data = irq_domain_get_irq_data(domain, desc->irq);
+                       irq_domain_activate_irq(irq_data);
+               }
        }
 
        return 0;
index 37649e6..8a99abf 100644 (file)
@@ -450,7 +450,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
                                goto gotlock;
                        }
                }
-               WRITE_ONCE(pn->state, vcpu_halted);
+               WRITE_ONCE(pn->state, vcpu_hashed);
                qstat_inc(qstat_pv_wait_head, true);
                qstat_inc(qstat_pv_wait_again, waitcnt);
                pv_wait(&l->locked, _Q_SLOW_VAL);
index 22e0253..b9d0315 100644 (file)
@@ -153,7 +153,6 @@ static ssize_t qstat_read(struct file *file, char __user *user_buf,
                 */
                if ((counter == qstat_pv_latency_kick) ||
                    (counter == qstat_pv_latency_wake)) {
-                       stat = 0;
                        if (kicks)
                                stat = DIV_ROUND_CLOSEST_ULL(stat, kicks);
                }
index a881c6a..33c79b6 100644 (file)
@@ -300,12 +300,12 @@ static int create_image(int platform_mode)
        save_processor_state();
        trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true);
        error = swsusp_arch_suspend();
+       /* Restore control flow magically appears here */
+       restore_processor_state();
        trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false);
        if (error)
                printk(KERN_ERR "PM: Error %d creating hibernation image\n",
                        error);
-       /* Restore control flow magically appears here */
-       restore_processor_state();
        if (!in_suspend)
                events_check_enabled = false;
 
index 9a0178c..b022284 100644 (file)
@@ -835,9 +835,9 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
  */
 static bool rtree_next_node(struct memory_bitmap *bm)
 {
-       bm->cur.node = list_entry(bm->cur.node->list.next,
-                                 struct rtree_node, list);
-       if (&bm->cur.node->list != &bm->cur.zone->leaves) {
+       if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) {
+               bm->cur.node = list_entry(bm->cur.node->list.next,
+                                         struct rtree_node, list);
                bm->cur.node_pfn += BM_BITS_PER_BLOCK;
                bm->cur.node_bit  = 0;
                touch_softlockup_watchdog();
@@ -845,9 +845,9 @@ static bool rtree_next_node(struct memory_bitmap *bm)
        }
 
        /* No more nodes, goto next zone */
-       bm->cur.zone = list_entry(bm->cur.zone->list.next,
+       if (!list_is_last(&bm->cur.zone->list, &bm->zones)) {
+               bm->cur.zone = list_entry(bm->cur.zone->list.next,
                                  struct mem_zone_bm_rtree, list);
-       if (&bm->cur.zone->list != &bm->zones) {
                bm->cur.node = list_entry(bm->cur.zone->leaves.next,
                                          struct rtree_node, list);
                bm->cur.node_pfn = 0;
index 276762f..d5760c4 100644 (file)
@@ -9,10 +9,10 @@
 
 char *_braille_console_setup(char **str, char **brl_options)
 {
-       if (!memcmp(*str, "brl,", 4)) {
+       if (!strncmp(*str, "brl,", 4)) {
                *brl_options = "";
                *str += 4;
-       } else if (!memcmp(str, "brl=", 4)) {
+       } else if (!strncmp(*str, "brl=", 4)) {
                *brl_options = *str + 4;
                *str = strchr(*brl_options, ',');
                if (!*str)
index 5d4505f..7fd2838 100644 (file)
  */
 #include <linux/percpu.h>
 
-typedef __printf(2, 0) int (*printk_func_t)(int level, const char *fmt,
-                                           va_list args);
+typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args);
 
-__printf(2, 0)
-int vprintk_default(int level, const char *fmt, va_list args);
+int __printf(1, 0) vprintk_default(const char *fmt, va_list args);
 
 #ifdef CONFIG_PRINTK_NMI
 
@@ -33,10 +31,9 @@ extern raw_spinlock_t logbuf_lock;
  * via per-CPU variable.
  */
 DECLARE_PER_CPU(printk_func_t, printk_func);
-__printf(2, 0)
-static inline int vprintk_func(int level, const char *fmt, va_list args)
+static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
 {
-       return this_cpu_read(printk_func)(level, fmt, args);
+       return this_cpu_read(printk_func)(fmt, args);
 }
 
 extern atomic_t nmi_message_lost;
@@ -47,10 +44,9 @@ static inline int get_nmi_message_lost(void)
 
 #else /* CONFIG_PRINTK_NMI */
 
-__printf(2, 0)
-static inline int vprintk_func(int level, const char *fmt, va_list args)
+static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
 {
-       return vprintk_default(level, fmt, args);
+       return vprintk_default(fmt, args);
 }
 
 static inline int get_nmi_message_lost(void)
index bc3eeb1..b69eb8a 100644 (file)
@@ -58,7 +58,7 @@ static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq);
  * one writer running. But the buffer might get flushed from another
  * CPU, so we need to be careful.
  */
-static int vprintk_nmi(int level, const char *fmt, va_list args)
+static int vprintk_nmi(const char *fmt, va_list args)
 {
        struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
        int add = 0;
@@ -79,16 +79,7 @@ again:
        if (!len)
                smp_rmb();
 
-       if (level != LOGLEVEL_DEFAULT) {
-               add = snprintf(s->buffer + len, sizeof(s->buffer) - len,
-                               KERN_SOH "%c", '0' + level);
-               add += vsnprintf(s->buffer + len + add,
-                                sizeof(s->buffer) - len - add,
-                                fmt, args);
-       } else {
-               add = vsnprintf(s->buffer + len, sizeof(s->buffer) - len,
-                               fmt, args);
-       }
+       add = vsnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args);
 
        /*
         * Do it once again if the buffer has been flushed in the meantime.
index a5ef95c..eea6dbc 100644 (file)
@@ -1930,28 +1930,7 @@ asmlinkage int printk_emit(int facility, int level,
 }
 EXPORT_SYMBOL(printk_emit);
 
-#ifdef CONFIG_PRINTK
-#define define_pr_level(func, loglevel)                                \
-asmlinkage __visible void func(const char *fmt, ...)           \
-{                                                              \
-       va_list args;                                           \
-                                                               \
-       va_start(args, fmt);                                    \
-       vprintk_default(loglevel, fmt, args);                   \
-       va_end(args);                                           \
-}                                                              \
-EXPORT_SYMBOL(func)
-
-define_pr_level(__pr_emerg, LOGLEVEL_EMERG);
-define_pr_level(__pr_alert, LOGLEVEL_ALERT);
-define_pr_level(__pr_crit, LOGLEVEL_CRIT);
-define_pr_level(__pr_err, LOGLEVEL_ERR);
-define_pr_level(__pr_warn, LOGLEVEL_WARNING);
-define_pr_level(__pr_notice, LOGLEVEL_NOTICE);
-define_pr_level(__pr_info, LOGLEVEL_INFO);
-#endif
-
-int vprintk_default(int level, const char *fmt, va_list args)
+int vprintk_default(const char *fmt, va_list args)
 {
        int r;
 
@@ -1961,7 +1940,7 @@ int vprintk_default(int level, const char *fmt, va_list args)
                return r;
        }
 #endif
-       r = vprintk_emit(0, level, NULL, 0, fmt, args);
+       r = vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
 
        return r;
 }
@@ -1994,7 +1973,7 @@ asmlinkage __visible int printk(const char *fmt, ...)
        int r;
 
        va_start(args, fmt);
-       r = vprintk_func(LOGLEVEL_DEFAULT, fmt, args);
+       r = vprintk_func(fmt, args);
        va_end(args);
 
        return r;
index 5c883fe..2a906f2 100644 (file)
@@ -74,6 +74,7 @@
 #include <linux/context_tracking.h>
 #include <linux/compiler.h>
 #include <linux/frame.h>
+#include <linux/prefetch.h>
 
 #include <asm/switch_to.h>
 #include <asm/tlb.h>
@@ -2971,6 +2972,23 @@ DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
 EXPORT_PER_CPU_SYMBOL(kstat);
 EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
 
+/*
+ * The function fair_sched_class.update_curr accesses the struct curr
+ * and its field curr->exec_start; when called from task_sched_runtime(),
+ * we observe a high rate of cache misses in practice.
+ * Prefetching this data results in improved performance.
+ */
+static inline void prefetch_curr_exec_start(struct task_struct *p)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+       struct sched_entity *curr = (&p->se)->cfs_rq->curr;
+#else
+       struct sched_entity *curr = (&task_rq(p)->cfs)->curr;
+#endif
+       prefetch(curr);
+       prefetch(&curr->exec_start);
+}
+
 /*
  * Return accounted runtime for the task.
  * In case the task is currently running, return the runtime plus current's
@@ -3005,6 +3023,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
         * thread, breaking clock_gettime().
         */
        if (task_current(rq, p) && task_on_rq_queued(p)) {
+               prefetch_curr_exec_start(p);
                update_rq_clock(rq);
                p->sched_class->update_curr(rq);
        }
index 5be5882..d418449 100644 (file)
@@ -168,7 +168,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
 
        if (old_idx == IDX_INVALID) {
                cp->size++;
-               cp->elements[cp->size - 1].dl = 0;
+               cp->elements[cp->size - 1].dl = dl;
                cp->elements[cp->size - 1].cpu = cpu;
                cp->elements[cpu].idx = cp->size - 1;
                cpudl_change_key(cp, cp->size - 1, dl);
index 1934f65..a846cf8 100644 (file)
@@ -263,6 +263,11 @@ void account_idle_time(cputime_t cputime)
                cpustat[CPUTIME_IDLE] += (__force u64) cputime;
 }
 
+/*
+ * When a guest is interrupted for a longer amount of time, missed clock
+ * ticks are not redelivered later. Due to that, this function may on
+ * occasion account more time than the calling functions think elapsed.
+ */
 static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
 {
 #ifdef CONFIG_PARAVIRT
@@ -371,7 +376,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
         * idle, or potentially user or system time. Due to rounding,
         * other time can exceed ticks occasionally.
         */
-       other = account_other_time(cputime);
+       other = account_other_time(ULONG_MAX);
        if (other >= cputime)
                return;
        cputime -= other;
@@ -486,7 +491,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
        }
 
        cputime = cputime_one_jiffy;
-       steal = steal_account_process_time(cputime);
+       steal = steal_account_process_time(ULONG_MAX);
 
        if (steal >= cputime)
                return;
@@ -508,13 +513,21 @@ void account_process_tick(struct task_struct *p, int user_tick)
  */
 void account_idle_ticks(unsigned long ticks)
 {
+       cputime_t cputime, steal;
 
        if (sched_clock_irqtime) {
                irqtime_account_idle_ticks(ticks);
                return;
        }
 
-       account_idle_time(jiffies_to_cputime(ticks));
+       cputime = jiffies_to_cputime(ticks);
+       steal = steal_account_process_time(ULONG_MAX);
+
+       if (steal >= cputime)
+               return;
+
+       cputime -= steal;
+       account_idle_time(cputime);
 }
 
 /*
@@ -606,19 +619,25 @@ static void cputime_adjust(struct task_cputime *curr,
        stime = curr->stime;
        utime = curr->utime;
 
-       if (utime == 0) {
-               stime = rtime;
+       /*
+        * If either stime or both stime and utime are 0, assume all runtime is
+        * userspace. Once a task gets some ticks, the monotonicity code at
+        * 'update' will ensure things converge to the observed ratio.
+        */
+       if (stime == 0) {
+               utime = rtime;
                goto update;
        }
 
-       if (stime == 0) {
-               utime = rtime;
+       if (utime == 0) {
+               stime = rtime;
                goto update;
        }
 
        stime = scale_stime((__force u64)stime, (__force u64)rtime,
                            (__force u64)(stime + utime));
 
+update:
        /*
         * Make sure stime doesn't go backwards; this preserves monotonicity
         * for utime because rtime is monotonic.
@@ -641,7 +660,6 @@ static void cputime_adjust(struct task_cputime *curr,
                stime = rtime - utime;
        }
 
-update:
        prev->stime = stime;
        prev->utime = utime;
 out:
@@ -686,6 +704,13 @@ static cputime_t get_vtime_delta(struct task_struct *tsk)
        unsigned long now = READ_ONCE(jiffies);
        cputime_t delta, other;
 
+       /*
+        * Unlike tick based timing, vtime based timing never has lost
+        * ticks, so there is no need for steal time accounting to make up
+        * for lost ticks. Vtime accounts a rounded version of actual
+        * elapsed time. Limit account_other_time to prevent rounding
+        * errors from causing elapsed vtime to go negative.
+        */
        delta = jiffies_to_cputime(now - tsk->vtime_snap);
        other = account_other_time(delta);
        WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
index fcb7f02..1ce8867 100644 (file)
@@ -658,8 +658,11 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
         *
         * XXX figure out if select_task_rq_dl() deals with offline cpus.
         */
-       if (unlikely(!rq->online))
+       if (unlikely(!rq->online)) {
+               lockdep_unpin_lock(&rq->lock, rf.cookie);
                rq = dl_task_offline_migration(rq, p);
+               rf.cookie = lockdep_pin_lock(&rq->lock);
+       }
 
        /*
         * Queueing this task back might have overloaded rq, check if we need
index 4088eed..039de34 100644 (file)
@@ -4269,7 +4269,7 @@ static void sync_throttle(struct task_group *tg, int cpu)
        pcfs_rq = tg->parent->cfs_rq[cpu];
 
        cfs_rq->throttle_count = pcfs_rq->throttle_count;
-       pcfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
+       cfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
 }
 
 /* conditionally throttle active cfs_rq's from put_prev_entity() */
index b43d0b2..a13bbda 100644 (file)
@@ -2140,6 +2140,21 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
        return 0;
 }
 
+static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
+                                int *valp,
+                                int write, void *data)
+{
+       if (write) {
+               if (*negp)
+                       return -EINVAL;
+               *valp = *lvalp;
+       } else {
+               unsigned int val = *valp;
+               *lvalp = (unsigned long)val;
+       }
+       return 0;
+}
+
 static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
 
 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
@@ -2259,8 +2274,27 @@ static int do_proc_dointvec(struct ctl_table *table, int write,
 int proc_dointvec(struct ctl_table *table, int write,
                     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-    return do_proc_dointvec(table,write,buffer,lenp,ppos,
-                           NULL,NULL);
+       return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
+}
+
+/**
+ * proc_douintvec - read a vector of unsigned integers
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
+ * values from/to the user buffer, treated as an ASCII string.
+ *
+ * Returns 0 on success.
+ */
+int proc_douintvec(struct ctl_table *table, int write,
+                    void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       return do_proc_dointvec(table, write, buffer, lenp, ppos,
+                               do_proc_douintvec_conv, NULL);
 }
 
 /*
@@ -2858,6 +2892,12 @@ int proc_dointvec(struct ctl_table *table, int write,
        return -ENOSYS;
 }
 
+int proc_douintvec(struct ctl_table *table, int write,
+                 void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       return -ENOSYS;
+}
+
 int proc_dointvec_minmax(struct ctl_table *table, int write,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -2903,6 +2943,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
  * exception granted :-)
  */
 EXPORT_SYMBOL(proc_dointvec);
+EXPORT_SYMBOL(proc_douintvec);
 EXPORT_SYMBOL(proc_dointvec_jiffies);
 EXPORT_SYMBOL(proc_dointvec_minmax);
 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
index 3b65746..e07fb09 100644 (file)
@@ -401,7 +401,10 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
        do {
                seq = raw_read_seqcount_latch(&tkf->seq);
                tkr = tkf->base + (seq & 0x01);
-               now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
+               now = ktime_to_ns(tkr->base);
+
+               now += clocksource_delta(tkr->read(tkr->clock),
+                                        tkr->cycle_last, tkr->mask);
        } while (read_seqcount_retry(&tkf->seq, seq));
 
        return now;
index f6bd652..107310a 100644 (file)
@@ -23,7 +23,9 @@
 
 #include "timekeeping_internal.h"
 
-static unsigned int sleep_time_bin[32] = {0};
+#define NUM_BINS 32
+
+static unsigned int sleep_time_bin[NUM_BINS] = {0};
 
 static int tk_debug_show_sleep_time(struct seq_file *s, void *data)
 {
@@ -69,6 +71,9 @@ late_initcall(tk_debug_sleep_time_init);
 
 void tk_debug_account_sleep_time(struct timespec64 *t)
 {
-       sleep_time_bin[fls(t->tv_sec)]++;
+       /* Cap bin index so we don't overflow the array */
+       int bin = min(fls(t->tv_sec), NUM_BINS-1);
+
+       sleep_time_bin[bin]++;
 }
 
index 555670a..32bf6f7 100644 (file)
@@ -1496,6 +1496,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
        struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
        u64 expires = KTIME_MAX;
        unsigned long nextevt;
+       bool is_max_delta;
 
        /*
         * Pretend that there is no timer pending if the cpu is offline.
@@ -1506,6 +1507,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 
        spin_lock(&base->lock);
        nextevt = __next_timer_interrupt(base);
+       is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
        base->next_expiry = nextevt;
        /*
         * We have a fresh next event. Check whether we can forward the base:
@@ -1519,7 +1521,8 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
                expires = basem;
                base->is_idle = false;
        } else {
-               expires = basem + (nextevt - basej) * TICK_NSEC;
+               if (!is_max_delta)
+                       expires = basem + (nextevt - basej) * TICK_NSEC;
                /*
                 * If we expect to sleep more than a tick, mark the base idle:
                 */
index 7598e6c..dbafc5d 100644 (file)
@@ -223,7 +223,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
        what |= MASK_TC_BIT(op_flags, META);
        what |= MASK_TC_BIT(op_flags, PREFLUSH);
        what |= MASK_TC_BIT(op_flags, FUA);
-       if (op == REQ_OP_DISCARD)
+       if (op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)
                what |= BLK_TC_ACT(BLK_TC_DISCARD);
        if (op == REQ_OP_FLUSH)
                what |= BLK_TC_ACT(BLK_TC_FLUSH);
index 5d845ff..56054e5 100644 (file)
@@ -30,7 +30,7 @@
 
 #define HASH_DEFAULT_SIZE      64UL
 #define HASH_MIN_SIZE          4U
-#define BUCKET_LOCKS_PER_CPU   128UL
+#define BUCKET_LOCKS_PER_CPU   32UL
 
 static u32 head_hashfn(struct rhashtable *ht,
                       const struct bucket_table *tbl,
@@ -70,21 +70,25 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
        unsigned int nr_pcpus = num_possible_cpus();
 #endif
 
-       nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
+       nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
        size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
 
        /* Never allocate more than 0.5 locks per bucket */
        size = min_t(unsigned int, size, tbl->size >> 1);
 
        if (sizeof(spinlock_t) != 0) {
+               tbl->locks = NULL;
 #ifdef CONFIG_NUMA
                if (size * sizeof(spinlock_t) > PAGE_SIZE &&
                    gfp == GFP_KERNEL)
                        tbl->locks = vmalloc(size * sizeof(spinlock_t));
-               else
 #endif
-               tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
-                                          gfp);
+               if (gfp != GFP_KERNEL)
+                       gfp |= __GFP_NOWARN | __GFP_NORETRY;
+
+               if (!tbl->locks)
+                       tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
+                                                  gfp);
                if (!tbl->locks)
                        return -ENOMEM;
                for (i = 0; i < size; i++)
@@ -321,12 +325,14 @@ static int rhashtable_expand(struct rhashtable *ht)
 static int rhashtable_shrink(struct rhashtable *ht)
 {
        struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
-       unsigned int size;
+       unsigned int nelems = atomic_read(&ht->nelems);
+       unsigned int size = 0;
        int err;
 
        ASSERT_RHT_MUTEX(ht);
 
-       size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
+       if (nelems)
+               size = roundup_pow_of_two(nelems * 3 / 2);
        if (size < ht->p.min_size)
                size = ht->p.min_size;
 
index 33f655e..9c5fe81 100644 (file)
@@ -40,8 +40,8 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, long
                unsigned long c, data;
 
                /* Fall back to byte-at-a-time if we get a page fault */
-               if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res))))
-                       break;
+               unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time);
+
                *(unsigned long *)(dst+res) = c;
                if (has_zero(c, &data, &constants)) {
                        data = prep_zero_mask(c, data, &constants);
@@ -56,8 +56,7 @@ byte_at_a_time:
        while (max) {
                char c;
 
-               if (unlikely(unsafe_get_user(c,src+res)))
-                       return -EFAULT;
+               unsafe_get_user(c,src+res, efault);
                dst[res] = c;
                if (!c)
                        return res;
@@ -76,6 +75,7 @@ byte_at_a_time:
         * Nope: we hit the address space limit, and we still had more
         * characters the caller would have wanted. That's an EFAULT.
         */
+efault:
        return -EFAULT;
 }
 
index 2625943..8e105ed 100644 (file)
@@ -45,8 +45,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
        src -= align;
        max += align;
 
-       if (unlikely(unsafe_get_user(c,(unsigned long __user *)src)))
-               return 0;
+       unsafe_get_user(c, (unsigned long __user *)src, efault);
        c |= aligned_byte_mask(align);
 
        for (;;) {
@@ -61,8 +60,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
                if (unlikely(max <= sizeof(unsigned long)))
                        break;
                max -= sizeof(unsigned long);
-               if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res))))
-                       return 0;
+               unsafe_get_user(c, (unsigned long __user *)(src+res), efault);
        }
        res -= align;
 
@@ -77,6 +75,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
         * Nope: we hit the address space limit, and we still had more
         * characters the caller would have wanted. That's 0.
         */
+efault:
        return 0;
 }
 
index 297fdb5..64e899b 100644 (file)
@@ -38,7 +38,7 @@ MODULE_PARM_DESC(runs, "Number of test runs per variant (default: 4)");
 
 static int max_size = 0;
 module_param(max_size, int, 0);
-MODULE_PARM_DESC(runs, "Maximum table size (default: calculated)");
+MODULE_PARM_DESC(max_size, "Maximum table size (default: calculated)");
 
 static bool shrinking = false;
 module_param(shrinking, bool, 0);
index 78a23c5..be0ee11 100644 (file)
@@ -262,7 +262,14 @@ config COMPACTION
        select MIGRATION
        depends on MMU
        help
-         Allows the compaction of memory for the allocation of huge pages.
+          Compaction is the only memory management component to form
+          high order (larger physically contiguous) memory blocks
+          reliably. The page allocator relies on compaction heavily and
+          the lack of the feature can lead to unexpected OOM killer
+          invocations for high order memory requests. You shouldn't
+          disable this option unless there really is a strong reason for
+          it and then we would be really interested to hear about that at
+          linux-mm@kvack.org.
 
 #
 # support for page migration
index fc05966..2ca1faf 100644 (file)
@@ -21,6 +21,9 @@ KCOV_INSTRUMENT_memcontrol.o := n
 KCOV_INSTRUMENT_mmzone.o := n
 KCOV_INSTRUMENT_vmstat.o := n
 
+# Since __builtin_frame_address does work as used, disable the warning.
+CFLAGS_usercopy.o += $(call cc-disable-warning, frame-address)
+
 mmu-y                  := nommu.o
 mmu-$(CONFIG_MMU)      := gup.o highmem.o memory.o mincore.o \
                           mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
@@ -99,3 +102,4 @@ obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
 obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
 obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
 obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
+obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
index 2373f0a..2db2112 100644 (file)
@@ -1512,7 +1512,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
        struct page *page;
        pgtable_t pgtable;
        pmd_t _pmd;
-       bool young, write, dirty;
+       bool young, write, dirty, soft_dirty;
        unsigned long addr;
        int i;
 
@@ -1546,6 +1546,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
        write = pmd_write(*pmd);
        young = pmd_young(*pmd);
        dirty = pmd_dirty(*pmd);
+       soft_dirty = pmd_soft_dirty(*pmd);
 
        pmdp_huge_split_prepare(vma, haddr, pmd);
        pgtable = pgtable_trans_huge_withdraw(mm, pmd);
@@ -1562,6 +1563,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                        swp_entry_t swp_entry;
                        swp_entry = make_migration_entry(page + i, write);
                        entry = swp_entry_to_pte(swp_entry);
+                       if (soft_dirty)
+                               entry = pte_swp_mksoft_dirty(entry);
                } else {
                        entry = mk_pte(page + i, vma->vm_page_prot);
                        entry = maybe_mkwrite(entry, vma);
@@ -1569,6 +1572,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                                entry = pte_wrprotect(entry);
                        if (!young)
                                entry = pte_mkold(entry);
+                       if (soft_dirty)
+                               entry = pte_mksoft_dirty(entry);
                }
                if (dirty)
                        SetPageDirty(page + i);
index b9aa1b0..87e11d8 100644 (file)
@@ -1448,6 +1448,7 @@ static void dissolve_free_huge_page(struct page *page)
                list_del(&page->lru);
                h->free_huge_pages--;
                h->free_huge_pages_node[nid]--;
+               h->max_huge_pages--;
                update_and_free_page(h, page);
        }
        spin_unlock(&hugetlb_lock);
index b6728a3..baabaad 100644 (file)
@@ -217,11 +217,8 @@ void quarantine_reduce(void)
        new_quarantine_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) /
                QUARANTINE_FRACTION;
        percpu_quarantines = QUARANTINE_PERCPU_SIZE * num_online_cpus();
-       if (WARN_ONCE(new_quarantine_size < percpu_quarantines,
-               "Too little memory, disabling global KASAN quarantine.\n"))
-               new_quarantine_size = 0;
-       else
-               new_quarantine_size -= percpu_quarantines;
+       new_quarantine_size = (new_quarantine_size < percpu_quarantines) ?
+               0 : new_quarantine_size - percpu_quarantines;
        WRITE_ONCE(quarantine_size, new_quarantine_size);
 
        last = global_quarantine.head;
index 66beca1..9a6a51a 100644 (file)
@@ -2337,8 +2337,11 @@ int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
                return 0;
 
        memcg = get_mem_cgroup_from_mm(current->mm);
-       if (!mem_cgroup_is_root(memcg))
+       if (!mem_cgroup_is_root(memcg)) {
                ret = memcg_kmem_charge_memcg(page, gfp, order, memcg);
+               if (!ret)
+                       __SetPageKmemcg(page);
+       }
        css_put(&memcg->css);
        return ret;
 }
@@ -2365,6 +2368,11 @@ void memcg_kmem_uncharge(struct page *page, int order)
                page_counter_uncharge(&memcg->memsw, nr_pages);
 
        page->mem_cgroup = NULL;
+
+       /* slab pages do not have PageKmemcg flag set */
+       if (PageKmemcg(page))
+               __ClearPageKmemcg(page);
+
        css_put_many(&memcg->css, nr_pages);
 }
 #endif /* !CONFIG_SLOB */
@@ -4069,14 +4077,14 @@ static struct cftype mem_cgroup_legacy_files[] = {
 
 static DEFINE_IDR(mem_cgroup_idr);
 
-static void mem_cgroup_id_get(struct mem_cgroup *memcg)
+static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
 {
-       atomic_inc(&memcg->id.ref);
+       atomic_add(n, &memcg->id.ref);
 }
 
-static void mem_cgroup_id_put(struct mem_cgroup *memcg)
+static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
 {
-       if (atomic_dec_and_test(&memcg->id.ref)) {
+       if (atomic_sub_and_test(n, &memcg->id.ref)) {
                idr_remove(&mem_cgroup_idr, memcg->id.id);
                memcg->id.id = 0;
 
@@ -4085,6 +4093,16 @@ static void mem_cgroup_id_put(struct mem_cgroup *memcg)
        }
 }
 
+static inline void mem_cgroup_id_get(struct mem_cgroup *memcg)
+{
+       mem_cgroup_id_get_many(memcg, 1);
+}
+
+static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
+{
+       mem_cgroup_id_put_many(memcg, 1);
+}
+
 /**
  * mem_cgroup_from_id - look up a memcg from a memcg id
  * @id: the memcg id to look up
@@ -4719,6 +4737,8 @@ static void __mem_cgroup_clear_mc(void)
                if (!mem_cgroup_is_root(mc.from))
                        page_counter_uncharge(&mc.from->memsw, mc.moved_swap);
 
+               mem_cgroup_id_put_many(mc.from, mc.moved_swap);
+
                /*
                 * we charged both to->memory and to->memsw, so we
                 * should uncharge to->memory.
@@ -4726,9 +4746,9 @@ static void __mem_cgroup_clear_mc(void)
                if (!mem_cgroup_is_root(mc.to))
                        page_counter_uncharge(&mc.to->memory, mc.moved_swap);
 
-               css_put_many(&mc.from->css, mc.moved_swap);
+               mem_cgroup_id_get_many(mc.to, mc.moved_swap);
+               css_put_many(&mc.to->css, mc.moved_swap);
 
-               /* we've already done css_get(mc.to) */
                mc.moved_swap = 0;
        }
        memcg_oom_recover(from);
@@ -5537,8 +5557,10 @@ static void uncharge_list(struct list_head *page_list)
                        else
                                nr_file += nr_pages;
                        pgpgout++;
-               } else
+               } else {
                        nr_kmem += 1 << compound_order(page);
+                       __ClearPageKmemcg(page);
+               }
 
                page->mem_cgroup = NULL;
        } while (next != page_list);
@@ -5781,6 +5803,24 @@ static int __init mem_cgroup_init(void)
 subsys_initcall(mem_cgroup_init);
 
 #ifdef CONFIG_MEMCG_SWAP
+static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
+{
+       while (!atomic_inc_not_zero(&memcg->id.ref)) {
+               /*
+                * The root cgroup cannot be destroyed, so its refcount must
+                * always be >= 1.
+                */
+               if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
+                       VM_BUG_ON(1);
+                       break;
+               }
+               memcg = parent_mem_cgroup(memcg);
+               if (!memcg)
+                       memcg = root_mem_cgroup;
+       }
+       return memcg;
+}
+
 /**
  * mem_cgroup_swapout - transfer a memsw charge to swap
  * @page: page whose memsw charge to transfer
@@ -5790,7 +5830,7 @@ subsys_initcall(mem_cgroup_init);
  */
 void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
 {
-       struct mem_cgroup *memcg;
+       struct mem_cgroup *memcg, *swap_memcg;
        unsigned short oldid;
 
        VM_BUG_ON_PAGE(PageLRU(page), page);
@@ -5805,16 +5845,27 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
        if (!memcg)
                return;
 
-       mem_cgroup_id_get(memcg);
-       oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
+       /*
+        * In case the memcg owning these pages has been offlined and doesn't
+        * have an ID allocated to it anymore, charge the closest online
+        * ancestor for the swap instead and transfer the memory+swap charge.
+        */
+       swap_memcg = mem_cgroup_id_get_online(memcg);
+       oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
        VM_BUG_ON_PAGE(oldid, page);
-       mem_cgroup_swap_statistics(memcg, true);
+       mem_cgroup_swap_statistics(swap_memcg, true);
 
        page->mem_cgroup = NULL;
 
        if (!mem_cgroup_is_root(memcg))
                page_counter_uncharge(&memcg->memory, 1);
 
+       if (memcg != swap_memcg) {
+               if (!mem_cgroup_is_root(swap_memcg))
+                       page_counter_charge(&swap_memcg->memsw, 1);
+               page_counter_uncharge(&memcg->memsw, 1);
+       }
+
        /*
         * Interrupts should be disabled here because the caller holds the
         * mapping->tree_lock lock which is taken with interrupts-off. It is
@@ -5853,11 +5904,14 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
        if (!memcg)
                return 0;
 
+       memcg = mem_cgroup_id_get_online(memcg);
+
        if (!mem_cgroup_is_root(memcg) &&
-           !page_counter_try_charge(&memcg->swap, 1, &counter))
+           !page_counter_try_charge(&memcg->swap, 1, &counter)) {
+               mem_cgroup_id_put(memcg);
                return -ENOMEM;
+       }
 
-       mem_cgroup_id_get(memcg);
        oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
        VM_BUG_ON_PAGE(oldid, page);
        mem_cgroup_swap_statistics(memcg, true);
index 3894b65..41266dc 100644 (file)
@@ -1219,6 +1219,7 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 
        /* init node's zones as empty zones, we don't have any present pages.*/
        free_area_init_node(nid, zones_size, start_pfn, zholes_size);
+       pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);
 
        /*
         * The node we allocated has no zone fallback lists. For avoiding
@@ -1249,6 +1250,7 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
 {
        arch_refresh_nodedata(nid, NULL);
+       free_percpu(pgdat->per_cpu_nodestats);
        arch_free_nodedata(pgdat);
        return;
 }
index 7d0a275..d53a9aa 100644 (file)
@@ -764,7 +764,7 @@ bool task_will_free_mem(struct task_struct *task)
 {
        struct mm_struct *mm = task->mm;
        struct task_struct *p;
-       bool ret;
+       bool ret = true;
 
        /*
         * Skip tasks without mm because it might have passed its exit_mm and
index fb975ce..3fbe73a 100644 (file)
@@ -1008,10 +1008,8 @@ static __always_inline bool free_pages_prepare(struct page *page,
        }
        if (PageMappingFlags(page))
                page->mapping = NULL;
-       if (memcg_kmem_enabled() && PageKmemcg(page)) {
+       if (memcg_kmem_enabled() && PageKmemcg(page))
                memcg_kmem_uncharge(page, order);
-               __ClearPageKmemcg(page);
-       }
        if (check_free)
                bad += free_pages_check(page);
        if (bad)
@@ -3756,12 +3754,10 @@ no_zone:
        }
 
 out:
-       if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page) {
-               if (unlikely(memcg_kmem_charge(page, gfp_mask, order))) {
-                       __free_pages(page, order);
-                       page = NULL;
-               } else
-                       __SetPageKmemcg(page);
+       if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
+           unlikely(memcg_kmem_charge(page, gfp_mask, order) != 0)) {
+               __free_pages(page, order);
+               page = NULL;
        }
 
        if (kmemcheck_enabled && page)
@@ -4064,7 +4060,7 @@ long si_mem_available(void)
        int lru;
 
        for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
-               pages[lru] = global_page_state(NR_LRU_BASE + lru);
+               pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
 
        for_each_zone(zone)
                wmark_low += zone->watermark[WMARK_LOW];
@@ -4761,6 +4757,8 @@ int local_memory_node(int node)
 }
 #endif
 
+static void setup_min_unmapped_ratio(void);
+static void setup_min_slab_ratio(void);
 #else  /* CONFIG_NUMA */
 
 static void set_zonelist_order(void)
@@ -5882,9 +5880,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
                zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
 #ifdef CONFIG_NUMA
                zone->node = nid;
-               pgdat->min_unmapped_pages += (freesize*sysctl_min_unmapped_ratio)
-                                               / 100;
-               pgdat->min_slab_pages += (freesize * sysctl_min_slab_ratio) / 100;
 #endif
                zone->name = zone_names[j];
                zone->zone_pgdat = pgdat;
@@ -6805,6 +6800,12 @@ int __meminit init_per_zone_wmark_min(void)
        setup_per_zone_wmarks();
        refresh_zone_stat_thresholds();
        setup_per_zone_lowmem_reserve();
+
+#ifdef CONFIG_NUMA
+       setup_min_unmapped_ratio();
+       setup_min_slab_ratio();
+#endif
+
        return 0;
 }
 core_initcall(init_per_zone_wmark_min)
@@ -6846,43 +6847,58 @@ int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
 }
 
 #ifdef CONFIG_NUMA
+static void setup_min_unmapped_ratio(void)
+{
+       pg_data_t *pgdat;
+       struct zone *zone;
+
+       for_each_online_pgdat(pgdat)
+               pgdat->min_unmapped_pages = 0;
+
+       for_each_zone(zone)
+               zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
+                               sysctl_min_unmapped_ratio) / 100;
+}
+
+
 int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
        void __user *buffer, size_t *length, loff_t *ppos)
 {
-       struct pglist_data *pgdat;
-       struct zone *zone;
        int rc;
 
        rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
        if (rc)
                return rc;
 
+       setup_min_unmapped_ratio();
+
+       return 0;
+}
+
+static void setup_min_slab_ratio(void)
+{
+       pg_data_t *pgdat;
+       struct zone *zone;
+
        for_each_online_pgdat(pgdat)
                pgdat->min_slab_pages = 0;
 
        for_each_zone(zone)
-               zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
-                               sysctl_min_unmapped_ratio) / 100;
-       return 0;
+               zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
+                               sysctl_min_slab_ratio) / 100;
 }
 
 int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
        void __user *buffer, size_t *length, loff_t *ppos)
 {
-       struct pglist_data *pgdat;
-       struct zone *zone;
        int rc;
 
        rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
        if (rc)
                return rc;
 
-       for_each_online_pgdat(pgdat)
-               pgdat->min_slab_pages = 0;
+       setup_min_slab_ratio();
 
-       for_each_zone(zone)
-               zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
-                               sysctl_min_slab_ratio) / 100;
        return 0;
 }
 #endif
index 65ec288..c8a955b 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/dax.h>
 #include <linux/gfp.h>
 #include <linux/export.h>
 #include <linux/blkdev.h>
@@ -544,6 +545,14 @@ do_readahead(struct address_space *mapping, struct file *filp,
        if (!mapping || !mapping->a_ops)
                return -EINVAL;
 
+       /*
+        * Readahead doesn't make sense for DAX inodes, but we don't want it
+        * to report a failure either.  Instead, we just return success and
+        * don't do any work.
+        */
+       if (dax_mapping(mapping))
+               return 0;
+
        return force_page_cache_readahead(mapping, filp, index, nr);
 }
 
index 709bc83..1ef3640 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1284,8 +1284,9 @@ void page_add_file_rmap(struct page *page, bool compound)
                VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
                __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
        } else {
-               if (PageTransCompound(page)) {
-                       VM_BUG_ON_PAGE(!PageLocked(page), page);
+               if (PageTransCompound(page) && page_mapping(page)) {
+                       VM_WARN_ON_ONCE(!PageLocked(page));
+
                        SetPageDoubleMap(compound_head(page));
                        if (PageMlocked(page))
                                clear_page_mlock(compound_head(page));
@@ -1303,7 +1304,7 @@ static void page_remove_file_rmap(struct page *page, bool compound)
 {
        int i, nr = 1;
 
-       VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
+       VM_BUG_ON_PAGE(compound && !PageHead(page), page);
        lock_page_memcg(page);
 
        /* Hugepages are not counted in NR_FILE_MAPPED for now. */
index 7f7748a..fd8b2b5 100644 (file)
@@ -3975,7 +3975,9 @@ static ssize_t shmem_enabled_store(struct kobject *kobj,
 
 struct kobj_attribute shmem_enabled_attr =
        __ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
+#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */
 
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
 bool shmem_huge_enabled(struct vm_area_struct *vma)
 {
        struct inode *inode = file_inode(vma->vm_file);
@@ -4006,7 +4008,7 @@ bool shmem_huge_enabled(struct vm_area_struct *vma)
                        return false;
        }
 }
-#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */
+#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */
 
 #else /* !CONFIG_SHMEM */
 
index 261147b..b672710 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4441,6 +4441,36 @@ static int __init slab_proc_init(void)
 module_init(slab_proc_init);
 #endif
 
+#ifdef CONFIG_HARDENED_USERCOPY
+/*
+ * Rejects objects that are incorrectly sized.
+ *
+ * Returns NULL if check passes, otherwise const char * to name of cache
+ * to indicate an error.
+ */
+const char *__check_heap_object(const void *ptr, unsigned long n,
+                               struct page *page)
+{
+       struct kmem_cache *cachep;
+       unsigned int objnr;
+       unsigned long offset;
+
+       /* Find and validate object. */
+       cachep = page->slab_cache;
+       objnr = obj_to_index(cachep, page, (void *)ptr);
+       BUG_ON(objnr >= cachep->num);
+
+       /* Find offset within object. */
+       offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep);
+
+       /* Allow address range falling entirely within object size. */
+       if (offset <= cachep->object_size && n <= cachep->object_size - offset)
+               return NULL;
+
+       return cachep->name;
+}
+#endif /* CONFIG_HARDENED_USERCOPY */
+
 /**
  * ksize - get the actual amount of memory allocated for a given object
  * @objp: Pointer to the object
index 850737b..9adae58 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3629,6 +3629,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
  */
 static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
 {
+       LIST_HEAD(discard);
        struct page *page, *h;
 
        BUG_ON(irqs_disabled());
@@ -3636,13 +3637,16 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
        list_for_each_entry_safe(page, h, &n->partial, lru) {
                if (!page->inuse) {
                        remove_partial(n, page);
-                       discard_slab(s, page);
+                       list_add(&page->lru, &discard);
                } else {
                        list_slab_objects(s, page,
                        "Objects remaining in %s on __kmem_cache_shutdown()");
                }
        }
        spin_unlock_irq(&n->list_lock);
+
+       list_for_each_entry_safe(page, h, &discard, lru)
+               discard_slab(s, page);
 }
 
 /*
@@ -3764,6 +3768,46 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 EXPORT_SYMBOL(__kmalloc_node);
 #endif
 
+#ifdef CONFIG_HARDENED_USERCOPY
+/*
+ * Rejects objects that are incorrectly sized.
+ *
+ * Returns NULL if check passes, otherwise const char * to name of cache
+ * to indicate an error.
+ */
+const char *__check_heap_object(const void *ptr, unsigned long n,
+                               struct page *page)
+{
+       struct kmem_cache *s;
+       unsigned long offset;
+       size_t object_size;
+
+       /* Find object and usable object size. */
+       s = page->slab_cache;
+       object_size = slab_ksize(s);
+
+       /* Reject impossible pointers. */
+       if (ptr < page_address(page))
+               return s->name;
+
+       /* Find offset within object. */
+       offset = (ptr - page_address(page)) % s->size;
+
+       /* Adjust for redzone and reject if within the redzone. */
+       if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) {
+               if (offset < s->red_left_pad)
+                       return s->name;
+               offset -= s->red_left_pad;
+       }
+
+       /* Allow address range falling entirely within object size. */
+       if (offset <= object_size && n <= object_size - offset)
+               return NULL;
+
+       return s->name;
+}
+#endif /* CONFIG_HARDENED_USERCOPY */
+
 static size_t __ksize(const void *object)
 {
        struct page *page;
diff --git a/mm/usercopy.c b/mm/usercopy.c
new file mode 100644 (file)
index 0000000..a3cc305
--- /dev/null
@@ -0,0 +1,268 @@
+/*
+ * This implements the various checks for CONFIG_HARDENED_USERCOPY*,
+ * which are designed to protect kernel memory from needless exposure
+ * and overwrite under many unintended conditions. This code is based
+ * on PAX_USERCOPY, which is:
+ *
+ * Copyright (C) 2001-2016 PaX Team, Bradley Spengler, Open Source
+ * Security Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/sections.h>
+
+enum {
+       BAD_STACK = -1,
+       NOT_STACK = 0,
+       GOOD_FRAME,
+       GOOD_STACK,
+};
+
+/*
+ * Checks if a given pointer and length is contained by the current
+ * stack frame (if possible).
+ *
+ * Returns:
+ *     NOT_STACK: not at all on the stack
+ *     GOOD_FRAME: fully within a valid stack frame
+ *     GOOD_STACK: fully on the stack (when can't do frame-checking)
+ *     BAD_STACK: error condition (invalid stack position or bad stack frame)
+ */
+static noinline int check_stack_object(const void *obj, unsigned long len)
+{
+       const void * const stack = task_stack_page(current);
+       const void * const stackend = stack + THREAD_SIZE;
+       int ret;
+
+       /* Object is not on the stack at all. */
+       if (obj + len <= stack || stackend <= obj)
+               return NOT_STACK;
+
+       /*
+        * Reject: object partially overlaps the stack (passing the
+        * check above means at least one end is within the stack,
+        * so if this check fails, the other end is outside the stack).
+        */
+       if (obj < stack || stackend < obj + len)
+               return BAD_STACK;
+
+       /* Check if object is safely within a valid frame. */
+       ret = arch_within_stack_frames(stack, stackend, obj, len);
+       if (ret)
+               return ret;
+
+       return GOOD_STACK;
+}
+
+static void report_usercopy(const void *ptr, unsigned long len,
+                           bool to_user, const char *type)
+{
+       pr_emerg("kernel memory %s attempt detected %s %p (%s) (%lu bytes)\n",
+               to_user ? "exposure" : "overwrite",
+               to_user ? "from" : "to", ptr, type ? : "unknown", len);
+       /*
+        * For greater effect, it would be nice to do do_group_exit(),
+        * but BUG() actually hooks all the lock-breaking and per-arch
+        * Oops code, so that is used here instead.
+        */
+       BUG();
+}
+
+/* Returns true if any portion of [ptr,ptr+n) overlaps with [low,high). */
+static bool overlaps(const void *ptr, unsigned long n, unsigned long low,
+                    unsigned long high)
+{
+       unsigned long check_low = (uintptr_t)ptr;
+       unsigned long check_high = check_low + n;
+
+       /* Does not overlap if entirely above or entirely below. */
+       if (check_low >= high || check_high <= low)
+               return false;
+
+       return true;
+}
+
+/* Is this address range in the kernel text area? */
+static inline const char *check_kernel_text_object(const void *ptr,
+                                                  unsigned long n)
+{
+       unsigned long textlow = (unsigned long)_stext;
+       unsigned long texthigh = (unsigned long)_etext;
+       unsigned long textlow_linear, texthigh_linear;
+
+       if (overlaps(ptr, n, textlow, texthigh))
+               return "<kernel text>";
+
+       /*
+        * Some architectures have virtual memory mappings with a secondary
+        * mapping of the kernel text, i.e. there is more than one virtual
+        * kernel address that points to the kernel image. This usually occurs
+        * when there is a separate linear physical memory mapping, such that
+        * __pa() is not just the reverse of __va(). This can be detected
+        * and checked:
+        */
+       textlow_linear = (unsigned long)__va(__pa(textlow));
+       /* No different mapping: we're done. */
+       if (textlow_linear == textlow)
+               return NULL;
+
+       /* Check the secondary mapping... */
+       texthigh_linear = (unsigned long)__va(__pa(texthigh));
+       if (overlaps(ptr, n, textlow_linear, texthigh_linear))
+               return "<linear kernel text>";
+
+       return NULL;
+}
+
+static inline const char *check_bogus_address(const void *ptr, unsigned long n)
+{
+       /* Reject if object wraps past end of memory. */
+       if ((unsigned long)ptr + n < (unsigned long)ptr)
+               return "<wrapped address>";
+
+       /* Reject if NULL or ZERO-allocation. */
+       if (ZERO_OR_NULL_PTR(ptr))
+               return "<null>";
+
+       return NULL;
+}
+
+static inline const char *check_heap_object(const void *ptr, unsigned long n,
+                                           bool to_user)
+{
+       struct page *page, *endpage;
+       const void *end = ptr + n - 1;
+       bool is_reserved, is_cma;
+
+       /*
+        * Some architectures (arm64) return true for virt_addr_valid() on
+        * vmalloced addresses. Work around this by checking for vmalloc
+        * first.
+        */
+       if (is_vmalloc_addr(ptr))
+               return NULL;
+
+       if (!virt_addr_valid(ptr))
+               return NULL;
+
+       page = virt_to_head_page(ptr);
+
+       /* Check slab allocator for flags and size. */
+       if (PageSlab(page))
+               return __check_heap_object(ptr, n, page);
+
+       /*
+        * Sometimes the kernel data regions are not marked Reserved (see
+        * check below). And sometimes [_sdata,_edata) does not cover
+        * rodata and/or bss, so check each range explicitly.
+        */
+
+       /* Allow reads of kernel rodata region (if not marked as Reserved). */
+       if (ptr >= (const void *)__start_rodata &&
+           end <= (const void *)__end_rodata) {
+               if (!to_user)
+                       return "<rodata>";
+               return NULL;
+       }
+
+       /* Allow kernel data region (if not marked as Reserved). */
+       if (ptr >= (const void *)_sdata && end <= (const void *)_edata)
+               return NULL;
+
+       /* Allow kernel bss region (if not marked as Reserved). */
+       if (ptr >= (const void *)__bss_start &&
+           end <= (const void *)__bss_stop)
+               return NULL;
+
+       /* Is the object wholly within one base page? */
+       if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) ==
+                  ((unsigned long)end & (unsigned long)PAGE_MASK)))
+               return NULL;
+
+       /* Allow if start and end are inside the same compound page. */
+       endpage = virt_to_head_page(end);
+       if (likely(endpage == page))
+               return NULL;
+
+       /*
+        * Allow if range is entirely either Reserved (i.e. special or
+        * device memory), or CMA. Otherwise, reject since the object spans
+        * several independently allocated pages.
+        */
+       is_reserved = PageReserved(page);
+       is_cma = is_migrate_cma_page(page);
+       if (!is_reserved && !is_cma)
+               goto reject;
+
+       for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) {
+               page = virt_to_head_page(ptr);
+               if (is_reserved && !PageReserved(page))
+                       goto reject;
+               if (is_cma && !is_migrate_cma_page(page))
+                       goto reject;
+       }
+
+       return NULL;
+
+reject:
+       return "<spans multiple pages>";
+}
+
+/*
+ * Validates that the given object is:
+ * - not bogus address
+ * - known-safe heap or stack object
+ * - not in kernel text
+ */
+void __check_object_size(const void *ptr, unsigned long n, bool to_user)
+{
+       const char *err;
+
+       /* Skip all tests if size is zero. */
+       if (!n)
+               return;
+
+       /* Check for invalid addresses. */
+       err = check_bogus_address(ptr, n);
+       if (err)
+               goto report;
+
+       /* Check for bad heap object. */
+       err = check_heap_object(ptr, n, to_user);
+       if (err)
+               goto report;
+
+       /* Check for bad stack object. */
+       switch (check_stack_object(ptr, n)) {
+       case NOT_STACK:
+               /* Object is not touching the current process stack. */
+               break;
+       case GOOD_FRAME:
+       case GOOD_STACK:
+               /*
+                * Object is either in the correct frame (when it
+                * is possible to check) or just generally on the
+                * process stack (when frame checking not available).
+                */
+               return;
+       default:
+               err = "<process stack>";
+               goto report;
+       }
+
+       /* Check for object in kernel to avoid text exposure. */
+       err = check_kernel_text_object(ptr, n);
+       if (!err)
+               return;
+
+report:
+       report_usercopy(ptr, n, to_user, err);
+}
+EXPORT_SYMBOL(__check_object_size);
index 82a116b..8de138d 100644 (file)
@@ -169,7 +169,7 @@ int register_vlan_dev(struct net_device *dev)
        if (err < 0)
                goto out_uninit_mvrp;
 
-       vlan->nest_level = dev_get_nest_level(real_dev, is_vlan_dev) + 1;
+       vlan->nest_level = dev_get_nest_level(real_dev) + 1;
        err = register_netdevice(dev);
        if (err < 0)
                goto out_uninit_mvrp;
index 4acb1d5..f24b25c 100644 (file)
@@ -507,8 +507,8 @@ err_out:
                /* wakeup anybody waiting for slots to pin pages */
                wake_up(&vp_wq);
        }
-       kfree(in_pages);
-       kfree(out_pages);
+       kvfree(in_pages);
+       kvfree(out_pages);
        return err;
 }
 
index ece45e0..0b5f729 100644 (file)
@@ -250,7 +250,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 
        skb_free_datagram(sk, skb);
 
-       if (msg->msg_flags & MSG_TRUNC)
+       if (flags & MSG_TRUNC)
                copied = skblen;
 
        return err ? : copied;
index c045b3c..b0e23df 100644 (file)
@@ -262,6 +262,8 @@ int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req,
                break;
        }
 
+       kfree_skb(hdev->req_skb);
+       hdev->req_skb = NULL;
        hdev->req_status = hdev->req_result = 0;
 
        BT_DBG("%s end: err %d", hdev->name, err);
index 6ef8a01..96f04b7 100644 (file)
@@ -1091,7 +1091,7 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
 
        skb_free_datagram(sk, skb);
 
-       if (msg->msg_flags & MSG_TRUNC)
+       if (flags & MSG_TRUNC)
                copied = skblen;
 
        return err ? : copied;
index 54ceb1f..d4cad29 100644 (file)
@@ -32,6 +32,7 @@
 
 #include <linux/debugfs.h>
 #include <linux/crc16.h>
+#include <linux/filter.h>
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
@@ -5835,6 +5836,9 @@ static int l2cap_reassemble_sdu(struct l2cap_chan *chan, struct sk_buff *skb,
                if (chan->sdu)
                        break;
 
+               if (!pskb_may_pull(skb, L2CAP_SDULEN_SIZE))
+                       break;
+
                chan->sdu_len = get_unaligned_le16(skb->data);
                skb_pull(skb, L2CAP_SDULEN_SIZE);
 
@@ -6610,6 +6614,10 @@ static int l2cap_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb)
                goto drop;
        }
 
+       if ((chan->mode == L2CAP_MODE_ERTM ||
+            chan->mode == L2CAP_MODE_STREAMING) && sk_filter(chan->data, skb))
+               goto drop;
+
        if (!control->sframe) {
                int err;
 
index 1842141..a8ba752 100644 (file)
@@ -1019,7 +1019,7 @@ static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg,
                goto done;
 
        if (pi->rx_busy_skb) {
-               if (!sock_queue_rcv_skb(sk, pi->rx_busy_skb))
+               if (!__sock_queue_rcv_skb(sk, pi->rx_busy_skb))
                        pi->rx_busy_skb = NULL;
                else
                        goto done;
@@ -1270,7 +1270,17 @@ static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
                goto done;
        }
 
-       err = sock_queue_rcv_skb(sk, skb);
+       if (chan->mode != L2CAP_MODE_ERTM &&
+           chan->mode != L2CAP_MODE_STREAMING) {
+               /* Even if no filter is attached, we could potentially
+                * get errors from security modules, etc.
+                */
+               err = sk_filter(sk, skb);
+               if (err)
+                       goto done;
+       }
+
+       err = __sock_queue_rcv_skb(sk, skb);
 
        /* For ERTM, handle one skb that doesn't fit into the recv
         * buffer.  This is important to do because the data frames
index c18080a..cd620fa 100644 (file)
@@ -267,7 +267,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
 
        /* If old entry was unassociated with any port, then delete it. */
        f = __br_fdb_get(br, br->dev->dev_addr, 0);
-       if (f && f->is_local && !f->dst)
+       if (f && f->is_local && !f->dst && !f->added_by_user)
                fdb_delete_local(br, NULL, f);
 
        fdb_insert(br, NULL, newaddr, 0);
@@ -282,7 +282,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
                if (!br_vlan_should_use(v))
                        continue;
                f = __br_fdb_get(br, br->dev->dev_addr, v->vid);
-               if (f && f->is_local && !f->dst)
+               if (f && f->is_local && !f->dst && !f->added_by_user)
                        fdb_delete_local(br, NULL, f);
                fdb_insert(br, NULL, newaddr, v->vid);
        }
@@ -764,20 +764,25 @@ out:
 }
 
 /* Update (create or replace) forwarding database entry */
-static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
-                        __u16 state, __u16 flags, __u16 vid)
+static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
+                        const __u8 *addr, __u16 state, __u16 flags, __u16 vid)
 {
-       struct net_bridge *br = source->br;
        struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
        struct net_bridge_fdb_entry *fdb;
        bool modified = false;
 
        /* If the port cannot learn allow only local and static entries */
-       if (!(state & NUD_PERMANENT) && !(state & NUD_NOARP) &&
+       if (source && !(state & NUD_PERMANENT) && !(state & NUD_NOARP) &&
            !(source->state == BR_STATE_LEARNING ||
              source->state == BR_STATE_FORWARDING))
                return -EPERM;
 
+       if (!source && !(state & NUD_PERMANENT)) {
+               pr_info("bridge: RTM_NEWNEIGH %s without NUD_PERMANENT\n",
+                       br->dev->name);
+               return -EINVAL;
+       }
+
        fdb = fdb_find(head, addr, vid);
        if (fdb == NULL) {
                if (!(flags & NLM_F_CREATE))
@@ -832,22 +837,28 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
        return 0;
 }
 
-static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
-              const unsigned char *addr, u16 nlh_flags, u16 vid)
+static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
+                       struct net_bridge_port *p, const unsigned char *addr,
+                       u16 nlh_flags, u16 vid)
 {
        int err = 0;
 
        if (ndm->ndm_flags & NTF_USE) {
+               if (!p) {
+                       pr_info("bridge: RTM_NEWNEIGH %s with NTF_USE is not supported\n",
+                               br->dev->name);
+                       return -EINVAL;
+               }
                local_bh_disable();
                rcu_read_lock();
-               br_fdb_update(p->br, p, addr, vid, true);
+               br_fdb_update(br, p, addr, vid, true);
                rcu_read_unlock();
                local_bh_enable();
        } else {
-               spin_lock_bh(&p->br->hash_lock);
-               err = fdb_add_entry(p, addr, ndm->ndm_state,
+               spin_lock_bh(&br->hash_lock);
+               err = fdb_add_entry(br, p, addr, ndm->ndm_state,
                                    nlh_flags, vid);
-               spin_unlock_bh(&p->br->hash_lock);
+               spin_unlock_bh(&br->hash_lock);
        }
 
        return err;
@@ -884,6 +895,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                                dev->name);
                        return -EINVAL;
                }
+               br = p->br;
                vg = nbp_vlan_group(p);
        }
 
@@ -895,15 +907,9 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                }
 
                /* VID was specified, so use it. */
-               if (dev->priv_flags & IFF_EBRIDGE)
-                       err = br_fdb_insert(br, NULL, addr, vid);
-               else
-                       err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
+               err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid);
        } else {
-               if (dev->priv_flags & IFF_EBRIDGE)
-                       err = br_fdb_insert(br, NULL, addr, 0);
-               else
-                       err = __br_fdb_add(ndm, p, addr, nlh_flags, 0);
+               err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0);
                if (err || !vg || !vg->num_vlans)
                        goto out;
 
@@ -914,11 +920,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                list_for_each_entry(v, &vg->vlan_list, vlist) {
                        if (!br_vlan_should_use(v))
                                continue;
-                       if (dev->priv_flags & IFF_EBRIDGE)
-                               err = br_fdb_insert(br, NULL, addr, v->vid);
-                       else
-                               err = __br_fdb_add(ndm, p, addr, nlh_flags,
-                                                  v->vid);
+                       err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid);
                        if (err)
                                goto out;
                }
index c83326c..ef34a02 100644 (file)
@@ -574,7 +574,7 @@ static void complete_generic_request(struct ceph_mon_generic_request *req)
        put_generic_request(req);
 }
 
-void cancel_generic_request(struct ceph_mon_generic_request *req)
+static void cancel_generic_request(struct ceph_mon_generic_request *req)
 {
        struct ceph_mon_client *monc = req->monc;
        struct ceph_mon_generic_request *lookup_req;
index b5ec096..a97e7b5 100644 (file)
@@ -4220,7 +4220,7 @@ static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr)
 
                pages = ceph_alloc_page_vector(calc_pages_for(0, data_len),
                                               GFP_NOIO);
-               if (!pages) {
+               if (IS_ERR(pages)) {
                        ceph_msg_put(m);
                        return NULL;
                }
index ca53c83..22fb96e 100644 (file)
@@ -84,12 +84,6 @@ retry:
 }
 EXPORT_SYMBOL(ceph_find_or_create_string);
 
-static void ceph_free_string(struct rcu_head *head)
-{
-       struct ceph_string *cs = container_of(head, struct ceph_string, rcu);
-       kfree(cs);
-}
-
 void ceph_release_string(struct kref *ref)
 {
        struct ceph_string *cs = container_of(ref, struct ceph_string, kref);
@@ -101,7 +95,7 @@ void ceph_release_string(struct kref *ref)
        }
        spin_unlock(&string_tree_lock);
 
-       call_rcu(&cs->rcu, ceph_free_string);
+       kfree_rcu(cs, rcu);
 }
 EXPORT_SYMBOL(ceph_release_string);
 
index 4ce07dc..dd6ce59 100644 (file)
@@ -6045,8 +6045,7 @@ void *netdev_lower_dev_get_private(struct net_device *dev,
 EXPORT_SYMBOL(netdev_lower_dev_get_private);
 
 
-int dev_get_nest_level(struct net_device *dev,
-                      bool (*type_check)(const struct net_device *dev))
+int dev_get_nest_level(struct net_device *dev)
 {
        struct net_device *lower = NULL;
        struct list_head *iter;
@@ -6056,15 +6055,12 @@ int dev_get_nest_level(struct net_device *dev,
        ASSERT_RTNL();
 
        netdev_for_each_lower_dev(dev, lower, iter) {
-               nest = dev_get_nest_level(lower, type_check);
+               nest = dev_get_nest_level(lower);
                if (max_nest < nest)
                        max_nest = nest;
        }
 
-       if (type_check(dev))
-               max_nest++;
-
-       return max_nest;
+       return max_nest + 1;
 }
 EXPORT_SYMBOL(dev_get_nest_level);
 
index 5708999..cb06ace 100644 (file)
@@ -1355,56 +1355,47 @@ static inline int bpf_try_make_writable(struct sk_buff *skb,
 {
        int err;
 
-       if (!skb_cloned(skb))
-               return 0;
-       if (skb_clone_writable(skb, write_len))
-               return 0;
-       err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
-       if (!err)
-               bpf_compute_data_end(skb);
+       err = skb_ensure_writable(skb, write_len);
+       bpf_compute_data_end(skb);
+
        return err;
 }
 
+static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
+{
+       if (skb_at_tc_ingress(skb))
+               skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
+}
+
+static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
+{
+       if (skb_at_tc_ingress(skb))
+               skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
+}
+
 static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
 {
-       struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
        struct sk_buff *skb = (struct sk_buff *) (long) r1;
-       int offset = (int) r2;
+       unsigned int offset = (unsigned int) r2;
        void *from = (void *) (long) r3;
        unsigned int len = (unsigned int) r4;
        void *ptr;
 
        if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
                return -EINVAL;
-
-       /* bpf verifier guarantees that:
-        * 'from' pointer points to bpf program stack
-        * 'len' bytes of it were initialized
-        * 'len' > 0
-        * 'skb' is a valid pointer to 'struct sk_buff'
-        *
-        * so check for invalid 'offset' and too large 'len'
-        */
-       if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff)))
+       if (unlikely(offset > 0xffff))
                return -EFAULT;
        if (unlikely(bpf_try_make_writable(skb, offset + len)))
                return -EFAULT;
 
-       ptr = skb_header_pointer(skb, offset, len, sp->buff);
-       if (unlikely(!ptr))
-               return -EFAULT;
-
+       ptr = skb->data + offset;
        if (flags & BPF_F_RECOMPUTE_CSUM)
-               skb_postpull_rcsum(skb, ptr, len);
+               __skb_postpull_rcsum(skb, ptr, len, offset);
 
        memcpy(ptr, from, len);
 
-       if (ptr == sp->buff)
-               /* skb_store_bits cannot return -EFAULT here */
-               skb_store_bits(skb, offset, ptr, len);
-
        if (flags & BPF_F_RECOMPUTE_CSUM)
-               skb_postpush_rcsum(skb, ptr, len);
+               __skb_postpush_rcsum(skb, ptr, len, offset);
        if (flags & BPF_F_INVALIDATE_HASH)
                skb_clear_hash(skb);
 
@@ -1425,12 +1416,12 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
 static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 {
        const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1;
-       int offset = (int) r2;
+       unsigned int offset = (unsigned int) r2;
        void *to = (void *)(unsigned long) r3;
        unsigned int len = (unsigned int) r4;
        void *ptr;
 
-       if (unlikely((u32) offset > 0xffff))
+       if (unlikely(offset > 0xffff))
                goto err_clear;
 
        ptr = skb_header_pointer(skb, offset, len, to);
@@ -1458,20 +1449,17 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
 static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
 {
        struct sk_buff *skb = (struct sk_buff *) (long) r1;
-       int offset = (int) r2;
-       __sum16 sum, *ptr;
+       unsigned int offset = (unsigned int) r2;
+       __sum16 *ptr;
 
        if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
                return -EINVAL;
-       if (unlikely((u32) offset > 0xffff))
+       if (unlikely(offset > 0xffff || offset & 1))
                return -EFAULT;
-       if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
-               return -EFAULT;
-
-       ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
-       if (unlikely(!ptr))
+       if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
                return -EFAULT;
 
+       ptr = (__sum16 *)(skb->data + offset);
        switch (flags & BPF_F_HDR_FIELD_MASK) {
        case 0:
                if (unlikely(from != 0))
@@ -1489,10 +1477,6 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
                return -EINVAL;
        }
 
-       if (ptr == &sum)
-               /* skb_store_bits guaranteed to not return -EFAULT here */
-               skb_store_bits(skb, offset, ptr, sizeof(sum));
-
        return 0;
 }
 
@@ -1512,20 +1496,18 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
        struct sk_buff *skb = (struct sk_buff *) (long) r1;
        bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
        bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
-       int offset = (int) r2;
-       __sum16 sum, *ptr;
+       unsigned int offset = (unsigned int) r2;
+       __sum16 *ptr;
 
        if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR |
                               BPF_F_HDR_FIELD_MASK)))
                return -EINVAL;
-       if (unlikely((u32) offset > 0xffff))
+       if (unlikely(offset > 0xffff || offset & 1))
                return -EFAULT;
-       if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
+       if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
                return -EFAULT;
 
-       ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
-       if (unlikely(!ptr))
-               return -EFAULT;
+       ptr = (__sum16 *)(skb->data + offset);
        if (is_mmzero && !*ptr)
                return 0;
 
@@ -1548,10 +1530,6 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
 
        if (is_mmzero && !*ptr)
                *ptr = CSUM_MANGLED_0;
-       if (ptr == &sum)
-               /* skb_store_bits guaranteed to not return -EFAULT here */
-               skb_store_bits(skb, offset, ptr, sizeof(sum));
-
        return 0;
 }
 
@@ -1607,9 +1585,6 @@ static const struct bpf_func_proto bpf_csum_diff_proto = {
 
 static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
 {
-       if (skb_at_tc_ingress(skb))
-               skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
-
        return dev_forward_skb(dev, skb);
 }
 
@@ -1648,6 +1623,8 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
        if (unlikely(!skb))
                return -ENOMEM;
 
+       bpf_push_mac_rcsum(skb);
+
        return flags & BPF_F_INGRESS ?
               __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
 }
@@ -1693,6 +1670,8 @@ int skb_do_redirect(struct sk_buff *skb)
                return -EINVAL;
        }
 
+       bpf_push_mac_rcsum(skb);
+
        return ri->flags & BPF_F_INGRESS ?
               __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
 }
@@ -1756,7 +1735,10 @@ static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
                     vlan_proto != htons(ETH_P_8021AD)))
                vlan_proto = htons(ETH_P_8021Q);
 
+       bpf_push_mac_rcsum(skb);
        ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
+       bpf_pull_mac_rcsum(skb);
+
        bpf_compute_data_end(skb);
        return ret;
 }
@@ -1776,7 +1758,10 @@ static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
        struct sk_buff *skb = (struct sk_buff *) (long) r1;
        int ret;
 
+       bpf_push_mac_rcsum(skb);
        ret = skb_vlan_pop(skb);
+       bpf_pull_mac_rcsum(skb);
+
        bpf_compute_data_end(skb);
        return ret;
 }
@@ -2298,7 +2283,7 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
 }
 
 #ifdef CONFIG_SOCK_CGROUP_DATA
-static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 {
        struct sk_buff *skb = (struct sk_buff *)(long)r1;
        struct bpf_map *map = (struct bpf_map *)(long)r2;
@@ -2321,8 +2306,8 @@ static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
        return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp);
 }
 
-static const struct bpf_func_proto bpf_skb_in_cgroup_proto = {
-       .func           = bpf_skb_in_cgroup,
+static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
+       .func           = bpf_skb_under_cgroup,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_CTX,
@@ -2402,8 +2387,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
        case BPF_FUNC_get_smp_processor_id:
                return &bpf_get_smp_processor_id_proto;
 #ifdef CONFIG_SOCK_CGROUP_DATA
-       case BPF_FUNC_skb_in_cgroup:
-               return &bpf_skb_in_cgroup_proto;
+       case BPF_FUNC_skb_under_cgroup:
+               return &bpf_skb_under_cgroup_proto;
 #endif
        default:
                return sk_filter_func_proto(func_id);
index d07fc07..e2ffc2a 100644 (file)
@@ -249,7 +249,7 @@ static inline unsigned long get_index(t_key key, struct key_vector *kv)
  * index into the parent's child array. That is, they will be used to find
  * 'n' among tp's children.
  *
- * The bits from (n->pos + n->bits) to (tn->pos - 1) - "S" - are skipped bits
+ * The bits from (n->pos + n->bits) to (tp->pos - 1) - "S" - are skipped bits
  * for the node n.
  *
  * All the bits we have seen so far are significant to the node n. The rest
@@ -258,7 +258,7 @@ static inline unsigned long get_index(t_key key, struct key_vector *kv)
  * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
  * n's child array, and will of course be different for each child.
  *
- * The rest of the bits, from 0 to (n->pos + n->bits), are completely unknown
+ * The rest of the bits, from 0 to (n->pos - 1) - "u" - are completely unknown
  * at this point.
  */
 
@@ -2452,9 +2452,7 @@ struct fib_route_iter {
 static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
                                            loff_t pos)
 {
-       struct fib_table *tb = iter->main_tb;
        struct key_vector *l, **tp = &iter->tnode;
-       struct trie *t;
        t_key key;
 
        /* use cache location of next-to-find key */
@@ -2462,8 +2460,6 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
                pos -= iter->pos;
                key = iter->key;
        } else {
-               t = (struct trie *)tb->tb_data;
-               iter->tnode = t->kv;
                iter->pos = 0;
                key = 0;
        }
@@ -2504,12 +2500,12 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
                return NULL;
 
        iter->main_tb = tb;
+       t = (struct trie *)tb->tb_data;
+       iter->tnode = t->kv;
 
        if (*pos != 0)
                return fib_route_get_idx(iter, *pos);
 
-       t = (struct trie *)tb->tb_data;
-       iter->tnode = t->kv;
        iter->pos = 0;
        iter->key = 0;
 
index 5b1481b..113cc43 100644 (file)
@@ -370,7 +370,6 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
                         tunnel->parms.o_flags, proto, tunnel->parms.o_key,
                         htonl(tunnel->o_seqno));
 
-       skb_set_inner_protocol(skb, proto);
        ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
 }
 
index 9d847c3..0f227db 100644 (file)
@@ -73,9 +73,11 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
        skb_dst_set(skb, &rt->dst);
        memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 
-       if (skb_iif && proto == IPPROTO_UDP) {
-               /* Arrived from an ingress interface and got udp encapuslated.
-                * The encapsulated network segment length may exceed dst mtu.
+       if (skb_iif && !(df & htons(IP_DF))) {
+               /* Arrived from an ingress interface, got encapsulated, with
+                * fragmentation of encapsulating frames allowed.
+                * If skb is gso, the resulting encapsulated network segments
+                * may exceed dst mtu.
                 * Allow IP Fragmentation of segments.
                 */
                IPCB(skb)->flags |= IPSKB_FRAG_SEGS;
index a917903..cc701fa 100644 (file)
@@ -557,6 +557,33 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = {
        .get_link_net   = ip_tunnel_get_link_net,
 };
 
+static bool is_vti_tunnel(const struct net_device *dev)
+{
+       return dev->netdev_ops == &vti_netdev_ops;
+}
+
+static int vti_device_event(struct notifier_block *unused,
+                           unsigned long event, void *ptr)
+{
+       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+       struct ip_tunnel *tunnel = netdev_priv(dev);
+
+       if (!is_vti_tunnel(dev))
+               return NOTIFY_DONE;
+
+       switch (event) {
+       case NETDEV_DOWN:
+               if (!net_eq(tunnel->net, dev_net(dev)))
+                       xfrm_garbage_collect(tunnel->net);
+               break;
+       }
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block vti_notifier_block __read_mostly = {
+       .notifier_call = vti_device_event,
+};
+
 static int __init vti_init(void)
 {
        const char *msg;
@@ -564,6 +591,8 @@ static int __init vti_init(void)
 
        pr_info("IPv4 over IPsec tunneling driver\n");
 
+       register_netdevice_notifier(&vti_notifier_block);
+
        msg = "tunnel device";
        err = register_pernet_device(&vti_net_ops);
        if (err < 0)
@@ -596,6 +625,7 @@ xfrm_proto_ah_failed:
 xfrm_proto_esp_failed:
        unregister_pernet_device(&vti_net_ops);
 pernet_dev_failed:
+       unregister_netdevice_notifier(&vti_notifier_block);
        pr_err("vti init: failed to register %s\n", msg);
        return err;
 }
@@ -607,6 +637,7 @@ static void __exit vti_fini(void)
        xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
        xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
        unregister_pernet_device(&vti_net_ops);
+       unregister_netdevice_notifier(&vti_notifier_block);
 }
 
 module_init(vti_init);
index 032a96d..ffbb218 100644 (file)
@@ -3193,7 +3193,6 @@ int tcp_abort(struct sock *sk, int err)
                        local_bh_enable();
                        return 0;
                }
-               sock_gen_put(sk);
                return -EOPNOTSUPP;
        }
 
@@ -3222,7 +3221,6 @@ int tcp_abort(struct sock *sk, int err)
        bh_unlock_sock(sk);
        local_bh_enable();
        release_sock(sk);
-       sock_put(sk);
        return 0;
 }
 EXPORT_SYMBOL_GPL(tcp_abort);
index 4d61093..a748c74 100644 (file)
@@ -54,11 +54,16 @@ static int tcp_diag_destroy(struct sk_buff *in_skb,
 {
        struct net *net = sock_net(in_skb->sk);
        struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req);
+       int err;
 
        if (IS_ERR(sk))
                return PTR_ERR(sk);
 
-       return sock_diag_destroy(sk, ECONNABORTED);
+       err = sock_diag_destroy(sk, ECONNABORTED);
+
+       sock_gen_put(sk);
+
+       return err;
 }
 #endif
 
index 32b048e..7158d4f 100644 (file)
@@ -814,8 +814,14 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
        u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
                                             tcp_sk(sk)->snd_nxt;
 
+       /* RFC 7323 2.3
+        * The window field (SEG.WND) of every outgoing segment, with the
+        * exception of <SYN> segments, MUST be right-shifted by
+        * Rcv.Wind.Shift bits:
+        */
        tcp_v4_send_ack(sock_net(sk), skb, seq,
-                       tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
+                       tcp_rsk(req)->rcv_nxt,
+                       req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp,
                        req->ts_recent,
                        0,
index e61f7cd..5fdcb8d 100644 (file)
@@ -1182,13 +1182,13 @@ out:
  *     @sk: socket
  *
  *     Drops all bad checksum frames, until a valid one is found.
- *     Returns the length of found skb, or 0 if none is found.
+ *     Returns the length of found skb, or -1 if none is found.
  */
-static unsigned int first_packet_length(struct sock *sk)
+static int first_packet_length(struct sock *sk)
 {
        struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue;
        struct sk_buff *skb;
-       unsigned int res;
+       int res;
 
        __skb_queue_head_init(&list_kill);
 
@@ -1203,7 +1203,7 @@ static unsigned int first_packet_length(struct sock *sk)
                __skb_unlink(skb, rcvq);
                __skb_queue_tail(&list_kill, skb);
        }
-       res = skb ? skb->len : 0;
+       res = skb ? skb->len : -1;
        spin_unlock_bh(&rcvq->lock);
 
        if (!skb_queue_empty(&list_kill)) {
@@ -1232,7 +1232,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 
        case SIOCINQ:
        {
-               unsigned int amount = first_packet_length(sk);
+               int amount = max_t(int, 0, first_packet_length(sk));
 
                return put_user(amount, (int __user *)arg);
        }
@@ -2184,7 +2184,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
 
        /* Check for false positives due to checksum errors */
        if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
-           !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk))
+           !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
                mask &= ~(POLLIN | POLLRDNORM);
 
        return mask;
@@ -2216,7 +2216,6 @@ struct proto udp_prot = {
        .sysctl_wmem       = &sysctl_udp_wmem_min,
        .sysctl_rmem       = &sysctl_udp_rmem_min,
        .obj_size          = sizeof(struct udp_sock),
-       .slab_flags        = SLAB_DESTROY_BY_RCU,
        .h.udp_table       = &udp_table,
 #ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_udp_setsockopt,
index 3b3efbd..2eea073 100644 (file)
@@ -55,7 +55,6 @@ struct proto  udplite_prot = {
        .unhash            = udp_lib_unhash,
        .get_port          = udp_v4_get_port,
        .obj_size          = sizeof(struct udp_sock),
-       .slab_flags        = SLAB_DESTROY_BY_RCU,
        .h.udp_table       = &udplite_table,
 #ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_udp_setsockopt,
index ab3e796..f418d2e 100644 (file)
@@ -1872,7 +1872,6 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp)
 
 void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 {
-       struct in6_addr addr;
        struct inet6_dev *idev = ifp->idev;
        struct net *net = dev_net(ifp->idev->dev);
 
@@ -1934,18 +1933,6 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
                in6_ifa_put(ifp2);
 lock_errdad:
                spin_lock_bh(&ifp->lock);
-       } else if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) {
-               addr.s6_addr32[0] = htonl(0xfe800000);
-               addr.s6_addr32[1] = 0;
-
-               if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) &&
-                   ipv6_addr_equal(&ifp->addr, &addr)) {
-                       /* DAD failed for link-local based on MAC address */
-                       idev->cnf.disable_ipv6 = 1;
-
-                       pr_info("%s: IPv6 being disabled!\n",
-                               ifp->idev->dev->name);
-               }
        }
 
 errdad:
@@ -3543,7 +3530,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
        /* combine the user config with event to determine if permanent
         * addresses are to be removed from address hash table
         */
-       keep_addr = !(how || _keep_addr <= 0);
+       keep_addr = !(how || _keep_addr <= 0 || idev->cnf.disable_ipv6);
 
        /* Step 2: clear hash table */
        for (i = 0; i < IN6_ADDR_HSIZE; i++) {
@@ -3599,7 +3586,7 @@ restart:
        /* re-combine the user config with event to determine if permanent
         * addresses are to be removed from the interface list
         */
-       keep_addr = (!how && _keep_addr > 0);
+       keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
 
        INIT_LIST_HEAD(&del_list);
        list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
@@ -3821,6 +3808,7 @@ static void addrconf_dad_work(struct work_struct *w)
                                                dad_work);
        struct inet6_dev *idev = ifp->idev;
        struct in6_addr mcaddr;
+       bool disable_ipv6 = false;
 
        enum {
                DAD_PROCESS,
@@ -3837,6 +3825,24 @@ static void addrconf_dad_work(struct work_struct *w)
        } else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) {
                action = DAD_ABORT;
                ifp->state = INET6_IFADDR_STATE_POSTDAD;
+
+               if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6 &&
+                   !(ifp->flags & IFA_F_STABLE_PRIVACY)) {
+                       struct in6_addr addr;
+
+                       addr.s6_addr32[0] = htonl(0xfe800000);
+                       addr.s6_addr32[1] = 0;
+
+                       if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) &&
+                           ipv6_addr_equal(&ifp->addr, &addr)) {
+                               /* DAD failed for link-local based on MAC */
+                               idev->cnf.disable_ipv6 = 1;
+
+                               pr_info("%s: IPv6 being disabled!\n",
+                                       ifp->idev->dev->name);
+                               disable_ipv6 = true;
+                       }
+               }
        }
        spin_unlock_bh(&ifp->lock);
 
@@ -3845,6 +3851,8 @@ static void addrconf_dad_work(struct work_struct *w)
                goto out;
        } else if (action == DAD_ABORT) {
                addrconf_dad_stop(ifp, 1);
+               if (disable_ipv6)
+                       addrconf_ifdown(idev->dev, 0);
                goto out;
        }
 
index c53b92c..37ac9de 100644 (file)
@@ -952,8 +952,10 @@ calipso_opt_insert(struct ipv6_opt_hdr *hop,
                memcpy(new, hop, start);
        ret_val = calipso_genopt((unsigned char *)new, start, buf_len, doi_def,
                                 secattr);
-       if (ret_val < 0)
+       if (ret_val < 0) {
+               kfree(new);
                return ERR_PTR(ret_val);
+       }
 
        buf_len = start + ret_val;
        /* At this point buf_len aligns to 4n, so (buf_len & 4) pads to 8n */
index 776d145..704274c 100644 (file)
@@ -519,8 +519,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
        gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
                         protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
 
-       skb_set_inner_protocol(skb, protocol);
-
        return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
                            NEXTHDR_GRE);
 }
index fed40d1..0900352 100644 (file)
@@ -55,7 +55,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        struct icmp6hdr user_icmph;
        int addr_type;
        struct in6_addr *daddr;
-       int iif = 0;
+       int oif = 0;
        struct flowi6 fl6;
        int err;
        struct dst_entry *dst;
@@ -78,25 +78,30 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                if (u->sin6_family != AF_INET6) {
                        return -EAFNOSUPPORT;
                }
-               if (sk->sk_bound_dev_if &&
-                   sk->sk_bound_dev_if != u->sin6_scope_id) {
-                       return -EINVAL;
-               }
                daddr = &(u->sin6_addr);
-               iif = u->sin6_scope_id;
+               if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr)))
+                       oif = u->sin6_scope_id;
        } else {
                if (sk->sk_state != TCP_ESTABLISHED)
                        return -EDESTADDRREQ;
                daddr = &sk->sk_v6_daddr;
        }
 
-       if (!iif)
-               iif = sk->sk_bound_dev_if;
+       if (!oif)
+               oif = sk->sk_bound_dev_if;
+
+       if (!oif)
+               oif = np->sticky_pktinfo.ipi6_ifindex;
+
+       if (!oif && ipv6_addr_is_multicast(daddr))
+               oif = np->mcast_oif;
+       else if (!oif)
+               oif = np->ucast_oif;
 
        addr_type = ipv6_addr_type(daddr);
-       if (__ipv6_addr_needs_scope_id(addr_type) && !iif)
-               return -EINVAL;
-       if (addr_type & IPV6_ADDR_MAPPED)
+       if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) ||
+           (addr_type & IPV6_ADDR_MAPPED) ||
+           (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if))
                return -EINVAL;
 
        /* TODO: use ip6_datagram_send_ctl to get options from cmsg */
@@ -106,16 +111,12 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        fl6.flowi6_proto = IPPROTO_ICMPV6;
        fl6.saddr = np->saddr;
        fl6.daddr = *daddr;
+       fl6.flowi6_oif = oif;
        fl6.flowi6_mark = sk->sk_mark;
        fl6.fl6_icmp_type = user_icmph.icmp6_type;
        fl6.fl6_icmp_code = user_icmph.icmp6_code;
        security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-       if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
-               fl6.flowi6_oif = np->mcast_oif;
-       else if (!fl6.flowi6_oif)
-               fl6.flowi6_oif = np->ucast_oif;
-
        ipc6.tclass = np->tclass;
        fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
 
index 33df8b8..94f4f89 100644 (file)
@@ -944,9 +944,15 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
+       /* RFC 7323 2.3
+        * The window field (SEG.WND) of every outgoing segment, with the
+        * exception of <SYN> segments, MUST be right-shifted by
+        * Rcv.Wind.Shift bits:
+        */
        tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
-                       tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
+                       tcp_rsk(req)->rcv_nxt,
+                       req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
                        0, 0);
index 81e2f98..19ac3a1 100644 (file)
@@ -1460,7 +1460,6 @@ struct proto udpv6_prot = {
        .sysctl_wmem       = &sysctl_udp_wmem_min,
        .sysctl_rmem       = &sysctl_udp_rmem_min,
        .obj_size          = sizeof(struct udp6_sock),
-       .slab_flags        = SLAB_DESTROY_BY_RCU,
        .h.udp_table       = &udp_table,
 #ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_udpv6_setsockopt,
index 9cf097e..fd6ef41 100644 (file)
@@ -50,7 +50,6 @@ struct proto udplitev6_prot = {
        .unhash            = udp_lib_unhash,
        .get_port          = udp_v6_get_port,
        .obj_size          = sizeof(struct udp6_sock),
-       .slab_flags        = SLAB_DESTROY_BY_RCU,
        .h.udp_table       = &udplite_table,
 #ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_udpv6_setsockopt,
index 4a7ae32..1138eaf 100644 (file)
@@ -185,8 +185,12 @@ struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv,
 
        self->magic = IAS_MAGIC;
        self->mode = mode;
-       if (mode == IAS_CLIENT)
-               iriap_register_lsap(self, slsap_sel, mode);
+       if (mode == IAS_CLIENT) {
+               if (iriap_register_lsap(self, slsap_sel, mode)) {
+                       kfree(self);
+                       return NULL;
+               }
+       }
 
        self->confirm = callback;
        self->priv = priv;
index d9560aa..232cb92 100644 (file)
@@ -856,7 +856,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
        error = -ENOTCONN;
        if (sk == NULL)
                goto end;
-       if (sk->sk_state != PPPOX_CONNECTED)
+       if (!(sk->sk_state & PPPOX_CONNECTED))
                goto end;
 
        error = -EBADF;
index 47e99ab..543b1d4 100644 (file)
@@ -869,7 +869,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
 
        /* free all potentially still buffered bcast frames */
        local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf);
-       skb_queue_purge(&sdata->u.ap.ps.bc_buf);
+       ieee80211_purge_tx_queue(&local->hw, &sdata->u.ap.ps.bc_buf);
 
        mutex_lock(&local->mtx);
        ieee80211_vif_copy_chanctx_to_vlans(sdata, true);
index 184473c..ba5fc1f 100644 (file)
@@ -1094,7 +1094,7 @@ static inline u32 drv_get_expected_throughput(struct ieee80211_local *local,
 
        trace_drv_get_expected_throughput(sta);
        if (local->ops->get_expected_throughput)
-               ret = local->ops->get_expected_throughput(sta);
+               ret = local->ops->get_expected_throughput(&local->hw, sta);
        trace_drv_return_u32(local, ret);
 
        return ret;
index c66411d..42120d9 100644 (file)
@@ -881,20 +881,22 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
 
        netif_carrier_off(sdata->dev);
 
+       /* flush STAs and mpaths on this iface */
+       sta_info_flush(sdata);
+       mesh_path_flush_by_iface(sdata);
+
        /* stop the beacon */
        ifmsh->mesh_id_len = 0;
        sdata->vif.bss_conf.enable_beacon = false;
        clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
        ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
+
+       /* remove beacon */
        bcn = rcu_dereference_protected(ifmsh->beacon,
                                        lockdep_is_held(&sdata->wdev.mtx));
        RCU_INIT_POINTER(ifmsh->beacon, NULL);
        kfree_rcu(bcn, rcu_head);
 
-       /* flush STAs and mpaths on this iface */
-       sta_info_flush(sdata);
-       mesh_path_flush_by_iface(sdata);
-
        /* free all potentially still buffered group-addressed frames */
        local->total_ps_buffered -= skb_queue_len(&ifmsh->ps.bc_buf);
        skb_queue_purge(&ifmsh->ps.bc_buf);
index 2e8a902..9dce3b1 100644 (file)
@@ -1268,7 +1268,7 @@ static void sta_ps_start(struct sta_info *sta)
        for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
                struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
 
-               if (!txqi->tin.backlog_packets)
+               if (txqi->tin.backlog_packets)
                        set_bit(tid, &sta->txq_buffered_tids);
                else
                        clear_bit(tid, &sta->txq_buffered_tids);
index c6d5c72..a2a6826 100644 (file)
@@ -771,6 +771,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
                        clear_sta_flag(sta, WLAN_STA_SP);
 
                acked = !!(info->flags & IEEE80211_TX_STAT_ACK);
+
+               /* mesh Peer Service Period support */
+               if (ieee80211_vif_is_mesh(&sta->sdata->vif) &&
+                   ieee80211_is_data_qos(fc))
+                       ieee80211_mpsp_trigger_process(
+                               ieee80211_get_qos_ctl(hdr), sta, true, acked);
+
                if (!acked && test_sta_flag(sta, WLAN_STA_PS_STA)) {
                        /*
                         * The STA is in power save mode, so assume
@@ -781,13 +788,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
                        return;
                }
 
-               /* mesh Peer Service Period support */
-               if (ieee80211_vif_is_mesh(&sta->sdata->vif) &&
-                   ieee80211_is_data_qos(fc))
-                       ieee80211_mpsp_trigger_process(
-                                       ieee80211_get_qos_ctl(hdr),
-                                       sta, true, acked);
-
                if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) &&
                    (ieee80211_is_data(hdr->frame_control)) &&
                    (rates_idx != -1))
index 91461c4..5023966 100644 (file)
@@ -368,7 +368,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
                skb = skb_dequeue(&ps->bc_buf);
                if (skb) {
                        purged++;
-                       dev_kfree_skb(skb);
+                       ieee80211_free_txskb(&local->hw, skb);
                }
                total += skb_queue_len(&ps->bc_buf);
        }
@@ -451,7 +451,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
        if (skb_queue_len(&ps->bc_buf) >= AP_MAX_BC_BUFFER) {
                ps_dbg(tx->sdata,
                       "BC TX buffer full - dropping the oldest frame\n");
-               dev_kfree_skb(skb_dequeue(&ps->bc_buf));
+               ieee80211_free_txskb(&tx->local->hw, skb_dequeue(&ps->bc_buf));
        } else
                tx->local->total_ps_buffered++;
 
@@ -4275,7 +4275,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
                        sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev);
                if (!ieee80211_tx_prepare(sdata, &tx, NULL, skb))
                        break;
-               dev_kfree_skb_any(skb);
+               ieee80211_free_txskb(hw, skb);
        }
 
        info = IEEE80211_SKB_CB(skb);
index 9e36931..f8dbacf 100644 (file)
@@ -574,7 +574,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
        helper = rcu_dereference(nfct_help(expect->master)->helper);
        if (helper) {
                seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
-               if (helper->expect_policy[expect->class].name)
+               if (helper->expect_policy[expect->class].name[0])
                        seq_printf(s, "/%s",
                                   helper->expect_policy[expect->class].name);
        }
index bb77a97..5c0db5c 100644 (file)
@@ -1473,7 +1473,8 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
                                 "timeout to %u seconds for",
                                 info->timeout);
                        nf_ct_dump_tuple(&exp->tuple);
-                       mod_timer(&exp->timeout, jiffies + info->timeout * HZ);
+                       mod_timer_pending(&exp->timeout,
+                                         jiffies + info->timeout * HZ);
                }
                spin_unlock_bh(&nf_conntrack_expect_lock);
        }
index 050bb34..fdfc71f 100644 (file)
@@ -1894,6 +1894,8 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
 
                        if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY])
                                return -EINVAL;
+                       if (otuple.dst.protonum != rtuple.dst.protonum)
+                               return -EINVAL;
 
                        ct = ctnetlink_create_conntrack(net, &zone, cda, &otuple,
                                                        &rtuple, u3);
@@ -2362,12 +2364,8 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
                return PTR_ERR(exp);
 
        err = nf_ct_expect_related_report(exp, portid, report);
-       if (err < 0) {
-               nf_ct_expect_put(exp);
-               return err;
-       }
-
-       return 0;
+       nf_ct_expect_put(exp);
+       return err;
 }
 
 static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct,
index 8d9db9d..7d77217 100644 (file)
@@ -1383,7 +1383,7 @@ static int process_sip_response(struct sk_buff *skb, unsigned int protoff,
                return NF_DROP;
        }
        cseq = simple_strtoul(*dptr + matchoff, NULL, 10);
-       if (!cseq) {
+       if (!cseq && *(*dptr + matchoff) != '0') {
                nf_ct_helper_log(skb, ct, "cannot get cseq");
                return NF_DROP;
        }
@@ -1446,7 +1446,7 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff,
                        return NF_DROP;
                }
                cseq = simple_strtoul(*dptr + matchoff, NULL, 10);
-               if (!cseq) {
+               if (!cseq && *(*dptr + matchoff) != '0') {
                        nf_ct_helper_log(skb, ct, "cannot get cseq");
                        return NF_DROP;
                }
index 958a145..9f267c3 100644 (file)
@@ -205,6 +205,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
        struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
        const struct nf_conntrack_l3proto *l3proto;
        const struct nf_conntrack_l4proto *l4proto;
+       struct net *net = seq_file_net(s);
        int ret = 0;
 
        NF_CT_ASSERT(ct);
@@ -215,6 +216,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
        if (NF_CT_DIRECTION(hash))
                goto release;
 
+       if (!net_eq(nf_ct_net(ct), net))
+               goto release;
+
        l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
        NF_CT_ASSERT(l3proto);
        l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
index 1b4de4b..70eb2f6 100644 (file)
@@ -326,14 +326,14 @@ static int nfnl_acct_try_del(struct nf_acct *cur)
 {
        int ret = 0;
 
-       /* we want to avoid races with nfnl_acct_find_get. */
-       if (atomic_dec_and_test(&cur->refcnt)) {
+       /* We want to avoid races with nfnl_acct_put. So only when the current
+        * refcnt is 1, we decrease it to 0.
+        */
+       if (atomic_cmpxchg(&cur->refcnt, 1, 0) == 1) {
                /* We are protected by nfnl mutex. */
                list_del_rcu(&cur->head);
                kfree_rcu(cur, rcu_head);
        } else {
-               /* still in use, restore reference counter. */
-               atomic_inc(&cur->refcnt);
                ret = -EBUSY;
        }
        return ret;
@@ -443,7 +443,7 @@ void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
 }
 EXPORT_SYMBOL_GPL(nfnl_acct_update);
 
-static void nfnl_overquota_report(struct nf_acct *nfacct)
+static void nfnl_overquota_report(struct net *net, struct nf_acct *nfacct)
 {
        int ret;
        struct sk_buff *skb;
@@ -458,11 +458,12 @@ static void nfnl_overquota_report(struct nf_acct *nfacct)
                kfree_skb(skb);
                return;
        }
-       netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA,
+       netlink_broadcast(net->nfnl, skb, 0, NFNLGRP_ACCT_QUOTA,
                          GFP_ATOMIC);
 }
 
-int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
+int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb,
+                       struct nf_acct *nfacct)
 {
        u64 now;
        u64 *quota;
@@ -480,7 +481,7 @@ int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
 
        if (now >= *quota &&
            !test_and_set_bit(NFACCT_OVERQUOTA_BIT, &nfacct->flags)) {
-               nfnl_overquota_report(nfacct);
+               nfnl_overquota_report(net, nfacct);
        }
 
        return ret;
index 4cdcd96..68216cd 100644 (file)
@@ -330,16 +330,16 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
 {
        int ret = 0;
 
-       /* we want to avoid races with nf_ct_timeout_find_get. */
-       if (atomic_dec_and_test(&timeout->refcnt)) {
+       /* We want to avoid races with ctnl_timeout_put. So only when the
+        * current refcnt is 1, we decrease it to 0.
+        */
+       if (atomic_cmpxchg(&timeout->refcnt, 1, 0) == 1) {
                /* We are protected by nfnl mutex. */
                list_del_rcu(&timeout->head);
                nf_ct_l4proto_put(timeout->l4proto);
                ctnl_untimeout(net, timeout);
                kfree_rcu(timeout, rcu_head);
        } else {
-               /* still in use, restore reference counter. */
-               atomic_inc(&timeout->refcnt);
                ret = -EBUSY;
        }
        return ret;
@@ -543,7 +543,9 @@ err:
 
 static void ctnl_timeout_put(struct ctnl_timeout *timeout)
 {
-       atomic_dec(&timeout->refcnt);
+       if (atomic_dec_and_test(&timeout->refcnt))
+               kfree_rcu(timeout, rcu_head);
+
        module_put(THIS_MODULE);
 }
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
@@ -591,7 +593,9 @@ static void __net_exit cttimeout_net_exit(struct net *net)
        list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
                list_del_rcu(&cur->head);
                nf_ct_l4proto_put(cur->l4proto);
-               kfree_rcu(cur, rcu_head);
+
+               if (atomic_dec_and_test(&cur->refcnt))
+                       kfree_rcu(cur, rcu_head);
        }
 }
 
index cbcfdfb..6577db5 100644 (file)
@@ -1147,6 +1147,7 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ULOG);
 MODULE_ALIAS_NF_LOGGER(AF_INET, 1);
 MODULE_ALIAS_NF_LOGGER(AF_INET6, 1);
 MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1);
+MODULE_ALIAS_NF_LOGGER(3, 1); /* NFPROTO_ARP */
 
 module_init(nfnetlink_log_init);
 module_exit(nfnetlink_log_fini);
index 5d36a09..f49f450 100644 (file)
@@ -1145,10 +1145,8 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
        struct nfnl_queue_net *q = nfnl_queue_pernet(net);
        int err;
 
-       queue = instance_lookup(q, queue_num);
-       if (!queue)
-               queue = verdict_instance_lookup(q, queue_num,
-                                               NETLINK_CB(skb).portid);
+       queue = verdict_instance_lookup(q, queue_num,
+                                       NETLINK_CB(skb).portid);
        if (IS_ERR(queue))
                return PTR_ERR(queue);
 
index ba7aed1..82c264e 100644 (file)
@@ -59,6 +59,7 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
                           const struct nlattr * const tb[])
 {
        struct nft_exthdr *priv = nft_expr_priv(expr);
+       u32 offset, len;
 
        if (tb[NFTA_EXTHDR_DREG] == NULL ||
            tb[NFTA_EXTHDR_TYPE] == NULL ||
@@ -66,9 +67,15 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
            tb[NFTA_EXTHDR_LEN] == NULL)
                return -EINVAL;
 
+       offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
+       len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
+
+       if (offset > U8_MAX || len > U8_MAX)
+               return -ERANGE;
+
        priv->type   = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
-       priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
-       priv->len    = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
+       priv->offset = offset;
+       priv->len    = len;
        priv->dreg   = nft_parse_register(tb[NFTA_EXTHDR_DREG]);
 
        return nft_validate_register_store(ctx, priv->dreg, NULL,
index 6473936..ffe9ae0 100644 (file)
@@ -70,7 +70,6 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
                } else if (d > 0)
                        parent = parent->rb_right;
                else {
-found:
                        if (!nft_set_elem_active(&rbe->ext, genmask)) {
                                parent = parent->rb_left;
                                continue;
@@ -84,9 +83,12 @@ found:
                }
        }
 
-       if (set->flags & NFT_SET_INTERVAL && interval != NULL) {
-               rbe = interval;
-               goto found;
+       if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
+           nft_set_elem_active(&interval->ext, genmask) &&
+           !nft_rbtree_interval_end(interval)) {
+               spin_unlock_bh(&nft_rbtree_lock);
+               *ext = &interval->ext;
+               return true;
        }
 out:
        spin_unlock_bh(&nft_rbtree_lock);
index 7f4414d..663c4c3 100644 (file)
@@ -127,6 +127,8 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
                                                    daddr, dport,
                                                    in->ifindex);
 
+                       if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+                               sk = NULL;
                        /* NOTE: we return listeners even if bound to
                         * 0.0.0.0, those are filtered out in
                         * xt_socket, since xt_TPROXY needs 0 bound
@@ -195,6 +197,8 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
                                                   daddr, ntohs(dport),
                                                   in->ifindex);
 
+                       if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+                               sk = NULL;
                        /* NOTE: we return listeners even if bound to
                         * 0.0.0.0, those are filtered out in
                         * xt_socket, since xt_TPROXY needs 0 bound
index 3048a7e..cf32759 100644 (file)
@@ -26,7 +26,7 @@ static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
        nfnl_acct_update(skb, info->nfacct);
 
-       overquota = nfnl_acct_overquota(skb, info->nfacct);
+       overquota = nfnl_acct_overquota(par->net, skb, info->nfacct);
 
        return overquota == NFACCT_UNDERQUOTA ? false : true;
 }
index c644c78..e054a74 100644 (file)
@@ -433,7 +433,6 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
        struct nf_conntrack_l4proto *l4proto;
        struct nf_conntrack_tuple tuple;
        struct nf_conntrack_tuple_hash *h;
-       enum ip_conntrack_info ctinfo;
        struct nf_conn *ct;
        unsigned int dataoff;
        u8 protonum;
@@ -458,13 +457,8 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
 
        ct = nf_ct_tuplehash_to_ctrack(h);
 
-       ctinfo = ovs_ct_get_info(h);
-       if (ctinfo == IP_CT_NEW) {
-               /* This should not happen. */
-               WARN_ONCE(1, "ovs_ct_find_existing: new packet for %p\n", ct);
-       }
        skb->nfct = &ct->ct_general;
-       skb->nfctinfo = ctinfo;
+       skb->nfctinfo = ovs_ct_get_info(h);
        return ct;
 }
 
index 1a1fcec..5aaf3ba 100644 (file)
@@ -93,7 +93,14 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms)
                return ERR_CAST(dev);
        }
 
-       dev_change_flags(dev, dev->flags | IFF_UP);
+       err = dev_change_flags(dev, dev->flags | IFF_UP);
+       if (err < 0) {
+               rtnl_delete_link(dev);
+               rtnl_unlock();
+               ovs_vport_free(vport);
+               goto error;
+       }
+
        rtnl_unlock();
        return vport;
 error:
index 7f8897f..0e72d95 100644 (file)
@@ -54,6 +54,7 @@ static struct vport *gre_tnl_create(const struct vport_parms *parms)
        struct net *net = ovs_dp_get_net(parms->dp);
        struct net_device *dev;
        struct vport *vport;
+       int err;
 
        vport = ovs_vport_alloc(0, &ovs_gre_vport_ops, parms);
        if (IS_ERR(vport))
@@ -67,9 +68,15 @@ static struct vport *gre_tnl_create(const struct vport_parms *parms)
                return ERR_CAST(dev);
        }
 
-       dev_change_flags(dev, dev->flags | IFF_UP);
-       rtnl_unlock();
+       err = dev_change_flags(dev, dev->flags | IFF_UP);
+       if (err < 0) {
+               rtnl_delete_link(dev);
+               rtnl_unlock();
+               ovs_vport_free(vport);
+               return ERR_PTR(err);
+       }
 
+       rtnl_unlock();
        return vport;
 }
 
index 434e04c..95c3614 100644 (file)
@@ -140,7 +140,7 @@ internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
 
 static void internal_set_rx_headroom(struct net_device *dev, int new_hr)
 {
-       dev->needed_headroom = new_hr;
+       dev->needed_headroom = new_hr < 0 ? 0 : new_hr;
 }
 
 static const struct net_device_ops internal_dev_netdev_ops = {
index 5eb7694..7eb955e 100644 (file)
@@ -130,7 +130,14 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
                return ERR_CAST(dev);
        }
 
-       dev_change_flags(dev, dev->flags | IFF_UP);
+       err = dev_change_flags(dev, dev->flags | IFF_UP);
+       if (err < 0) {
+               rtnl_delete_link(dev);
+               rtnl_unlock();
+               ovs_vport_free(vport);
+               goto error;
+       }
+
        rtnl_unlock();
        return vport;
 error:
index 1bb9e7a..ff83fb1 100644 (file)
@@ -425,6 +425,7 @@ struct rxrpc_call {
        spinlock_t              lock;
        rwlock_t                state_lock;     /* lock for state transition */
        atomic_t                usage;
+       atomic_t                skb_count;      /* Outstanding packets on this call */
        atomic_t                sequence;       /* Tx data packet sequence counter */
        u32                     local_abort;    /* local abort code */
        u32                     remote_abort;   /* remote abort code */
index 0b28321..9bae21e 100644 (file)
@@ -130,6 +130,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
                        call->state = RXRPC_CALL_SERVER_ACCEPTING;
                        list_add_tail(&call->accept_link, &rx->acceptq);
                        rxrpc_get_call(call);
+                       atomic_inc(&call->skb_count);
                        nsp = rxrpc_skb(notification);
                        nsp->call = call;
 
index fc32aa5..e60cf65 100644 (file)
@@ -460,6 +460,7 @@ static void rxrpc_insert_oos_packet(struct rxrpc_call *call,
        ASSERTCMP(sp->call, ==, NULL);
        sp->call = call;
        rxrpc_get_call(call);
+       atomic_inc(&call->skb_count);
 
        /* insert into the buffer in sequence order */
        spin_lock_bh(&call->lock);
@@ -734,6 +735,7 @@ all_acked:
                skb->mark = RXRPC_SKB_MARK_FINAL_ACK;
                sp->call = call;
                rxrpc_get_call(call);
+               atomic_inc(&call->skb_count);
                spin_lock_bh(&call->lock);
                if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0)
                        BUG();
@@ -793,6 +795,7 @@ static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error,
                sp->error = error;
                sp->call = call;
                rxrpc_get_call(call);
+               atomic_inc(&call->skb_count);
 
                spin_lock_bh(&call->lock);
                ret = rxrpc_queue_rcv_skb(call, skb, true, fatal);
@@ -834,6 +837,9 @@ void rxrpc_process_call(struct work_struct *work)
                return;
        }
 
+       if (!call->conn)
+               goto skip_msg_init;
+
        /* there's a good chance we're going to have to send a message, so set
         * one up in advance */
        msg.msg_name    = &call->conn->params.peer->srx.transport;
@@ -856,6 +862,7 @@ void rxrpc_process_call(struct work_struct *work)
        memset(iov, 0, sizeof(iov));
        iov[0].iov_base = &whdr;
        iov[0].iov_len  = sizeof(whdr);
+skip_msg_init:
 
        /* deal with events of a final nature */
        if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) {
index 91287c9..ae057e0 100644 (file)
@@ -275,6 +275,7 @@ error:
        list_del_init(&call->link);
        write_unlock_bh(&rxrpc_call_lock);
 
+       set_bit(RXRPC_CALL_RELEASED, &call->flags);
        call->state = RXRPC_CALL_DEAD;
        rxrpc_put_call(call);
        _leave(" = %d", ret);
@@ -287,6 +288,7 @@ error:
         */
 found_user_ID_now_present:
        write_unlock(&rx->call_lock);
+       set_bit(RXRPC_CALL_RELEASED, &call->flags);
        call->state = RXRPC_CALL_DEAD;
        rxrpc_put_call(call);
        _leave(" = -EEXIST [%p]", call);
@@ -491,15 +493,9 @@ void rxrpc_release_call(struct rxrpc_call *call)
                spin_lock_bh(&call->lock);
                while ((skb = skb_dequeue(&call->rx_queue)) ||
                       (skb = skb_dequeue(&call->rx_oos_queue))) {
-                       sp = rxrpc_skb(skb);
-                       if (sp->call) {
-                               ASSERTCMP(sp->call, ==, call);
-                               rxrpc_put_call(call);
-                               sp->call = NULL;
-                       }
-                       skb->destructor = NULL;
                        spin_unlock_bh(&call->lock);
 
+                       sp = rxrpc_skb(skb);
                        _debug("- zap %s %%%u #%u",
                               rxrpc_pkts[sp->hdr.type],
                               sp->hdr.serial, sp->hdr.seq);
@@ -605,6 +601,7 @@ void __rxrpc_put_call(struct rxrpc_call *call)
 
        if (atomic_dec_and_test(&call->usage)) {
                _debug("call %d dead", call->debug_id);
+               WARN_ON(atomic_read(&call->skb_count) != 0);
                ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
                rxrpc_queue_work(&call->destroyer);
        }
index 991a20d..70bb778 100644 (file)
@@ -55,9 +55,6 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
        if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
                _debug("already terminated");
                ASSERTCMP(call->state, >=, RXRPC_CALL_COMPLETE);
-               skb->destructor = NULL;
-               sp->call = NULL;
-               rxrpc_put_call(call);
                rxrpc_free_skb(skb);
                return 0;
        }
@@ -111,13 +108,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
        ret = 0;
 
 out:
-       /* release the socket buffer */
-       if (skb) {
-               skb->destructor = NULL;
-               sp->call = NULL;
-               rxrpc_put_call(call);
-               rxrpc_free_skb(skb);
-       }
+       rxrpc_free_skb(skb);
 
        _leave(" = %d", ret);
        return ret;
@@ -133,11 +124,15 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
        struct rxrpc_skb_priv *sp;
        bool terminal;
        int ret, ackbit, ack;
+       u32 serial;
+       u8 flags;
 
        _enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq);
 
        sp = rxrpc_skb(skb);
        ASSERTCMP(sp->call, ==, NULL);
+       flags = sp->hdr.flags;
+       serial = sp->hdr.serial;
 
        spin_lock(&call->lock);
 
@@ -200,8 +195,9 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
 
        sp->call = call;
        rxrpc_get_call(call);
-       terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) &&
-                   !(sp->hdr.flags & RXRPC_CLIENT_INITIATED));
+       atomic_inc(&call->skb_count);
+       terminal = ((flags & RXRPC_LAST_PACKET) &&
+                   !(flags & RXRPC_CLIENT_INITIATED));
        ret = rxrpc_queue_rcv_skb(call, skb, false, terminal);
        if (ret < 0) {
                if (ret == -ENOMEM || ret == -ENOBUFS) {
@@ -213,12 +209,13 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
        }
 
        skb = NULL;
+       sp = NULL;
 
        _debug("post #%u", seq);
        ASSERTCMP(call->rx_data_post, ==, seq);
        call->rx_data_post++;
 
-       if (sp->hdr.flags & RXRPC_LAST_PACKET)
+       if (flags & RXRPC_LAST_PACKET)
                set_bit(RXRPC_CALL_RCVD_LAST, &call->flags);
 
        /* if we've reached an out of sequence packet then we need to drain
@@ -234,7 +231,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
 
        spin_unlock(&call->lock);
        atomic_inc(&call->ackr_not_idle);
-       rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, sp->hdr.serial, false);
+       rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false);
        _leave(" = 0 [posted]");
        return 0;
 
@@ -247,7 +244,7 @@ out:
 
 discard_and_ack:
        _debug("discard and ACK packet %p", skb);
-       __rxrpc_propose_ACK(call, ack, sp->hdr.serial, true);
+       __rxrpc_propose_ACK(call, ack, serial, true);
 discard:
        spin_unlock(&call->lock);
        rxrpc_free_skb(skb);
@@ -255,7 +252,7 @@ discard:
        return 0;
 
 enqueue_and_ack:
-       __rxrpc_propose_ACK(call, ack, sp->hdr.serial, true);
+       __rxrpc_propose_ACK(call, ack, serial, true);
 enqueue_packet:
        _net("defer skb %p", skb);
        spin_unlock(&call->lock);
@@ -575,13 +572,13 @@ done:
  * post connection-level events to the connection
  * - this includes challenges, responses and some aborts
  */
-static bool rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
+static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
                                      struct sk_buff *skb)
 {
        _enter("%p,%p", conn, skb);
 
        skb_queue_tail(&conn->rx_queue, skb);
-       return rxrpc_queue_conn(conn);
+       rxrpc_queue_conn(conn);
 }
 
 /*
@@ -702,7 +699,6 @@ void rxrpc_data_ready(struct sock *sk)
 
        rcu_read_lock();
 
-retry_find_conn:
        conn = rxrpc_find_connection_rcu(local, skb);
        if (!conn)
                goto cant_route_call;
@@ -710,8 +706,7 @@ retry_find_conn:
        if (sp->hdr.callNumber == 0) {
                /* Connection-level packet */
                _debug("CONN %p {%d}", conn, conn->debug_id);
-               if (!rxrpc_post_packet_to_conn(conn, skb))
-                       goto retry_find_conn;
+               rxrpc_post_packet_to_conn(conn, skb);
        } else {
                /* Call-bound packets are routed by connection channel. */
                unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK;
@@ -749,6 +744,8 @@ cant_route_call:
        if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) {
                _debug("reject type %d",sp->hdr.type);
                rxrpc_reject_packet(local, skb);
+       } else {
+               rxrpc_free_skb(skb);
        }
        _leave(" [no call]");
        return;
index a3fa2ed..9ed66d5 100644 (file)
@@ -203,6 +203,9 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
                }
 
                /* we transferred the whole data packet */
+               if (!(flags & MSG_PEEK))
+                       rxrpc_kernel_data_consumed(call, skb);
+
                if (sp->hdr.flags & RXRPC_LAST_PACKET) {
                        _debug("last");
                        if (rxrpc_conn_is_client(call->conn)) {
@@ -359,28 +362,6 @@ wait_error:
 
 }
 
-/**
- * rxrpc_kernel_data_delivered - Record delivery of data message
- * @skb: Message holding data
- *
- * Record the delivery of a data message.  This permits RxRPC to keep its
- * tracking correct.  The socket buffer will be deleted.
- */
-void rxrpc_kernel_data_delivered(struct sk_buff *skb)
-{
-       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-       struct rxrpc_call *call = sp->call;
-
-       ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv);
-       ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1);
-       call->rx_data_recv = sp->hdr.seq;
-
-       ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten);
-       rxrpc_free_skb(skb);
-}
-
-EXPORT_SYMBOL(rxrpc_kernel_data_delivered);
-
 /**
  * rxrpc_kernel_is_data_last - Determine if data message is last one
  * @skb: Message holding data
index eee0cfd..06c51d4 100644 (file)
@@ -98,11 +98,39 @@ static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
        spin_unlock_bh(&call->lock);
 }
 
+/**
+ * rxrpc_kernel_data_consumed - Record consumption of data message
+ * @call: The call to which the message pertains.
+ * @skb: Message holding data
+ *
+ * Record the consumption of a data message and generate an ACK if appropriate.
+ * The call state is shifted if this was the final packet.  The caller must be
+ * in process context with no spinlocks held.
+ *
+ * TODO: Actually generate the ACK here rather than punting this to the
+ * workqueue.
+ */
+void rxrpc_kernel_data_consumed(struct rxrpc_call *call, struct sk_buff *skb)
+{
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+       _enter("%d,%p{%u}", call->debug_id, skb, sp->hdr.seq);
+
+       ASSERTCMP(sp->call, ==, call);
+       ASSERTCMP(sp->hdr.type, ==, RXRPC_PACKET_TYPE_DATA);
+
+       /* TODO: Fix the sequence number tracking */
+       ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv);
+       ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1);
+       ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten);
+
+       call->rx_data_recv = sp->hdr.seq;
+       rxrpc_hard_ACK_data(call, sp);
+}
+EXPORT_SYMBOL(rxrpc_kernel_data_consumed);
+
 /*
- * destroy a packet that has an RxRPC control buffer
- * - advance the hard-ACK state of the parent call (done here in case something
- *   in the kernel bypasses recvmsg() and steals the packet directly off of the
- *   socket receive queue)
+ * Destroy a packet that has an RxRPC control buffer
  */
 void rxrpc_packet_destructor(struct sk_buff *skb)
 {
@@ -112,9 +140,8 @@ void rxrpc_packet_destructor(struct sk_buff *skb)
        _enter("%p{%p}", skb, call);
 
        if (call) {
-               /* send the final ACK on a client call */
-               if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA)
-                       rxrpc_hard_ACK_data(call, sp);
+               if (atomic_dec_return(&call->skb_count) < 0)
+                       BUG();
                rxrpc_put_call(call);
                sp->call = NULL;
        }
index e4a5f26..d09d068 100644 (file)
@@ -64,7 +64,6 @@ int __tcf_hash_release(struct tc_action *p, bool bind, bool strict)
                if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
                        if (p->ops->cleanup)
                                p->ops->cleanup(p, bind);
-                       list_del(&p->list);
                        tcf_hash_destroy(p->hinfo, p);
                        ret = ACT_P_DELETED;
                }
@@ -421,18 +420,19 @@ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
        return res;
 }
 
-int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
-                   struct tcf_result *res)
+int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
+                   int nr_actions, struct tcf_result *res)
 {
-       const struct tc_action *a;
-       int ret = -1;
+       int ret = -1, i;
 
        if (skb->tc_verd & TC_NCLS) {
                skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
                ret = TC_ACT_OK;
                goto exec_done;
        }
-       list_for_each_entry(a, actions, list) {
+       for (i = 0; i < nr_actions; i++) {
+               const struct tc_action *a = actions[i];
+
 repeat:
                ret = a->ops->act(skb, a, res);
                if (ret == TC_ACT_REPEAT)
@@ -754,16 +754,6 @@ err_out:
        return ERR_PTR(err);
 }
 
-static void cleanup_a(struct list_head *actions)
-{
-       struct tc_action *a, *tmp;
-
-       list_for_each_entry_safe(a, tmp, actions, list) {
-               list_del(&a->list);
-               kfree(a);
-       }
-}
-
 static int tca_action_flush(struct net *net, struct nlattr *nla,
                            struct nlmsghdr *n, u32 portid)
 {
@@ -905,7 +895,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
                return ret;
        }
 err:
-       cleanup_a(&actions);
+       tcf_action_destroy(&actions, 0);
        return ret;
 }
 
@@ -942,15 +932,9 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 
        ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions);
        if (ret)
-               goto done;
+               return ret;
 
-       /* dump then free all the actions after update; inserted policy
-        * stays intact
-        */
-       ret = tcf_add_notify(net, n, &actions, portid);
-       cleanup_a(&actions);
-done:
-       return ret;
+       return tcf_add_notify(net, n, &actions, portid);
 }
 
 static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
index 141a06e..e87cd81 100644 (file)
@@ -53,7 +53,7 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval)
        u32 *tlv = (u32 *)(skbdata);
        u16 totlen = nla_total_size(dlen);      /*alignment + hdr */
        char *dptr = (char *)tlv + NLA_HDRLEN;
-       u32 htlv = attrtype << 16 | totlen;
+       u32 htlv = attrtype << 16 | dlen;
 
        *tlv = htonl(htlv);
        memset(dptr, 0, totlen - NLA_HDRLEN);
@@ -135,7 +135,7 @@ EXPORT_SYMBOL_GPL(ife_release_meta_gen);
 
 int ife_validate_meta_u32(void *val, int len)
 {
-       if (len == 4)
+       if (len == sizeof(u32))
                return 0;
 
        return -EINVAL;
@@ -144,8 +144,8 @@ EXPORT_SYMBOL_GPL(ife_validate_meta_u32);
 
 int ife_validate_meta_u16(void *val, int len)
 {
-       /* length will include padding */
-       if (len == NLA_ALIGN(2))
+       /* length will not include padding */
+       if (len == sizeof(u16))
                return 0;
 
        return -EINVAL;
@@ -652,12 +652,14 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
                u8 *tlvdata = (u8 *)tlv;
                u16 mtype = tlv->type;
                u16 mlen = tlv->len;
+               u16 alen;
 
                mtype = ntohs(mtype);
                mlen = ntohs(mlen);
+               alen = NLA_ALIGN(mlen);
 
-               if (find_decode_metaid(skb, ife, mtype, (mlen - 4),
-                                      (void *)(tlvdata + 4))) {
+               if (find_decode_metaid(skb, ife, mtype, (mlen - NLA_HDRLEN),
+                                      (void *)(tlvdata + NLA_HDRLEN))) {
                        /* abuse overlimits to count when we receive metadata
                         * but dont have an ops for it
                         */
@@ -666,8 +668,8 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
                        ife->tcf_qstats.overlimits++;
                }
 
-               tlvdata += mlen;
-               ifehdrln -= mlen;
+               tlvdata += alen;
+               ifehdrln -= alen;
                tlv = (struct meta_tlvhdr *)tlvdata;
        }
 
index b3c7e97..8a3be1d 100644 (file)
@@ -63,49 +63,8 @@ static int tcf_act_police_walker(struct net *net, struct sk_buff *skb,
                                 const struct tc_action_ops *ops)
 {
        struct tc_action_net *tn = net_generic(net, police_net_id);
-       struct tcf_hashinfo *hinfo = tn->hinfo;
-       int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
-       struct nlattr *nest;
-
-       spin_lock_bh(&hinfo->lock);
-
-       s_i = cb->args[0];
-
-       for (i = 0; i < (POL_TAB_MASK + 1); i++) {
-               struct hlist_head *head;
-               struct tc_action *p;
-
-               head = &hinfo->htab[tcf_hash(i, POL_TAB_MASK)];
-
-               hlist_for_each_entry_rcu(p, head, tcfa_head) {
-                       index++;
-                       if (index < s_i)
-                               continue;
-                       nest = nla_nest_start(skb, index);
-                       if (nest == NULL)
-                               goto nla_put_failure;
-                       if (type == RTM_DELACTION)
-                               err = tcf_action_dump_1(skb, p, 0, 1);
-                       else
-                               err = tcf_action_dump_1(skb, p, 0, 0);
-                       if (err < 0) {
-                               index--;
-                               nla_nest_cancel(skb, nest);
-                               goto done;
-                       }
-                       nla_nest_end(skb, nest);
-                       n_i++;
-               }
-       }
-done:
-       spin_unlock_bh(&hinfo->lock);
-       if (n_i)
-               cb->args[0] += n_i;
-       return n_i;
 
-nla_put_failure:
-       nla_nest_cancel(skb, nest);
-       goto done;
+       return tcf_generic_walker(tn, skb, cb, type, ops);
 }
 
 static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
@@ -125,6 +84,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
        struct tcf_police *police;
        struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
        struct tc_action_net *tn = net_generic(net, police_net_id);
+       bool exists = false;
        int size;
 
        if (nla == NULL)
@@ -139,24 +99,24 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
        size = nla_len(tb[TCA_POLICE_TBF]);
        if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
                return -EINVAL;
+
        parm = nla_data(tb[TCA_POLICE_TBF]);
+       exists = tcf_hash_check(tn, parm->index, a, bind);
+       if (exists && bind)
+               return 0;
 
-       if (parm->index) {
-               if (tcf_hash_check(tn, parm->index, a, bind)) {
-                       if (ovr)
-                               goto override;
-                       /* not replacing */
-                       return -EEXIST;
-               }
-       } else {
+       if (!exists) {
                ret = tcf_hash_create(tn, parm->index, NULL, a,
                                      &act_police_ops, bind, false);
                if (ret)
                        return ret;
                ret = ACT_P_CREATED;
+       } else {
+               tcf_hash_release(*a, bind);
+               if (!ovr)
+                       return -EEXIST;
        }
 
-override:
        police = to_police(*a);
        if (parm->rate.rate) {
                err = -ENOMEM;
index 843a716..a7c5645 100644 (file)
@@ -541,8 +541,12 @@ out:
 void tcf_exts_destroy(struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
-       tcf_action_destroy(&exts->actions, TCA_ACT_UNBIND);
-       INIT_LIST_HEAD(&exts->actions);
+       LIST_HEAD(actions);
+
+       tcf_exts_to_list(exts, &actions);
+       tcf_action_destroy(&actions, TCA_ACT_UNBIND);
+       kfree(exts->actions);
+       exts->nr_actions = 0;
 #endif
 }
 EXPORT_SYMBOL(tcf_exts_destroy);
@@ -554,7 +558,6 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
        {
                struct tc_action *act;
 
-               INIT_LIST_HEAD(&exts->actions);
                if (exts->police && tb[exts->police]) {
                        act = tcf_action_init_1(net, tb[exts->police], rate_tlv,
                                                "police", ovr,
@@ -563,14 +566,20 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
                                return PTR_ERR(act);
 
                        act->type = exts->type = TCA_OLD_COMPAT;
-                       list_add(&act->list, &exts->actions);
+                       exts->actions[0] = act;
+                       exts->nr_actions = 1;
                } else if (exts->action && tb[exts->action]) {
-                       int err;
+                       LIST_HEAD(actions);
+                       int err, i = 0;
+
                        err = tcf_action_init(net, tb[exts->action], rate_tlv,
                                              NULL, ovr,
-                                             TCA_ACT_BIND, &exts->actions);
+                                             TCA_ACT_BIND, &actions);
                        if (err)
                                return err;
+                       list_for_each_entry(act, &actions, list)
+                               exts->actions[i++] = act;
+                       exts->nr_actions = i;
                }
        }
 #else
@@ -587,37 +596,49 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
                     struct tcf_exts *src)
 {
 #ifdef CONFIG_NET_CLS_ACT
-       LIST_HEAD(tmp);
+       struct tcf_exts old = *dst;
+
        tcf_tree_lock(tp);
-       list_splice_init(&dst->actions, &tmp);
-       list_splice(&src->actions, &dst->actions);
+       dst->nr_actions = src->nr_actions;
+       dst->actions = src->actions;
        dst->type = src->type;
        tcf_tree_unlock(tp);
-       tcf_action_destroy(&tmp, TCA_ACT_UNBIND);
+
+       tcf_exts_destroy(&old);
 #endif
 }
 EXPORT_SYMBOL(tcf_exts_change);
 
-#define tcf_exts_first_act(ext)                                        \
-       list_first_entry_or_null(&(exts)->actions,              \
-                                struct tc_action, list)
+#ifdef CONFIG_NET_CLS_ACT
+static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
+{
+       if (exts->nr_actions == 0)
+               return NULL;
+       else
+               return exts->actions[0];
+}
+#endif
 
 int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
        struct nlattr *nest;
 
-       if (exts->action && !list_empty(&exts->actions)) {
+       if (exts->action && exts->nr_actions) {
                /*
                 * again for backward compatible mode - we want
                 * to work with both old and new modes of entering
                 * tc data even if iproute2  was newer - jhs
                 */
                if (exts->type != TCA_OLD_COMPAT) {
+                       LIST_HEAD(actions);
+
                        nest = nla_nest_start(skb, exts->action);
                        if (nest == NULL)
                                goto nla_put_failure;
-                       if (tcf_action_dump(skb, &exts->actions, 0, 0) < 0)
+
+                       tcf_exts_to_list(exts, &actions);
+                       if (tcf_action_dump(skb, &actions, 0, 0) < 0)
                                goto nla_put_failure;
                        nla_nest_end(skb, nest);
                } else if (exts->police) {
index e95b67c..657c133 100644 (file)
@@ -643,18 +643,19 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
        struct Qdisc *sch;
 
        if (!try_module_get(ops->owner))
-               goto errout;
+               return NULL;
 
        sch = qdisc_alloc(dev_queue, ops);
-       if (IS_ERR(sch))
-               goto errout;
+       if (IS_ERR(sch)) {
+               module_put(ops->owner);
+               return NULL;
+       }
        sch->parent = parentid;
 
        if (!ops->init || ops->init(sch, NULL) == 0)
                return sch;
 
        qdisc_destroy(sch);
-errout:
        return NULL;
 }
 EXPORT_SYMBOL(qdisc_create_dflt);
index c182db7..69444d3 100644 (file)
@@ -119,7 +119,13 @@ int sctp_rcv(struct sk_buff *skb)
                       skb_transport_offset(skb))
                goto discard_it;
 
-       if (!pskb_may_pull(skb, sizeof(struct sctphdr)))
+       /* If the packet is fragmented and we need to do crc checking,
+        * it's better to just linearize it otherwise crc computing
+        * takes longer.
+        */
+       if ((!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) &&
+            skb_linearize(skb)) ||
+           !pskb_may_pull(skb, sizeof(struct sctphdr)))
                goto discard_it;
 
        /* Pull up the IP header. */
@@ -1177,9 +1183,6 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
        if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP)
                return NULL;
 
-       if (skb_linearize(skb))
-               return NULL;
-
        ch = (sctp_chunkhdr_t *) skb->data;
 
        /* The code below will attempt to walk the chunk and extract
index c30ddb0..6437aa9 100644 (file)
@@ -170,19 +170,6 @@ next_chunk:
 
                chunk = list_entry(entry, struct sctp_chunk, list);
 
-               /* Linearize if it's not GSO */
-               if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) != SKB_GSO_SCTP &&
-                   skb_is_nonlinear(chunk->skb)) {
-                       if (skb_linearize(chunk->skb)) {
-                               __SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS);
-                               sctp_chunk_free(chunk);
-                               goto next_chunk;
-                       }
-
-                       /* Update sctp_hdr as it probably changed */
-                       chunk->sctp_hdr = sctp_hdr(chunk->skb);
-               }
-
                if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) {
                        /* GSO-marked skbs but without frags, handle
                         * them normally
index 4cb5aed..ef8ba77 100644 (file)
@@ -293,6 +293,7 @@ static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos)
                return ERR_PTR(err);
        }
 
+       iter->start_fail = 0;
        return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos);
 }
 
index f69edcf..f3508aa 100644 (file)
@@ -13,6 +13,7 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r,
 {
        union sctp_addr laddr, paddr;
        struct dst_entry *dst;
+       struct timer_list *t3_rtx = &asoc->peer.primary_path->T3_rtx_timer;
 
        laddr = list_entry(asoc->base.bind_addr.address_list.next,
                           struct sctp_sockaddr_entry, list)->a;
@@ -40,10 +41,15 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r,
        }
 
        r->idiag_state = asoc->state;
-       r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
-       r->idiag_retrans = asoc->rtx_data_chunks;
-       r->idiag_expires = jiffies_to_msecs(
-               asoc->timeouts[SCTP_EVENT_TIMEOUT_T3_RTX] - jiffies);
+       if (timer_pending(t3_rtx)) {
+               r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
+               r->idiag_retrans = asoc->rtx_data_chunks;
+               r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies);
+       } else {
+               r->idiag_timer = 0;
+               r->idiag_retrans = 0;
+               r->idiag_expires = 0;
+       }
 }
 
 static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb,
@@ -350,7 +356,7 @@ static int sctp_ep_dump(struct sctp_endpoint *ep, void *p)
        if (cb->args[4] < cb->args[1])
                goto next;
 
-       if ((r->idiag_states & ~TCPF_LISTEN) && !list_empty(&ep->asocs))
+       if (!(r->idiag_states & TCPF_LISTEN) && !list_empty(&ep->asocs))
                goto next;
 
        if (r->sdiag_family != AF_UNSPEC &&
@@ -418,11 +424,13 @@ static int sctp_diag_dump_one(struct sk_buff *in_skb,
                paddr.v4.sin_family = AF_INET;
        } else {
                laddr.v6.sin6_port = req->id.idiag_sport;
-               memcpy(&laddr.v6.sin6_addr, req->id.idiag_src, 64);
+               memcpy(&laddr.v6.sin6_addr, req->id.idiag_src,
+                      sizeof(laddr.v6.sin6_addr));
                laddr.v6.sin6_family = AF_INET6;
 
                paddr.v6.sin6_port = req->id.idiag_dport;
-               memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst, 64);
+               memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst,
+                      sizeof(paddr.v6.sin6_addr));
                paddr.v6.sin6_family = AF_INET6;
        }
 
@@ -465,7 +473,7 @@ skip:
         * 3 : to mark if we have dumped the ep info of the current asoc
         * 4 : to work as a temporary variable to traverse the list
         */
-       if (!(idiag_states & ~TCPF_LISTEN))
+       if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
                goto done;
        sctp_for_each_transport(sctp_tsp_dump, net, cb->args[2], &commp);
 done:
index 1bc4f71..d85b803 100644 (file)
@@ -702,14 +702,14 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
         */
        sctp_ulpevent_init(event, 0, skb->len + sizeof(struct sk_buff));
 
-       sctp_ulpevent_receive_data(event, asoc);
-
        /* And hold the chunk as we need it for getting the IP headers
         * later in recvmsg
         */
        sctp_chunk_hold(chunk);
        event->chunk = chunk;
 
+       sctp_ulpevent_receive_data(event, asoc);
+
        event->stream = ntohs(chunk->subh.data_hdr->stream);
        event->ssn = ntohs(chunk->subh.data_hdr->ssn);
        event->ppid = chunk->subh.data_hdr->ppid;
index 23c8e7c..976c781 100644 (file)
@@ -340,12 +340,14 @@ gss_release_msg(struct gss_upcall_msg *gss_msg)
 }
 
 static struct gss_upcall_msg *
-__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid)
+__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth)
 {
        struct gss_upcall_msg *pos;
        list_for_each_entry(pos, &pipe->in_downcall, list) {
                if (!uid_eq(pos->uid, uid))
                        continue;
+               if (auth && pos->auth->service != auth->service)
+                       continue;
                atomic_inc(&pos->count);
                dprintk("RPC:       %s found msg %p\n", __func__, pos);
                return pos;
@@ -365,7 +367,7 @@ gss_add_msg(struct gss_upcall_msg *gss_msg)
        struct gss_upcall_msg *old;
 
        spin_lock(&pipe->lock);
-       old = __gss_find_upcall(pipe, gss_msg->uid);
+       old = __gss_find_upcall(pipe, gss_msg->uid, gss_msg->auth);
        if (old == NULL) {
                atomic_inc(&gss_msg->count);
                list_add(&gss_msg->list, &pipe->in_downcall);
@@ -714,7 +716,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
        err = -ENOENT;
        /* Find a matching upcall */
        spin_lock(&pipe->lock);
-       gss_msg = __gss_find_upcall(pipe, uid);
+       gss_msg = __gss_find_upcall(pipe, uid, NULL);
        if (gss_msg == NULL) {
                spin_unlock(&pipe->lock);
                goto err_put_ctx;
index cb49898..7f79fb7 100644 (file)
@@ -2638,6 +2638,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
 {
        struct rpc_xprt_switch *xps;
        struct rpc_xprt *xprt;
+       unsigned long reconnect_timeout;
        unsigned char resvport;
        int ret = 0;
 
@@ -2649,6 +2650,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
                return -EAGAIN;
        }
        resvport = xprt->resvport;
+       reconnect_timeout = xprt->max_reconnect_timeout;
        rcu_read_unlock();
 
        xprt = xprt_create_transport(xprtargs);
@@ -2657,6 +2659,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
                goto out_put_switch;
        }
        xprt->resvport = resvport;
+       xprt->max_reconnect_timeout = reconnect_timeout;
 
        rpc_xprt_switch_set_roundrobin(xps);
        if (setup) {
@@ -2673,6 +2676,27 @@ out_put_switch:
 }
 EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt);
 
+static int
+rpc_xprt_cap_max_reconnect_timeout(struct rpc_clnt *clnt,
+               struct rpc_xprt *xprt,
+               void *data)
+{
+       unsigned long timeout = *((unsigned long *)data);
+
+       if (timeout < xprt->max_reconnect_timeout)
+               xprt->max_reconnect_timeout = timeout;
+       return 0;
+}
+
+void
+rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo)
+{
+       rpc_clnt_iterate_for_each_xprt(clnt,
+                       rpc_xprt_cap_max_reconnect_timeout,
+                       &timeo);
+}
+EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout);
+
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 static void rpc_show_header(void)
 {
index 8313960..ea244b2 100644 (file)
@@ -680,6 +680,20 @@ out:
        spin_unlock_bh(&xprt->transport_lock);
 }
 
+static bool
+xprt_has_timer(const struct rpc_xprt *xprt)
+{
+       return xprt->idle_timeout != 0;
+}
+
+static void
+xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
+       __must_hold(&xprt->transport_lock)
+{
+       if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
+               mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout);
+}
+
 static void
 xprt_init_autodisconnect(unsigned long data)
 {
@@ -688,6 +702,8 @@ xprt_init_autodisconnect(unsigned long data)
        spin_lock(&xprt->transport_lock);
        if (!list_empty(&xprt->recv))
                goto out_abort;
+       /* Reset xprt->last_used to avoid connect/autodisconnect cycling */
+       xprt->last_used = jiffies;
        if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
                goto out_abort;
        spin_unlock(&xprt->transport_lock);
@@ -725,6 +741,7 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
                goto out;
        xprt->snd_task =NULL;
        xprt->ops->release_xprt(xprt, NULL);
+       xprt_schedule_autodisconnect(xprt);
 out:
        spin_unlock_bh(&xprt->transport_lock);
        wake_up_bit(&xprt->state, XPRT_LOCKED);
@@ -888,11 +905,6 @@ static void xprt_timer(struct rpc_task *task)
        spin_unlock_bh(&xprt->transport_lock);
 }
 
-static inline int xprt_has_timer(struct rpc_xprt *xprt)
-{
-       return xprt->idle_timeout != 0;
-}
-
 /**
  * xprt_prepare_transmit - reserve the transport before sending a request
  * @task: RPC task about to send a request
@@ -1280,9 +1292,7 @@ void xprt_release(struct rpc_task *task)
        if (!list_empty(&req->rq_list))
                list_del(&req->rq_list);
        xprt->last_used = jiffies;
-       if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
-               mod_timer(&xprt->timer,
-                               xprt->last_used + xprt->idle_timeout);
+       xprt_schedule_autodisconnect(xprt);
        spin_unlock_bh(&xprt->transport_lock);
        if (req->rq_buffer)
                xprt->ops->buf_free(req->rq_buffer);
index 111767a..8ede3bc 100644 (file)
@@ -177,7 +177,6 @@ static struct ctl_table sunrpc_table[] = {
  * increase over time if the server is down or not responding.
  */
 #define XS_TCP_INIT_REEST_TO   (3U * HZ)
-#define XS_TCP_MAX_REEST_TO    (5U * 60 * HZ)
 
 /*
  * TCP idle timeout; client drops the transport socket if it is idle
@@ -2173,6 +2172,8 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                write_unlock_bh(&sk->sk_callback_lock);
        }
        xs_udp_do_set_buffer_size(xprt);
+
+       xprt->stat.connect_start = jiffies;
 }
 
 static void xs_udp_setup_socket(struct work_struct *work)
@@ -2236,6 +2237,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                unsigned int keepcnt = xprt->timeout->to_retries + 1;
                unsigned int opt_on = 1;
                unsigned int timeo;
+               unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC;
 
                /* TCP Keepalive options */
                kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
@@ -2247,6 +2249,16 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
                                (char *)&keepcnt, sizeof(keepcnt));
 
+               /* Avoid temporary addresses, they are bad for long-lived
+                * connections such as NFS mounts.
+                * RFC4941, section 3.6 suggests that:
+                *    Individual applications, which have specific
+                *    knowledge about the normal duration of connections,
+                *    MAY override this as appropriate.
+                */
+               kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES,
+                               (char *)&addr_pref, sizeof(addr_pref));
+
                /* TCP user timeout (see RFC5482) */
                timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
                        (xprt->timeout->to_retries + 1);
@@ -2295,6 +2307,10 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                /* SYN_SENT! */
                if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
                        xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+               break;
+       case -EADDRNOTAVAIL:
+               /* Source port number is unavailable. Try a new one! */
+               transport->srcport = 0;
        }
 out:
        return ret;
@@ -2369,6 +2385,25 @@ out:
        xprt_wake_pending_tasks(xprt, status);
 }
 
+static unsigned long xs_reconnect_delay(const struct rpc_xprt *xprt)
+{
+       unsigned long start, now = jiffies;
+
+       start = xprt->stat.connect_start + xprt->reestablish_timeout;
+       if (time_after(start, now))
+               return start - now;
+       return 0;
+}
+
+static void xs_reconnect_backoff(struct rpc_xprt *xprt)
+{
+       xprt->reestablish_timeout <<= 1;
+       if (xprt->reestablish_timeout > xprt->max_reconnect_timeout)
+               xprt->reestablish_timeout = xprt->max_reconnect_timeout;
+       if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
+               xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+}
+
 /**
  * xs_connect - connect a socket to a remote endpoint
  * @xprt: pointer to transport structure
@@ -2386,6 +2421,7 @@ out:
 static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
 {
        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+       unsigned long delay = 0;
 
        WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport));
 
@@ -2397,19 +2433,15 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
                /* Start by resetting any existing state */
                xs_reset_transport(transport);
 
-               queue_delayed_work(xprtiod_workqueue,
-                                  &transport->connect_worker,
-                                  xprt->reestablish_timeout);
-               xprt->reestablish_timeout <<= 1;
-               if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
-                       xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
-               if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
-                       xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
-       } else {
+               delay = xs_reconnect_delay(xprt);
+               xs_reconnect_backoff(xprt);
+
+       } else
                dprintk("RPC:       xs_connect scheduled xprt %p\n", xprt);
-               queue_delayed_work(xprtiod_workqueue,
-                                  &transport->connect_worker, 0);
-       }
+
+       queue_delayed_work(xprtiod_workqueue,
+                       &transport->connect_worker,
+                       delay);
 }
 
 /**
@@ -2961,6 +2993,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
        xprt->ops = &xs_tcp_ops;
        xprt->timeout = &xs_tcp_default_timeout;
 
+       xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
+
        INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
        INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
 
index b62caa1..ed97a58 100644 (file)
@@ -728,12 +728,13 @@ int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg,
                             u32 bearer_id, u32 *prev_node)
 {
        struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
-       struct tipc_peer *peer = mon->self;
+       struct tipc_peer *peer;
 
        if (!mon)
                return -EINVAL;
 
        read_lock_bh(&mon->lock);
+       peer = mon->self;
        do {
                if (*prev_node) {
                        if (peer->addr == *prev_node)
index c49b8df..f9f5f3c 100644 (file)
@@ -2180,7 +2180,8 @@ restart:
                                              TIPC_CONN_MSG, SHORT_H_SIZE,
                                              0, dnode, onode, dport, oport,
                                              TIPC_CONN_SHUTDOWN);
-                       tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
+                       if (skb)
+                               tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
                }
                tsk->connected = 0;
                sock->state = SS_DISCONNECTING;
index b016c01..ae7e14c 100644 (file)
@@ -396,10 +396,13 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
        tuncfg.encap_destroy = NULL;
        setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg);
 
-       if (enable_mcast(ub, remote))
+       err = enable_mcast(ub, remote);
+       if (err)
                goto err;
        return 0;
 err:
+       if (ub->ubsock)
+               udp_tunnel_sock_release(ub->ubsock);
        kfree(ub);
        return err;
 }
index 699dfab..936d7ee 100644 (file)
@@ -87,9 +87,6 @@ virtio_transport_send_pkt_work(struct work_struct *work)
 
        vq = vsock->vqs[VSOCK_VQ_TX];
 
-       /* Avoid unnecessary interrupts while we're processing the ring */
-       virtqueue_disable_cb(vq);
-
        for (;;) {
                struct virtio_vsock_pkt *pkt;
                struct scatterlist hdr, buf, *sgs[2];
@@ -99,7 +96,6 @@ virtio_transport_send_pkt_work(struct work_struct *work)
                spin_lock_bh(&vsock->send_pkt_list_lock);
                if (list_empty(&vsock->send_pkt_list)) {
                        spin_unlock_bh(&vsock->send_pkt_list_lock);
-                       virtqueue_enable_cb(vq);
                        break;
                }
 
@@ -118,13 +114,13 @@ virtio_transport_send_pkt_work(struct work_struct *work)
                }
 
                ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL);
+               /* Usually this means that there is no more space available in
+                * the vq
+                */
                if (ret < 0) {
                        spin_lock_bh(&vsock->send_pkt_list_lock);
                        list_add(&pkt->list, &vsock->send_pkt_list);
                        spin_unlock_bh(&vsock->send_pkt_list_lock);
-
-                       if (!virtqueue_enable_cb(vq) && ret == -ENOSPC)
-                               continue; /* retry now that we have more space */
                        break;
                }
 
index b0e11b6..0f50622 100644 (file)
@@ -513,6 +513,7 @@ static bool cfg80211_chandef_dfs_available(struct wiphy *wiphy,
                r = cfg80211_get_chans_dfs_available(wiphy,
                                                     chandef->center_freq2,
                                                     width);
+               break;
        default:
                WARN_ON(chandef->center_freq2);
                break;
index 46417f9..f02653a 100644 (file)
@@ -5380,6 +5380,7 @@ static int nl80211_parse_mesh_config(struct genl_info *info,
 {
        struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1];
        u32 mask = 0;
+       u16 ht_opmode;
 
 #define FILL_IN_MESH_PARAM_IF_SET(tb, cfg, param, min, max, mask, attr, fn) \
 do {                                                                       \
@@ -5471,9 +5472,36 @@ do {                                                                         \
        FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, -255, 0,
                                  mask, NL80211_MESHCONF_RSSI_THRESHOLD,
                                  nl80211_check_s32);
-       FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16,
-                                 mask, NL80211_MESHCONF_HT_OPMODE,
-                                 nl80211_check_u16);
+       /*
+        * Check HT operation mode based on
+        * IEEE 802.11 2012 8.4.2.59 HT Operation element.
+        */
+       if (tb[NL80211_MESHCONF_HT_OPMODE]) {
+               ht_opmode = nla_get_u16(tb[NL80211_MESHCONF_HT_OPMODE]);
+
+               if (ht_opmode & ~(IEEE80211_HT_OP_MODE_PROTECTION |
+                                 IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT |
+                                 IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
+                       return -EINVAL;
+
+               if ((ht_opmode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT) &&
+                   (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
+                       return -EINVAL;
+
+               switch (ht_opmode & IEEE80211_HT_OP_MODE_PROTECTION) {
+               case IEEE80211_HT_OP_MODE_PROTECTION_NONE:
+               case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ:
+                       if (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)
+                               return -EINVAL;
+                       break;
+               case IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER:
+               case IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED:
+                       if (!(ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
+                               return -EINVAL;
+                       break;
+               }
+               cfg->ht_opmode = ht_opmode;
+       }
        FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout,
                                  1, 65535, mask,
                                  NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT,
index 217c8d5..7927a09 100644 (file)
@@ -72,8 +72,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
        (void *) BPF_FUNC_l3_csum_replace;
 static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
        (void *) BPF_FUNC_l4_csum_replace;
-static int (*bpf_skb_in_cgroup)(void *ctx, void *map, int index) =
-       (void *) BPF_FUNC_skb_in_cgroup;
+static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
+       (void *) BPF_FUNC_skb_under_cgroup;
 
 #if defined(__x86_64__)
 
index 2732c37..10ff734 100644 (file)
@@ -57,7 +57,7 @@ int handle_egress(struct __sk_buff *skb)
                bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg),
                                 eth->h_proto, ip6h->nexthdr);
                return TC_ACT_OK;
-       } else if (bpf_skb_in_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) {
+       } else if (bpf_skb_under_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) {
                bpf_trace_printk(pass_msg, sizeof(pass_msg));
                return TC_ACT_OK;
        } else {
index 47bf085..cce2b59 100644 (file)
@@ -68,7 +68,16 @@ static void test_hashmap_sanity(int i, void *data)
        assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
               errno == E2BIG);
 
+       /* update existing element, though the map is full */
+       key = 1;
+       assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == 0);
+       key = 2;
+       assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0);
+       key = 1;
+       assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0);
+
        /* check that key = 0 doesn't exist */
+       key = 0;
        assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT);
 
        /* iterate over two elements */
@@ -413,10 +422,12 @@ static void do_work(int fn, void *data)
 
        for (i = fn; i < MAP_SIZE; i += TASKS) {
                key = value = i;
-               if (do_update)
+               if (do_update) {
                        assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0);
-               else
+                       assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == 0);
+               } else {
                        assert(bpf_delete_elem(map_fd, &key) == 0);
+               }
        }
 }
 
index 15b196f..1792198 100644 (file)
@@ -108,16 +108,20 @@ as-option = $(call try-run,\
 as-instr = $(call try-run,\
        printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3))
 
+# Do not attempt to build with gcc plugins during cc-option tests.
+# (And this uses delayed resolution so the flags will be up to date.)
+CC_OPTION_CFLAGS = $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS))
+
 # cc-option
 # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
 
 cc-option = $(call try-run,\
-       $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
+       $(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
 
 # cc-option-yn
 # Usage: flag := $(call cc-option-yn,-march=winchip-c6)
 cc-option-yn = $(call try-run,\
-       $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n)
+       $(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n)
 
 # cc-option-align
 # Prefix align with either -falign or -malign
@@ -127,7 +131,7 @@ cc-option-align = $(subst -functions=0,,\
 # cc-disable-warning
 # Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable)
 cc-disable-warning = $(call try-run,\
-       $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
+       $(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
 
 # cc-name
 # Expands to either gcc or clang
index 5e22b60..61f0e6d 100644 (file)
@@ -19,25 +19,42 @@ ifdef CONFIG_GCC_PLUGINS
     endif
   endif
 
-  GCC_PLUGINS_CFLAGS := $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y))
+  GCC_PLUGINS_CFLAGS := $(strip $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y)) $(gcc-plugin-cflags-y))
 
-  export PLUGINCC GCC_PLUGINS_CFLAGS GCC_PLUGIN SANCOV_PLUGIN
+  export PLUGINCC GCC_PLUGINS_CFLAGS GCC_PLUGIN GCC_PLUGIN_SUBDIR SANCOV_PLUGIN
 
+  ifneq ($(PLUGINCC),)
+    # SANCOV_PLUGIN must appear only in CFLAGS_KCOV, to avoid duplication.
+    GCC_PLUGINS_CFLAGS := $(filter-out $(SANCOV_PLUGIN), $(GCC_PLUGINS_CFLAGS))
+  endif
+
+  KBUILD_CFLAGS += $(GCC_PLUGINS_CFLAGS)
+  GCC_PLUGIN := $(gcc-plugin-y)
+  GCC_PLUGIN_SUBDIR := $(gcc-plugin-subdir-y)
+endif
+
+# If plugins aren't supported, abort the build before hard-to-read compiler
+# errors start getting spewed by the main build.
+PHONY += gcc-plugins-check
+gcc-plugins-check: FORCE
+ifdef CONFIG_GCC_PLUGINS
   ifeq ($(PLUGINCC),)
     ifneq ($(GCC_PLUGINS_CFLAGS),)
       ifeq ($(call cc-ifversion, -ge, 0405, y), y)
-        PLUGINCC := $(shell $(CONFIG_SHELL) -x $(srctree)/scripts/gcc-plugin.sh "$(__PLUGINCC)" "$(HOSTCXX)" "$(CC)")
-        $(warning warning: your gcc installation does not support plugins, perhaps the necessary headers are missing?)
+       $(Q)$(srctree)/scripts/gcc-plugin.sh --show-error "$(__PLUGINCC)" "$(HOSTCXX)" "$(CC)" || true
+       @echo "Cannot use CONFIG_GCC_PLUGINS: your gcc installation does not support plugins, perhaps the necessary headers are missing?" >&2 && exit 1
       else
-        $(warning warning: your gcc version does not support plugins, you should upgrade it to gcc 4.5 at least)
+       @echo "Cannot use CONFIG_GCC_PLUGINS: your gcc version does not support plugins, you should upgrade it to at least gcc 4.5" >&2 && exit 1
       endif
     endif
-  else
-    # SANCOV_PLUGIN can be only in CFLAGS_KCOV because avoid duplication.
-    GCC_PLUGINS_CFLAGS := $(filter-out $(SANCOV_PLUGIN), $(GCC_PLUGINS_CFLAGS))
   endif
+endif
+       @:
 
-  KBUILD_CFLAGS += $(GCC_PLUGINS_CFLAGS)
-  GCC_PLUGIN := $(gcc-plugin-y)
-
+# Actually do the build, if requested.
+PHONY += gcc-plugins
+gcc-plugins: scripts_basic gcc-plugins-check
+ifdef CONFIG_GCC_PLUGINS
+       $(Q)$(MAKE) $(build)=scripts/gcc-plugins
 endif
+       @:
index fb92075..b65224b 100755 (executable)
@@ -1,5 +1,12 @@
 #!/bin/sh
 srctree=$(dirname "$0")
+
+SHOW_ERROR=
+if [ "$1" = "--show-error" ] ; then
+       SHOW_ERROR=1
+       shift || true
+fi
+
 gccplugins_dir=$($3 -print-file-name=plugin)
 plugincc=$($1 -E -x c++ - -o /dev/null -I"${srctree}"/gcc-plugins -I"${gccplugins_dir}"/include 2>&1 <<EOF
 #include "gcc-common.h"
@@ -13,6 +20,9 @@ EOF
 
 if [ $? -ne 0 ]
 then
+       if [ -n "$SHOW_ERROR" ] ; then
+               echo "${plugincc}" >&2
+       fi
        exit 1
 fi
 
@@ -48,4 +58,8 @@ then
        echo "$2"
        exit 0
 fi
+
+if [ -n "$SHOW_ERROR" ] ; then
+       echo "${plugincc}" >&2
+fi
 exit 1
index 88c8ec4..8b29dc1 100644 (file)
@@ -12,16 +12,18 @@ else
   export HOST_EXTRACXXFLAGS
 endif
 
-export GCCPLUGINS_DIR HOSTLIBS
-
 ifneq ($(CFLAGS_KCOV), $(SANCOV_PLUGIN))
   GCC_PLUGIN := $(filter-out $(SANCOV_PLUGIN), $(GCC_PLUGIN))
 endif
 
-$(HOSTLIBS)-y := $(GCC_PLUGIN)
+export HOSTLIBS
+
+$(HOSTLIBS)-y := $(foreach p,$(GCC_PLUGIN),$(if $(findstring /,$(p)),,$(p)))
 always := $($(HOSTLIBS)-y)
 
-cyc_complexity_plugin-objs := cyc_complexity_plugin.o
-sancov_plugin-objs := sancov_plugin.o
+$(foreach p,$($(HOSTLIBS)-y:%.so=%),$(eval $(p)-objs := $(p).o))
+
+subdir-y := $(GCC_PLUGIN_SUBDIR)
+subdir-  += $(GCC_PLUGIN_SUBDIR)
 
 clean-files += *.so
index 122fcda..aed4511 100755 (executable)
@@ -432,7 +432,7 @@ foreach my $file (@ARGV) {
            die "$P: file '${file}' not found\n";
        }
     }
-    if ($from_filename || vcs_file_exists($file)) {
+    if ($from_filename || ($file ne "&STDIN" && vcs_file_exists($file))) {
        $file =~ s/^\Q${cur_path}\E//;  #strip any absolute path
        $file =~ s/^\Q${lk_path}\E//;   #or the path to the lk tree
        push(@files, $file);
@@ -2136,9 +2136,11 @@ sub vcs_file_exists {
 
     my $cmd = $VCS_cmds{"file_exists_cmd"};
     $cmd =~ s/(\$\w+)/$1/eeg;          # interpolate $cmd
-
+    $cmd .= " 2>&1";
     $exists = &{$VCS_cmds{"execute_cmd"}}($cmd);
 
+    return 0 if ($? != 0);
+
     return $exists;
 }
 
index 176758c..da10d9b 100644 (file)
@@ -118,6 +118,35 @@ config LSM_MMAP_MIN_ADDR
          this low address space will need the permission specific to the
          systems running LSM.
 
+config HAVE_HARDENED_USERCOPY_ALLOCATOR
+       bool
+       help
+         The heap allocator implements __check_heap_object() for
+         validating memory ranges against heap object sizes in
+         support of CONFIG_HARDENED_USERCOPY.
+
+config HAVE_ARCH_HARDENED_USERCOPY
+       bool
+       help
+         The architecture supports CONFIG_HARDENED_USERCOPY by
+         calling check_object_size() just before performing the
+         userspace copies in the low level implementation of
+         copy_to_user() and copy_from_user().
+
+config HARDENED_USERCOPY
+       bool "Harden memory copies between kernel and userspace"
+       depends on HAVE_ARCH_HARDENED_USERCOPY
+       depends on HAVE_HARDENED_USERCOPY_ALLOCATOR
+       select BUG
+       help
+         This option checks for obviously wrong memory regions when
+         copying memory to/from the kernel (via copy_to_user() and
+         copy_from_user() functions) by rejecting memory ranges that
+         are larger than the specified heap object, span multiple
+         separately allocated pages, are not on the process stack,
+         or are part of the kernel text. This kills entire classes
+         of heap overflow exploits and similar kernel memory exposures.
+
 source security/selinux/Kconfig
 source security/smack/Kconfig
 source security/tomoyo/Kconfig
index 89dacf9..160c7f7 100644 (file)
@@ -906,20 +906,23 @@ static int azx_resume(struct device *dev)
        struct snd_card *card = dev_get_drvdata(dev);
        struct azx *chip;
        struct hda_intel *hda;
+       struct hdac_bus *bus;
 
        if (!card)
                return 0;
 
        chip = card->private_data;
        hda = container_of(chip, struct hda_intel, chip);
+       bus = azx_bus(chip);
        if (chip->disabled || hda->init_failed || !chip->running)
                return 0;
 
-       if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL
-               && hda->need_i915_power) {
-               snd_hdac_display_power(azx_bus(chip), true);
-               snd_hdac_i915_set_bclk(azx_bus(chip));
+       if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) {
+               snd_hdac_display_power(bus, true);
+               if (hda->need_i915_power)
+                       snd_hdac_i915_set_bclk(bus);
        }
+
        if (chip->msi)
                if (pci_enable_msi(pci) < 0)
                        chip->msi = 0;
@@ -929,6 +932,11 @@ static int azx_resume(struct device *dev)
 
        hda_intel_init_chip(chip, true);
 
+       /* power down again for link-controlled chips */
+       if ((chip->driver_caps & AZX_DCAPS_I915_POWERWELL) &&
+           !hda->need_i915_power)
+               snd_hdac_display_power(bus, false);
+
        snd_power_change_state(card, SNDRV_CTL_POWER_D0);
 
        trace_azx_resume(chip);
@@ -1008,6 +1016,7 @@ static int azx_runtime_resume(struct device *dev)
 
        chip = card->private_data;
        hda = container_of(chip, struct hda_intel, chip);
+       bus = azx_bus(chip);
        if (chip->disabled || hda->init_failed)
                return 0;
 
@@ -1015,15 +1024,9 @@ static int azx_runtime_resume(struct device *dev)
                return 0;
 
        if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) {
-               bus = azx_bus(chip);
-               if (hda->need_i915_power) {
-                       snd_hdac_display_power(bus, true);
+               snd_hdac_display_power(bus, true);
+               if (hda->need_i915_power)
                        snd_hdac_i915_set_bclk(bus);
-               } else {
-                       /* toggle codec wakeup bit for STATESTS read */
-                       snd_hdac_set_codec_wakeup(bus, true);
-                       snd_hdac_set_codec_wakeup(bus, false);
-               }
        }
 
        /* Read STATESTS before controller reset */
@@ -1043,6 +1046,11 @@ static int azx_runtime_resume(struct device *dev)
        azx_writew(chip, WAKEEN, azx_readw(chip, WAKEEN) &
                        ~STATESTS_INT_MASK);
 
+       /* power down again for link-controlled chips */
+       if ((chip->driver_caps & AZX_DCAPS_I915_POWERWELL) &&
+           !hda->need_i915_power)
+               snd_hdac_display_power(bus, false);
+
        trace_azx_runtime_resume(chip);
        return 0;
 }
index 574b1b4..7100f05 100644 (file)
@@ -4828,7 +4828,7 @@ enum {
        ALC293_FIXUP_DELL1_MIC_NO_PRESENCE,
        ALC292_FIXUP_TPT440_DOCK,
        ALC292_FIXUP_TPT440,
-       ALC283_FIXUP_BXBT2807_MIC,
+       ALC283_FIXUP_HEADSET_MIC,
        ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED,
        ALC282_FIXUP_ASPIRE_V5_PINS,
        ALC280_FIXUP_HP_GPIO4,
@@ -5321,7 +5321,7 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC292_FIXUP_TPT440_DOCK,
        },
-       [ALC283_FIXUP_BXBT2807_MIC] = {
+       [ALC283_FIXUP_HEADSET_MIC] = {
                .type = HDA_FIXUP_PINS,
                .v.pins = (const struct hda_pintbl[]) {
                        { 0x19, 0x04a110f0 },
@@ -5651,7 +5651,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN),
        SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC),
        SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
-       SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_BXBT2807_MIC),
+       SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
        SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE),
        SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE),
index 54c09ac..16e459a 100644 (file)
@@ -299,8 +299,9 @@ static int atmel_ssc_startup(struct snd_pcm_substream *substream,
        clk_enable(ssc_p->ssc->clk);
        ssc_p->mck_rate = clk_get_rate(ssc_p->ssc->clk);
 
-       /* Reset the SSC to keep it at a clean status */
-       ssc_writel(ssc_p->ssc->regs, CR, SSC_BIT(CR_SWRST));
+       /* Reset the SSC unless initialized to keep it in a clean state */
+       if (!ssc_p->initialized)
+               ssc_writel(ssc_p->ssc->regs, CR, SSC_BIT(CR_SWRST));
 
        if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
                dir = 0;
index e5527bc..bcf1834 100644 (file)
@@ -1247,8 +1247,8 @@ static int da7213_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt)
                return -EINVAL;
        }
 
-       /* By default only 32 BCLK per WCLK is supported */
-       dai_clk_mode |= DA7213_DAI_BCLKS_PER_WCLK_32;
+       /* By default only 64 BCLK per WCLK is supported */
+       dai_clk_mode |= DA7213_DAI_BCLKS_PER_WCLK_64;
 
        snd_soc_write(codec, DA7213_DAI_CLK_MODE, dai_clk_mode);
        snd_soc_update_bits(codec, DA7213_DAI_CTRL, DA7213_DAI_FORMAT_MASK,
index cf0a39b..02352ed 100644 (file)
@@ -412,6 +412,7 @@ static int max98371_i2c_remove(struct i2c_client *client)
 
 static const struct i2c_device_id max98371_i2c_id[] = {
        { "max98371", 0 },
+       { }
 };
 
 MODULE_DEVICE_TABLE(i2c, max98371_i2c_id);
index 5c9707a..2e59a85 100644 (file)
@@ -212,31 +212,6 @@ static const unsigned short logtable[256] = {
        0xfa2f, 0xfaea, 0xfba5, 0xfc60, 0xfd1a, 0xfdd4, 0xfe8e, 0xff47
 };
 
-static struct snd_soc_dai *nau8825_get_codec_dai(struct nau8825 *nau8825)
-{
-       struct snd_soc_codec *codec = snd_soc_dapm_to_codec(nau8825->dapm);
-       struct snd_soc_component *component = &codec->component;
-       struct snd_soc_dai *codec_dai, *_dai;
-
-       list_for_each_entry_safe(codec_dai, _dai, &component->dai_list, list) {
-               if (!strncmp(codec_dai->name, NUVOTON_CODEC_DAI,
-                       strlen(NUVOTON_CODEC_DAI)))
-                       return codec_dai;
-       }
-       return NULL;
-}
-
-static bool nau8825_dai_is_active(struct nau8825 *nau8825)
-{
-       struct snd_soc_dai *codec_dai = nau8825_get_codec_dai(nau8825);
-
-       if (codec_dai) {
-               if (codec_dai->playback_active || codec_dai->capture_active)
-                       return true;
-       }
-       return false;
-}
-
 /**
  * nau8825_sema_acquire - acquire the semaphore of nau88l25
  * @nau8825:  component to register the codec private data with
@@ -250,19 +225,26 @@ static bool nau8825_dai_is_active(struct nau8825 *nau8825)
  * Acquires the semaphore without jiffies. If no more tasks are allowed
  * to acquire the semaphore, calling this function will put the task to
  * sleep until the semaphore is released.
- * It returns if the semaphore was acquired.
+ * If the semaphore is not released within the specified number of jiffies,
+ * this function returns -ETIME.
+ * If the sleep is interrupted by a signal, this function will return -EINTR.
+ * It returns 0 if the semaphore was acquired successfully.
  */
-static void nau8825_sema_acquire(struct nau8825 *nau8825, long timeout)
+static int nau8825_sema_acquire(struct nau8825 *nau8825, long timeout)
 {
        int ret;
 
-       if (timeout)
+       if (timeout) {
                ret = down_timeout(&nau8825->xtalk_sem, timeout);
-       else
+               if (ret < 0)
+                       dev_warn(nau8825->dev, "Acquire semaphone timeout\n");
+       } else {
                ret = down_interruptible(&nau8825->xtalk_sem);
+               if (ret < 0)
+                       dev_warn(nau8825->dev, "Acquire semaphone fail\n");
+       }
 
-       if (ret < 0)
-               dev_warn(nau8825->dev, "Acquire semaphone fail\n");
+       return ret;
 }
 
 /**
@@ -1205,6 +1187,8 @@ static int nau8825_hw_params(struct snd_pcm_substream *substream,
        struct nau8825 *nau8825 = snd_soc_codec_get_drvdata(codec);
        unsigned int val_len = 0;
 
+       nau8825_sema_acquire(nau8825, 2 * HZ);
+
        switch (params_width(params)) {
        case 16:
                val_len |= NAU8825_I2S_DL_16;
@@ -1225,6 +1209,9 @@ static int nau8825_hw_params(struct snd_pcm_substream *substream,
        regmap_update_bits(nau8825->regmap, NAU8825_REG_I2S_PCM_CTRL1,
                NAU8825_I2S_DL_MASK, val_len);
 
+       /* Release the semaphore. */
+       nau8825_sema_release(nau8825);
+
        return 0;
 }
 
@@ -1234,6 +1221,8 @@ static int nau8825_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt)
        struct nau8825 *nau8825 = snd_soc_codec_get_drvdata(codec);
        unsigned int ctrl1_val = 0, ctrl2_val = 0;
 
+       nau8825_sema_acquire(nau8825, 2 * HZ);
+
        switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
        case SND_SOC_DAIFMT_CBM_CFM:
                ctrl2_val |= NAU8825_I2S_MS_MASTER;
@@ -1282,6 +1271,9 @@ static int nau8825_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt)
        regmap_update_bits(nau8825->regmap, NAU8825_REG_I2S_PCM_CTRL2,
                NAU8825_I2S_MS_MASK, ctrl2_val);
 
+       /* Release the semaphore. */
+       nau8825_sema_release(nau8825);
+
        return 0;
 }
 
@@ -1611,8 +1603,11 @@ static irqreturn_t nau8825_interrupt(int irq, void *data)
                                         * cess and restore changes if process
                                         * is ongoing when ejection.
                                         */
+                                       int ret;
                                        nau8825->xtalk_protect = true;
-                                       nau8825_sema_acquire(nau8825, 0);
+                                       ret = nau8825_sema_acquire(nau8825, 0);
+                                       if (ret < 0)
+                                               nau8825->xtalk_protect = false;
                                }
                                /* Startup cross talk detection process */
                                nau8825->xtalk_state = NAU8825_XTALK_PREPARE;
@@ -2238,23 +2233,14 @@ static int __maybe_unused nau8825_suspend(struct snd_soc_codec *codec)
 static int __maybe_unused nau8825_resume(struct snd_soc_codec *codec)
 {
        struct nau8825 *nau8825 = snd_soc_codec_get_drvdata(codec);
+       int ret;
 
        regcache_cache_only(nau8825->regmap, false);
        regcache_sync(nau8825->regmap);
-       if (nau8825_is_jack_inserted(nau8825->regmap)) {
-               /* If the jack is inserted, we need to check whether the play-
-                * back is active before suspend. If active, the driver has to
-                * raise the protection for cross talk function to avoid the
-                * playback recovers before cross talk process finish. Other-
-                * wise, the playback will be interfered by cross talk func-
-                * tion. It is better to apply hardware related parameters
-                * before starting playback or record.
-                */
-               if (nau8825_dai_is_active(nau8825)) {
-                       nau8825->xtalk_protect = true;
-                       nau8825_sema_acquire(nau8825, 0);
-               }
-       }
+       nau8825->xtalk_protect = true;
+       ret = nau8825_sema_acquire(nau8825, 0);
+       if (ret < 0)
+               nau8825->xtalk_protect = false;
        enable_irq(nau8825->irq);
 
        return 0;
index a67ea10..f266439 100644 (file)
@@ -581,7 +581,7 @@ static int wm2000_anc_transition(struct wm2000_priv *wm2000,
        if (anc_transitions[i].dest == ANC_OFF)
                clk_disable_unprepare(wm2000->mclk);
 
-       return ret;
+       return 0;
 }
 
 static int wm2000_anc_set_mode(struct wm2000_priv *wm2000)
index 45602ca..2d53c8d 100644 (file)
@@ -1,5 +1,5 @@
-obj-$(CONFIG_SND_SIMPLE_CARD_UTILS) := simple-card-utils.o
-
+snd-soc-simple-card-utils-objs := simple-card-utils.o
 snd-soc-simple-card-objs       := simple-card.o
 
-obj-$(CONFIG_SND_SIMPLE_CARD)  += snd-soc-simple-card.o
+obj-$(CONFIG_SND_SIMPLE_CARD_UTILS)    += snd-soc-simple-card-utils.o
+obj-$(CONFIG_SND_SIMPLE_CARD)          += snd-soc-simple-card.o
index d89a9a1..9599de6 100644 (file)
@@ -7,6 +7,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+#include <linux/module.h>
 #include <linux/of.h>
 #include <sound/simple_card_utils.h>
 
@@ -95,3 +96,8 @@ int asoc_simple_card_parse_card_name(struct snd_soc_card *card,
        return 0;
 }
 EXPORT_SYMBOL_GPL(asoc_simple_card_parse_card_name);
+
+/* Module information */
+MODULE_AUTHOR("Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>");
+MODULE_DESCRIPTION("ALSA SoC Simple Card Utils");
+MODULE_LICENSE("GPL v2");
index 25fcb79..ddcb52a 100644 (file)
@@ -123,6 +123,11 @@ int snd_skl_get_module_info(struct skl_sst *ctx, u8 *uuid,
 
        uuid_mod = (uuid_le *)uuid;
 
+       if (list_empty(&ctx->uuid_list)) {
+               dev_err(ctx->dev, "Module list is empty\n");
+               return -EINVAL;
+       }
+
        list_for_each_entry(module, &ctx->uuid_list, list) {
                if (uuid_le_cmp(*uuid_mod, module->uuid) == 0) {
                        dfw_config->module_id = module->id;
index cd59536..e3e7641 100644 (file)
@@ -672,8 +672,10 @@ static int skl_probe(struct pci_dev *pci,
 
        skl->nhlt = skl_nhlt_init(bus->dev);
 
-       if (skl->nhlt == NULL)
+       if (skl->nhlt == NULL) {
+               err = -ENODEV;
                goto out_free;
+       }
 
        skl_nhlt_update_topology_bin(skl);
 
index 0843a68..f61b3b5 100644 (file)
 struct abe_twl6040 {
        int     jack_detection; /* board can detect jack events */
        int     mclk_freq;      /* MCLK frequency speed for twl6040 */
-
-       struct platform_device *dmic_codec_dev;
 };
 
+struct platform_device *dmic_codec_dev;
+
 static int omap_abe_hw_params(struct snd_pcm_substream *substream,
        struct snd_pcm_hw_params *params)
 {
@@ -258,8 +258,6 @@ static int omap_abe_probe(struct platform_device *pdev)
        if (priv == NULL)
                return -ENOMEM;
 
-       priv->dmic_codec_dev = ERR_PTR(-EINVAL);
-
        if (snd_soc_of_parse_card_name(card, "ti,model")) {
                dev_err(&pdev->dev, "Card name is not provided\n");
                return -ENODEV;
@@ -284,13 +282,6 @@ static int omap_abe_probe(struct platform_device *pdev)
                num_links = 2;
                abe_twl6040_dai_links[1].cpu_of_node = dai_node;
                abe_twl6040_dai_links[1].platform_of_node = dai_node;
-
-               priv->dmic_codec_dev = platform_device_register_simple(
-                                               "dmic-codec", -1, NULL, 0);
-               if (IS_ERR(priv->dmic_codec_dev)) {
-                       dev_err(&pdev->dev, "Can't instantiate dmic-codec\n");
-                       return PTR_ERR(priv->dmic_codec_dev);
-               }
        } else {
                num_links = 1;
        }
@@ -299,16 +290,14 @@ static int omap_abe_probe(struct platform_device *pdev)
        of_property_read_u32(node, "ti,mclk-freq", &priv->mclk_freq);
        if (!priv->mclk_freq) {
                dev_err(&pdev->dev, "MCLK frequency not provided\n");
-               ret = -EINVAL;
-               goto err_unregister;
+               return -EINVAL;
        }
 
        card->fully_routed = 1;
 
        if (!priv->mclk_freq) {
                dev_err(&pdev->dev, "MCLK frequency missing\n");
-               ret = -ENODEV;
-               goto err_unregister;
+               return -ENODEV;
        }
 
        card->dai_link = abe_twl6040_dai_links;
@@ -317,17 +306,9 @@ static int omap_abe_probe(struct platform_device *pdev)
        snd_soc_card_set_drvdata(card, priv);
 
        ret = snd_soc_register_card(card);
-       if (ret) {
+       if (ret)
                dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n",
                        ret);
-               goto err_unregister;
-       }
-
-       return 0;
-
-err_unregister:
-       if (!IS_ERR(priv->dmic_codec_dev))
-               platform_device_unregister(priv->dmic_codec_dev);
 
        return ret;
 }
@@ -335,13 +316,9 @@ err_unregister:
 static int omap_abe_remove(struct platform_device *pdev)
 {
        struct snd_soc_card *card = platform_get_drvdata(pdev);
-       struct abe_twl6040 *priv = snd_soc_card_get_drvdata(card);
 
        snd_soc_unregister_card(card);
 
-       if (!IS_ERR(priv->dmic_codec_dev))
-               platform_device_unregister(priv->dmic_codec_dev);
-
        return 0;
 }
 
@@ -361,7 +338,33 @@ static struct platform_driver omap_abe_driver = {
        .remove = omap_abe_remove,
 };
 
-module_platform_driver(omap_abe_driver);
+static int __init omap_abe_init(void)
+{
+       int ret;
+
+       dmic_codec_dev = platform_device_register_simple("dmic-codec", -1, NULL,
+                                                        0);
+       if (IS_ERR(dmic_codec_dev)) {
+               pr_err("%s: dmic-codec device registration failed\n", __func__);
+               return PTR_ERR(dmic_codec_dev);
+       }
+
+       ret = platform_driver_register(&omap_abe_driver);
+       if (ret) {
+               pr_err("%s: platform driver registration failed\n", __func__);
+               platform_device_unregister(dmic_codec_dev);
+       }
+
+       return ret;
+}
+module_init(omap_abe_init);
+
+static void __exit omap_abe_exit(void)
+{
+       platform_driver_unregister(&omap_abe_driver);
+       platform_device_unregister(dmic_codec_dev);
+}
+module_exit(omap_abe_exit);
 
 MODULE_AUTHOR("Misael Lopez Cruz <misael.lopez@ti.com>");
 MODULE_DESCRIPTION("ALSA SoC for OMAP boards with ABE and twl6040 codec");
index e7cdc51..64609c7 100644 (file)
@@ -31,7 +31,6 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/irq.h>
-#include <linux/clk.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
 #include <linux/of_device.h>
@@ -55,7 +54,6 @@ struct omap_mcpdm {
        unsigned long phys_base;
        void __iomem *io_base;
        int irq;
-       struct clk *pdmclk;
 
        struct mutex mutex;
 
@@ -390,15 +388,14 @@ static int omap_mcpdm_probe(struct snd_soc_dai *dai)
        struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai);
        int ret;
 
-       clk_prepare_enable(mcpdm->pdmclk);
        pm_runtime_enable(mcpdm->dev);
 
        /* Disable lines while request is ongoing */
        pm_runtime_get_sync(mcpdm->dev);
        omap_mcpdm_write(mcpdm, MCPDM_REG_CTRL, 0x00);
 
-       ret = devm_request_irq(mcpdm->dev, mcpdm->irq, omap_mcpdm_irq_handler,
-                               0, "McPDM", (void *)mcpdm);
+       ret = request_irq(mcpdm->irq, omap_mcpdm_irq_handler, 0, "McPDM",
+                         (void *)mcpdm);
 
        pm_runtime_put_sync(mcpdm->dev);
 
@@ -423,9 +420,9 @@ static int omap_mcpdm_remove(struct snd_soc_dai *dai)
 {
        struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai);
 
+       free_irq(mcpdm->irq, (void *)mcpdm);
        pm_runtime_disable(mcpdm->dev);
 
-       clk_disable_unprepare(mcpdm->pdmclk);
        return 0;
 }
 
@@ -445,8 +442,6 @@ static int omap_mcpdm_suspend(struct snd_soc_dai *dai)
                mcpdm->pm_active_count++;
        }
 
-       clk_disable_unprepare(mcpdm->pdmclk);
-
        return 0;
 }
 
@@ -454,8 +449,6 @@ static int omap_mcpdm_resume(struct snd_soc_dai *dai)
 {
        struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai);
 
-       clk_prepare_enable(mcpdm->pdmclk);
-
        if (mcpdm->pm_active_count) {
                while (mcpdm->pm_active_count--)
                        pm_runtime_get_sync(mcpdm->dev);
@@ -549,15 +542,6 @@ static int asoc_mcpdm_probe(struct platform_device *pdev)
 
        mcpdm->dev = &pdev->dev;
 
-       mcpdm->pdmclk = devm_clk_get(&pdev->dev, "pdmclk");
-       if (IS_ERR(mcpdm->pdmclk)) {
-               if (PTR_ERR(mcpdm->pdmclk) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
-               dev_warn(&pdev->dev, "Error getting pdmclk (%ld)!\n",
-                        PTR_ERR(mcpdm->pdmclk));
-               mcpdm->pdmclk = NULL;
-       }
-
        ret =  devm_snd_soc_register_component(&pdev->dev,
                                               &omap_mcpdm_component,
                                               &omap_mcpdm_dai, 1);
index 50849e1..92e88bc 100644 (file)
@@ -58,10 +58,12 @@ static struct platform_device *s3c24xx_uda134x_snd_device;
 
 static int s3c24xx_uda134x_startup(struct snd_pcm_substream *substream)
 {
-       int ret = 0;
+       struct snd_soc_pcm_runtime *rtd = substream->private_data;
+       struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
 #ifdef ENFORCE_RATES
        struct snd_pcm_runtime *runtime = substream->runtime;
 #endif
+       int ret = 0;
 
        mutex_lock(&clk_lock);
        pr_debug("%s %d\n", __func__, clk_users);
@@ -71,8 +73,7 @@ static int s3c24xx_uda134x_startup(struct snd_pcm_substream *substream)
                        printk(KERN_ERR "%s cannot get xtal\n", __func__);
                        ret = PTR_ERR(xtal);
                } else {
-                       pclk = clk_get(&s3c24xx_uda134x_snd_device->dev,
-                                      "pclk");
+                       pclk = clk_get(cpu_dai->dev, "iis");
                        if (IS_ERR(pclk)) {
                                printk(KERN_ERR "%s cannot get pclk\n",
                                       __func__);
index e39f916..969a516 100644 (file)
@@ -226,8 +226,12 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io,
        ifscr = 0;
        fsrate = 0;
        if (fin != fout) {
+               u64 n;
+
                ifscr = 1;
-               fsrate = 0x0400000 / fout * fin;
+               n = (u64)0x0400000 * fin;
+               do_div(n, fout);
+               fsrate = n;
        }
 
        /*
index d2df46c..bf7b52f 100644 (file)
@@ -121,7 +121,7 @@ static int soc_compr_open_fe(struct snd_compr_stream *cstream)
 
                dpcm_be_disconnect(fe, stream);
                fe->dpcm[stream].runtime = NULL;
-               goto fe_err;
+               goto path_err;
        }
 
        dpcm_clear_pending_state(fe, stream);
@@ -136,6 +136,8 @@ static int soc_compr_open_fe(struct snd_compr_stream *cstream)
 
        return 0;
 
+path_err:
+       dpcm_path_put(&list);
 fe_err:
        if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->shutdown)
                fe->dai_link->compr_ops->shutdown(cstream);
index 16369ca..4afa8db 100644 (file)
@@ -1056,7 +1056,7 @@ static int soc_bind_dai_link(struct snd_soc_card *card,
        if (!rtd->platform) {
                dev_err(card->dev, "ASoC: platform %s not registered\n",
                        dai_link->platform_name);
-               return -EPROBE_DEFER;
+               goto _err_defer;
        }
 
        soc_add_pcm_runtime(card, rtd);
@@ -2083,14 +2083,13 @@ static int soc_cleanup_card_resources(struct snd_soc_card *card)
        /* remove auxiliary devices */
        soc_remove_aux_devices(card);
 
+       snd_soc_dapm_free(&card->dapm);
        soc_cleanup_card_debugfs(card);
 
        /* remove the card */
        if (card->remove)
                card->remove(card);
 
-       snd_soc_dapm_free(&card->dapm);
-
        snd_card_free(card->snd_card);
        return 0;
 
index 8698c26..d908ff8 100644 (file)
@@ -3493,6 +3493,7 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w,
        const struct snd_soc_pcm_stream *config = w->params + w->params_select;
        struct snd_pcm_substream substream;
        struct snd_pcm_hw_params *params = NULL;
+       struct snd_pcm_runtime *runtime = NULL;
        u64 fmt;
        int ret;
 
@@ -3541,6 +3542,14 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w,
 
        memset(&substream, 0, sizeof(substream));
 
+       /* Allocate a dummy snd_pcm_runtime for startup() and other ops() */
+       runtime = kzalloc(sizeof(*runtime), GFP_KERNEL);
+       if (!runtime) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       substream.runtime = runtime;
+
        switch (event) {
        case SND_SOC_DAPM_PRE_PMU:
                substream.stream = SNDRV_PCM_STREAM_CAPTURE;
@@ -3606,6 +3615,7 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w,
        }
 
 out:
+       kfree(runtime);
        kfree(params);
        return ret;
 }
index 204cc07..41aa335 100644 (file)
@@ -55,7 +55,6 @@ static int snd_line6_impulse_volume_put(struct snd_kcontrol *kcontrol,
                err = line6_pcm_acquire(line6pcm, LINE6_STREAM_IMPULSE);
                if (err < 0) {
                        line6pcm->impulse_volume = 0;
-                       line6_pcm_release(line6pcm, LINE6_STREAM_IMPULSE);
                        return err;
                }
        } else {
@@ -211,7 +210,9 @@ static void line6_stream_stop(struct snd_line6_pcm *line6pcm, int direction,
        spin_lock_irqsave(&pstr->lock, flags);
        clear_bit(type, &pstr->running);
        if (!pstr->running) {
+               spin_unlock_irqrestore(&pstr->lock, flags);
                line6_unlink_audio_urbs(line6pcm, pstr);
+               spin_lock_irqsave(&pstr->lock, flags);
                if (direction == SNDRV_PCM_STREAM_CAPTURE) {
                        line6pcm->prev_fbuf = NULL;
                        line6pcm->prev_fsize = 0;
index daf81d1..45dd348 100644 (file)
@@ -244,8 +244,8 @@ static int pod_set_system_param_int(struct usb_line6_pod *pod, int value,
 static ssize_t serial_number_show(struct device *dev,
                                  struct device_attribute *attr, char *buf)
 {
-       struct usb_interface *interface = to_usb_interface(dev);
-       struct usb_line6_pod *pod = usb_get_intfdata(interface);
+       struct snd_card *card = dev_to_snd_card(dev);
+       struct usb_line6_pod *pod = card->private_data;
 
        return sprintf(buf, "%u\n", pod->serial_number);
 }
@@ -256,8 +256,8 @@ static ssize_t serial_number_show(struct device *dev,
 static ssize_t firmware_version_show(struct device *dev,
                                     struct device_attribute *attr, char *buf)
 {
-       struct usb_interface *interface = to_usb_interface(dev);
-       struct usb_line6_pod *pod = usb_get_intfdata(interface);
+       struct snd_card *card = dev_to_snd_card(dev);
+       struct usb_line6_pod *pod = card->private_data;
 
        return sprintf(buf, "%d.%02d\n", pod->firmware_version / 100,
                       pod->firmware_version % 100);
@@ -269,8 +269,8 @@ static ssize_t firmware_version_show(struct device *dev,
 static ssize_t device_id_show(struct device *dev,
                              struct device_attribute *attr, char *buf)
 {
-       struct usb_interface *interface = to_usb_interface(dev);
-       struct usb_line6_pod *pod = usb_get_intfdata(interface);
+       struct snd_card *card = dev_to_snd_card(dev);
+       struct usb_line6_pod *pod = card->private_data;
 
        return sprintf(buf, "%d\n", pod->device_id);
 }
index 6adde45..6cf1f35 100644 (file)
@@ -1128,6 +1128,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
 {
        /* devices which do not support reading the sample rate. */
        switch (chip->usb_id) {
+       case USB_ID(0x041E, 0x4080): /* Creative Live Cam VF0610 */
        case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema  */
        case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */
        case USB_ID(0x045E, 0x076E): /* MS Lifecam HD-5001 */
@@ -1138,6 +1139,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
        case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */
        case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
        case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */
+       case USB_ID(0x05A3, 0x9420): /* ELP HD USB Camera */
        case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */
        case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */
        case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */
index f209ea1..3051f86 100644 (file)
@@ -87,9 +87,11 @@ struct kvm_regs {
 /* Supported VGICv3 address types  */
 #define KVM_VGIC_V3_ADDR_TYPE_DIST     2
 #define KVM_VGIC_V3_ADDR_TYPE_REDIST   3
+#define KVM_VGIC_ITS_ADDR_TYPE         4
 
 #define KVM_VGIC_V3_DIST_SIZE          SZ_64K
 #define KVM_VGIC_V3_REDIST_SIZE                (2 * SZ_64K)
+#define KVM_VGIC_V3_ITS_SIZE           (2 * SZ_64K)
 
 #define KVM_ARM_VCPU_POWER_OFF         0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_EL1_32BIT         1 /* CPU running a 32bit VM */
index 3b8e99e..a2ffec4 100644 (file)
@@ -93,6 +93,47 @@ struct kvm_s390_vm_cpu_machine {
        __u64 fac_list[256];
 };
 
+#define KVM_S390_VM_CPU_PROCESSOR_FEAT 2
+#define KVM_S390_VM_CPU_MACHINE_FEAT   3
+
+#define KVM_S390_VM_CPU_FEAT_NR_BITS   1024
+#define KVM_S390_VM_CPU_FEAT_ESOP      0
+#define KVM_S390_VM_CPU_FEAT_SIEF2     1
+#define KVM_S390_VM_CPU_FEAT_64BSCAO   2
+#define KVM_S390_VM_CPU_FEAT_SIIF      3
+#define KVM_S390_VM_CPU_FEAT_GPERE     4
+#define KVM_S390_VM_CPU_FEAT_GSLS      5
+#define KVM_S390_VM_CPU_FEAT_IB                6
+#define KVM_S390_VM_CPU_FEAT_CEI       7
+#define KVM_S390_VM_CPU_FEAT_IBS       8
+#define KVM_S390_VM_CPU_FEAT_SKEY      9
+#define KVM_S390_VM_CPU_FEAT_CMMA      10
+#define KVM_S390_VM_CPU_FEAT_PFMFI     11
+#define KVM_S390_VM_CPU_FEAT_SIGPIF    12
+struct kvm_s390_vm_cpu_feat {
+       __u64 feat[16];
+};
+
+#define KVM_S390_VM_CPU_PROCESSOR_SUBFUNC      4
+#define KVM_S390_VM_CPU_MACHINE_SUBFUNC                5
+/* for "test bit" instructions MSB 0 bit ordering, for "query" raw blocks */
+struct kvm_s390_vm_cpu_subfunc {
+       __u8 plo[32];           /* always */
+       __u8 ptff[16];          /* with TOD-clock steering */
+       __u8 kmac[16];          /* with MSA */
+       __u8 kmc[16];           /* with MSA */
+       __u8 km[16];            /* with MSA */
+       __u8 kimd[16];          /* with MSA */
+       __u8 klmd[16];          /* with MSA */
+       __u8 pckmo[16];         /* with MSA3 */
+       __u8 kmctr[16];         /* with MSA4 */
+       __u8 kmf[16];           /* with MSA4 */
+       __u8 kmo[16];           /* with MSA4 */
+       __u8 pcc[16];           /* with MSA4 */
+       __u8 ppno[16];          /* with MSA5 */
+       __u8 reserved[1824];
+};
+
 /* kvm attributes for crypto */
 #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW       0
 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW       1
index 8fb5d4a..3ac6343 100644 (file)
        exit_code_ipa0(0xB2, 0x4c, "TAR"),      \
        exit_code_ipa0(0xB2, 0x50, "CSP"),      \
        exit_code_ipa0(0xB2, 0x54, "MVPG"),     \
+       exit_code_ipa0(0xB2, 0x56, "STHYI"),    \
        exit_code_ipa0(0xB2, 0x58, "BSG"),      \
        exit_code_ipa0(0xB2, 0x5a, "BSA"),      \
        exit_code_ipa0(0xB2, 0x5f, "CHSC"),     \
index 4a41348..92a8308 100644 (file)
 #define X86_FEATURE_RDSEED     ( 9*32+18) /* The RDSEED instruction */
 #define X86_FEATURE_ADX                ( 9*32+19) /* The ADCX and ADOX instructions */
 #define X86_FEATURE_SMAP       ( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT    ( 9*32+22) /* PCOMMIT instruction */
 #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
 #define X86_FEATURE_CLWB       ( 9*32+24) /* CLWB instruction */
 #define X86_FEATURE_AVX512PF   ( 9*32+26) /* AVX-512 Prefetch */
 #define X86_BUG_FXSAVE_LEAK    X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
 #define X86_BUG_CLFLUSH_MONITOR        X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
 #define X86_BUG_SYSRET_SS_ATTRS        X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
-#define X86_BUG_NULL_SEG       X86_BUG(9) /* Nulling a selector preserves the base */
-#define X86_BUG_SWAPGS_FENCE   X86_BUG(10) /* SWAPGS without input dep on GS */
-
-
 #ifdef CONFIG_X86_32
 /*
  * 64-bit kernels don't use X86_BUG_ESPFIX.  Make the define conditional
  */
 #define X86_BUG_ESPFIX         X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
 #endif
-
+#define X86_BUG_NULL_SEG       X86_BUG(10) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE   X86_BUG(11) /* SWAPGS without input dep on GS */
+#define X86_BUG_MONITOR                X86_BUG(12) /* IPI required to wake up remote CPU */
 #endif /* _ASM_X86_CPUFEATURES_H */
index 911e935..85599ad 100644 (file)
@@ -56,5 +56,7 @@
 #define DISABLED_MASK14        0
 #define DISABLED_MASK15        0
 #define DISABLED_MASK16        (DISABLE_PKU|DISABLE_OSPKE)
+#define DISABLED_MASK17        0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
 
 #endif /* _ASM_X86_DISABLED_FEATURES_H */
index 4916144..fac9a5c 100644 (file)
@@ -99,5 +99,7 @@
 #define REQUIRED_MASK14        0
 #define REQUIRED_MASK15        0
 #define REQUIRED_MASK16        0
+#define REQUIRED_MASK17        0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
 
 #endif /* _ASM_X86_REQUIRED_FEATURES_H */
index 5b15d94..37fee27 100644 (file)
@@ -78,7 +78,6 @@
 #define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XRSTORS             64
-#define EXIT_REASON_PCOMMIT             65
 
 #define VMX_EXIT_REASONS \
        { EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
        { EXIT_REASON_INVVPID,               "INVVPID" }, \
        { EXIT_REASON_INVPCID,               "INVPCID" }, \
        { EXIT_REASON_XSAVES,                "XSAVES" }, \
-       { EXIT_REASON_XRSTORS,               "XRSTORS" }, \
-       { EXIT_REASON_PCOMMIT,               "PCOMMIT" }
+       { EXIT_REASON_XRSTORS,               "XRSTORS" }
 
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
 #define VMX_ABORT_LOAD_HOST_MSR_FAIL         4
index 448ed96..1c14c25 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * gpio-hammer - example swiss army knife to shake GPIO lines on a system
+ * gpio-event-mon - monitor GPIO line events from userspace
  *
  * Copyright (C) 2016 Linus Walleij
  *
index b968794..f436d24 100644 (file)
@@ -8,7 +8,11 @@ void *memdup(const void *src, size_t len);
 
 int strtobool(const char *s, bool *res);
 
-#ifdef __GLIBC__
+/*
+ * glibc-based builds need the extern while uClibc builds don't.
+ * However, uClibc headers also define __GLIBC__, hence the hack below.
+ */
+#if defined(__GLIBC__) && !defined(__UCLIBC__)
 extern size_t strlcpy(char *dest, const char *src, size_t size);
 #endif
 
index 406459b..da218fe 100644 (file)
@@ -84,6 +84,7 @@ enum bpf_map_type {
        BPF_MAP_TYPE_PERCPU_HASH,
        BPF_MAP_TYPE_PERCPU_ARRAY,
        BPF_MAP_TYPE_STACK_TRACE,
+       BPF_MAP_TYPE_CGROUP_ARRAY,
 };
 
 enum bpf_prog_type {
@@ -93,6 +94,7 @@ enum bpf_prog_type {
        BPF_PROG_TYPE_SCHED_CLS,
        BPF_PROG_TYPE_SCHED_ACT,
        BPF_PROG_TYPE_TRACEPOINT,
+       BPF_PROG_TYPE_XDP,
 };
 
 #define BPF_PSEUDO_MAP_FD      1
@@ -313,6 +315,66 @@ enum bpf_func_id {
         */
        BPF_FUNC_skb_get_tunnel_opt,
        BPF_FUNC_skb_set_tunnel_opt,
+
+       /**
+        * bpf_skb_change_proto(skb, proto, flags)
+        * Change protocol of the skb. Currently supported is
+        * v4 -> v6, v6 -> v4 transitions. The helper will also
+        * resize the skb. eBPF program is expected to fill the
+        * new headers via skb_store_bytes and lX_csum_replace.
+        * @skb: pointer to skb
+        * @proto: new skb->protocol type
+        * @flags: reserved
+        * Return: 0 on success or negative error
+        */
+       BPF_FUNC_skb_change_proto,
+
+       /**
+        * bpf_skb_change_type(skb, type)
+        * Change packet type of skb.
+        * @skb: pointer to skb
+        * @type: new skb->pkt_type type
+        * Return: 0 on success or negative error
+        */
+       BPF_FUNC_skb_change_type,
+
+       /**
+        * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+        * @skb: pointer to skb
+        * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+        * @index: index of the cgroup in the bpf_map
+        * Return:
+        *   == 0 skb failed the cgroup2 descendant test
+        *   == 1 skb succeeded the cgroup2 descendant test
+        *    < 0 error
+        */
+       BPF_FUNC_skb_in_cgroup,
+
+       /**
+        * bpf_get_hash_recalc(skb)
+        * Retrieve and possibly recalculate skb->hash.
+        * @skb: pointer to skb
+        * Return: hash
+        */
+       BPF_FUNC_get_hash_recalc,
+
+       /**
+        * u64 bpf_get_current_task(void)
+        * Returns current task_struct
+        * Return: current
+        */
+       BPF_FUNC_get_current_task,
+
+       /**
+        * bpf_probe_write_user(void *dst, void *src, int len)
+        * safely attempt to write to a location
+        * @dst: destination address in userspace
+        * @src: source address on stack
+        * @len: number of bytes to copy
+        * Return: 0 on success or negative error
+        */
+       BPF_FUNC_probe_write_user,
+
        __BPF_FUNC_MAX_ID,
 };
 
@@ -347,9 +409,11 @@ enum bpf_func_id {
 #define BPF_F_ZERO_CSUM_TX             (1ULL << 1)
 #define BPF_F_DONT_FRAGMENT            (1ULL << 2)
 
-/* BPF_FUNC_perf_event_output flags. */
+/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
 #define BPF_F_INDEX_MASK               0xffffffffULL
 #define BPF_F_CURRENT_CPU              BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK              (0xfffffULL << 32)
 
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
@@ -386,4 +450,24 @@ struct bpf_tunnel_key {
        __u32 tunnel_label;
 };
 
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will result
+ * in packet drop.
+ */
+enum xdp_action {
+       XDP_ABORTED = 0,
+       XDP_DROP,
+       XDP_PASS,
+       XDP_TX,
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ */
+struct xdp_md {
+       __u32 data;
+       __u32 data_end;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
index 736da44..b303bcd 100644 (file)
@@ -176,10 +176,18 @@ Each probe argument follows below syntax.
 
 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
 '$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters.
-'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically sets the type based on debuginfo. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for details)
 
 On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
 
+TYPES
+-----
+Basic types (u8/u16/u32/u64/s8/s16/s32/s64) are integer types. The prefixes 's' and 'u' mean those types are signed and unsigned, respectively. Traced arguments are shown in decimal (signed) or hex (unsigned). You can also use 's' or 'u' to specify only signedness and leave its size auto-detected by perf probe.
+String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+Bitfield is another special type, which takes 3 parameters: bit-width, bit-offset, and container-size (usually 32). The syntax is:
+
+ b<bit-width>@<bit-offset>/<container-size>
+
 LINE SYNTAX
 -----------
 Line range is described by following syntax.
index 1f6c705..053bbbd 100644 (file)
@@ -116,8 +116,8 @@ OPTIONS
 --fields::
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
-       srcline, period, iregs, brstack, brstacksym, flags.
-        Field list can be prepended with the type, trace, sw or hw,
+        srcline, period, iregs, brstack, brstacksym, flags, bpf-output,
+        callindent. Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
 
index c6d0f91..35745a7 100644 (file)
@@ -54,10 +54,6 @@ int arch__compare_symbol_names(const char *namea, const char *nameb)
 #endif
 
 #if defined(_CALL_ELF) && _CALL_ELF == 2
-bool arch__prefers_symtab(void)
-{
-       return true;
-}
 
 #ifdef HAVE_LIBELF_SUPPORT
 void arch__sym_update(struct symbol *s, GElf_Sym *sym)
@@ -100,4 +96,29 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,
                        tev->point.offset += lep_offset;
        }
 }
+
+#ifdef HAVE_LIBELF_SUPPORT
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+                                          int ntevs)
+{
+       struct probe_trace_event *tev;
+       struct map *map;
+       struct symbol *sym = NULL;
+       struct rb_node *tmp;
+       int i = 0;
+
+       map = get_target_map(pev->target, pev->uprobes);
+       if (!map || map__load(map, NULL) < 0)
+               return;
+
+       for (i = 0; i < ntevs; i++) {
+               tev = &pev->tevs[i];
+               map__for_each_symbol(map, sym, tmp) {
+                       if (map->unmap_ip(map, sym->start) == tev->point.address)
+                               arch__fix_tev_from_maps(pev, tev, map, sym);
+               }
+       }
+}
+#endif /* HAVE_LIBELF_SUPPORT */
+
 #endif
index fb51457..a2412e9 100644 (file)
@@ -501,7 +501,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
        struct intel_pt_recording *ptr =
                        container_of(itr, struct intel_pt_recording, itr);
        struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
-       bool have_timing_info;
+       bool have_timing_info, need_immediate = false;
        struct perf_evsel *evsel, *intel_pt_evsel = NULL;
        const struct cpu_map *cpus = evlist->cpus;
        bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
@@ -655,6 +655,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
                                ptr->have_sched_switch = 3;
                        } else {
                                opts->record_switch_events = true;
+                               need_immediate = true;
                                if (cpu_wide)
                                        ptr->have_sched_switch = 3;
                                else
@@ -700,6 +701,9 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
                tracking_evsel->attr.freq = 0;
                tracking_evsel->attr.sample_period = 1;
 
+               if (need_immediate)
+                       tracking_evsel->immediate = true;
+
                /* In per-cpu case, always need the time of mmap events etc */
                if (!cpu_map__empty(cpus)) {
                        perf_evsel__set_sample_bit(tracking_evsel, TIME);
index d608a2c..d1ce29b 100644 (file)
@@ -88,6 +88,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
        if (mem->operation & MEM_OPERATION_LOAD)
                perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
 
+       if (mem->operation & MEM_OPERATION_STORE)
+               perf_mem_events[PERF_MEM_EVENTS__STORE].record = true;
+
        if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
                rec_argv[i++] = "-W";
 
index 971ff91..c859e59 100644 (file)
@@ -371,14 +371,16 @@ static int perf_session__check_output_opt(struct perf_session *session)
 
        if (!no_callchain) {
                bool use_callchain = false;
+               bool not_pipe = false;
 
                evlist__for_each_entry(session->evlist, evsel) {
+                       not_pipe = true;
                        if (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
                                use_callchain = true;
                                break;
                        }
                }
-               if (!use_callchain)
+               if (not_pipe && !use_callchain)
                        symbol_conf.use_callchain = false;
        }
 
@@ -1690,8 +1692,13 @@ static int list_available_scripts(const struct option *opt __maybe_unused,
        snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
 
        scripts_dir = opendir(scripts_path);
-       if (!scripts_dir)
-               return -1;
+       if (!scripts_dir) {
+               fprintf(stdout,
+                       "open(%s) failed.\n"
+                       "Check \"PERF_EXEC_PATH\" env to set scripts dir.\n",
+                       scripts_path);
+               exit(-1);
+       }
 
        for_each_lang(scripts_path, scripts_dir, lang_dirent) {
                snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
@@ -2116,7 +2123,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
                     "Valid types: hw,sw,trace,raw. "
                     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
                     "addr,symoff,period,iregs,brstack,brstacksym,flags,"
-                    "callindent", parse_output_fields),
+                    "bpf-output,callindent", parse_output_fields),
        OPT_BOOLEAN('a', "all-cpus", &system_wide,
                    "system-wide collection from all CPUs"),
        OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
index 0c16d20..3c7452b 100644 (file)
@@ -331,7 +331,7 @@ static int read_counter(struct perf_evsel *counter)
        return 0;
 }
 
-static void read_counters(bool close_counters)
+static void read_counters(void)
 {
        struct perf_evsel *counter;
 
@@ -341,11 +341,6 @@ static void read_counters(bool close_counters)
 
                if (perf_stat_process_counter(&stat_config, counter))
                        pr_warning("failed to process counter %s\n", counter->name);
-
-               if (close_counters) {
-                       perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
-                                            thread_map__nr(evsel_list->threads));
-               }
        }
 }
 
@@ -353,7 +348,7 @@ static void process_interval(void)
 {
        struct timespec ts, rs;
 
-       read_counters(false);
+       read_counters();
 
        clock_gettime(CLOCK_MONOTONIC, &ts);
        diff_timespec(&rs, &ts, &ref_time);
@@ -380,6 +375,17 @@ static void enable_counters(void)
                perf_evlist__enable(evsel_list);
 }
 
+static void disable_counters(void)
+{
+       /*
+        * If we don't have tracee (attaching to task or cpu), counters may
+        * still be running. To get accurate group ratios, we must stop groups
+        * from counting before reading their constituent counters.
+        */
+       if (!target__none(&target))
+               perf_evlist__disable(evsel_list);
+}
+
 static volatile int workload_exec_errno;
 
 /*
@@ -657,11 +663,20 @@ try_again:
                }
        }
 
+       disable_counters();
+
        t1 = rdclock();
 
        update_stats(&walltime_nsecs_stats, t1 - t0);
 
-       read_counters(true);
+       /*
+        * Closing a group leader splits the group, and as we only disable
+        * group leaders, results in remaining events becoming enabled. To
+        * avoid arbitrary skew, we must read all counters before closing any
+        * group leaders.
+        */
+       read_counters();
+       perf_evlist__close(evsel_list);
 
        return WEXITSTATUS(status);
 }
index d9b80ef..21fd573 100644 (file)
@@ -507,17 +507,17 @@ static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
        u8 op, result, type = (config >>  0) & 0xff;
        const char *err = "unknown-ext-hardware-cache-type";
 
-       if (type > PERF_COUNT_HW_CACHE_MAX)
+       if (type >= PERF_COUNT_HW_CACHE_MAX)
                goto out_err;
 
        op = (config >>  8) & 0xff;
        err = "unknown-ext-hardware-cache-op";
-       if (op > PERF_COUNT_HW_CACHE_OP_MAX)
+       if (op >= PERF_COUNT_HW_CACHE_OP_MAX)
                goto out_err;
 
        result = (config >> 16) & 0xff;
        err = "unknown-ext-hardware-cache-result";
-       if (result > PERF_COUNT_HW_CACHE_RESULT_MAX)
+       if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                goto out_err;
 
        err = "invalid-cache";
index 9c8f15d..8ff6c6a 100644 (file)
@@ -123,8 +123,6 @@ struct intel_pt_decoder {
        bool have_calc_cyc_to_tsc;
        int exec_mode;
        unsigned int insn_bytes;
-       uint64_t sign_bit;
-       uint64_t sign_bits;
        uint64_t period;
        enum intel_pt_period_type period_type;
        uint64_t tot_insn_cnt;
@@ -191,9 +189,6 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
        decoder->data               = params->data;
        decoder->return_compression = params->return_compression;
 
-       decoder->sign_bit           = (uint64_t)1 << 47;
-       decoder->sign_bits          = ~(((uint64_t)1 << 48) - 1);
-
        decoder->period             = params->period;
        decoder->period_type        = params->period_type;
 
@@ -362,21 +357,30 @@ int intel_pt__strerror(int code, char *buf, size_t buflen)
        return 0;
 }
 
-static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder,
-                                const struct intel_pt_pkt *packet,
+static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet,
                                 uint64_t last_ip)
 {
        uint64_t ip;
 
        switch (packet->count) {
-       case 2:
+       case 1:
                ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) |
                     packet->payload;
                break;
-       case 4:
+       case 2:
                ip = (last_ip & (uint64_t)0xffffffff00000000ULL) |
                     packet->payload;
                break;
+       case 3:
+               ip = packet->payload;
+               /* Sign-extend 6-byte ip */
+               if (ip & (uint64_t)0x800000000000ULL)
+                       ip |= (uint64_t)0xffff000000000000ULL;
+               break;
+       case 4:
+               ip = (last_ip & (uint64_t)0xffff000000000000ULL) |
+                    packet->payload;
+               break;
        case 6:
                ip = packet->payload;
                break;
@@ -384,16 +388,12 @@ static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder,
                return 0;
        }
 
-       if (ip & decoder->sign_bit)
-               return ip | decoder->sign_bits;
-
        return ip;
 }
 
 static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
 {
-       decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet,
-                                           decoder->last_ip);
+       decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip);
 }
 
 static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
@@ -1657,6 +1657,12 @@ next:
        }
 }
 
+static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder)
+{
+       return decoder->last_ip || decoder->packet.count == 0 ||
+              decoder->packet.count == 3 || decoder->packet.count == 6;
+}
+
 /* Walk PSB+ packets to get in sync. */
 static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
 {
@@ -1677,8 +1683,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
 
                case INTEL_PT_FUP:
                        decoder->pge = true;
-                       if (decoder->last_ip || decoder->packet.count == 6 ||
-                           decoder->packet.count == 0) {
+                       if (intel_pt_have_ip(decoder)) {
                                uint64_t current_ip = decoder->ip;
 
                                intel_pt_set_ip(decoder);
@@ -1767,8 +1772,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
                case INTEL_PT_TIP_PGE:
                case INTEL_PT_TIP:
                        decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD;
-                       if (decoder->last_ip || decoder->packet.count == 6 ||
-                           decoder->packet.count == 0)
+                       if (intel_pt_have_ip(decoder))
                                intel_pt_set_ip(decoder);
                        if (decoder->ip)
                                return 0;
@@ -1776,9 +1780,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
 
                case INTEL_PT_FUP:
                        if (decoder->overflow) {
-                               if (decoder->last_ip ||
-                                   decoder->packet.count == 6 ||
-                                   decoder->packet.count == 0)
+                               if (intel_pt_have_ip(decoder))
                                        intel_pt_set_ip(decoder);
                                if (decoder->ip)
                                        return 0;
index b1257c8..4f7b320 100644 (file)
@@ -292,36 +292,46 @@ static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte,
                           const unsigned char *buf, size_t len,
                           struct intel_pt_pkt *packet)
 {
-       switch (byte >> 5) {
+       int ip_len;
+
+       packet->count = byte >> 5;
+
+       switch (packet->count) {
        case 0:
-               packet->count = 0;
+               ip_len = 0;
                break;
        case 1:
                if (len < 3)
                        return INTEL_PT_NEED_MORE_BYTES;
-               packet->count = 2;
+               ip_len = 2;
                packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1));
                break;
        case 2:
                if (len < 5)
                        return INTEL_PT_NEED_MORE_BYTES;
-               packet->count = 4;
+               ip_len = 4;
                packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1));
                break;
        case 3:
-       case 6:
+       case 4:
                if (len < 7)
                        return INTEL_PT_NEED_MORE_BYTES;
-               packet->count = 6;
+               ip_len = 6;
                memcpy_le64(&packet->payload, buf + 1, 6);
                break;
+       case 6:
+               if (len < 9)
+                       return INTEL_PT_NEED_MORE_BYTES;
+               ip_len = 8;
+               packet->payload = le64_to_cpu(*(uint64_t *)(buf + 1));
+               break;
        default:
                return INTEL_PT_BAD_PACKET;
        }
 
        packet->type = type;
 
-       return packet->count + 1;
+       return ip_len + 1;
 }
 
 static int intel_pt_get_mode(const unsigned char *buf, size_t len,
index 9f3305f..95f0884 100644 (file)
@@ -1,3 +1,4 @@
+#include <sys/sysmacros.h>
 #include <sys/types.h>
 #include <stdio.h>
 #include <stdlib.h>
index 953dc1a..2873396 100644 (file)
@@ -170,15 +170,17 @@ static struct map *kernel_get_module_map(const char *module)
                module = "kernel";
 
        for (pos = maps__first(maps); pos; pos = map__next(pos)) {
+               /* short_name is "[module]" */
                if (strncmp(pos->dso->short_name + 1, module,
-                           pos->dso->short_name_len - 2) == 0) {
+                           pos->dso->short_name_len - 2) == 0 &&
+                   module[pos->dso->short_name_len - 2] == '\0') {
                        return pos;
                }
        }
        return NULL;
 }
 
-static struct map *get_target_map(const char *target, bool user)
+struct map *get_target_map(const char *target, bool user)
 {
        /* Init maps of given executable or kernel */
        if (user)
@@ -385,7 +387,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo,
                if (uprobes)
                        address = sym->start;
                else
-                       address = map->unmap_ip(map, sym->start);
+                       address = map->unmap_ip(map, sym->start) - map->reloc;
                break;
        }
        if (!address) {
@@ -664,22 +666,14 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs,
        return ret;
 }
 
-/* Post processing the probe events */
-static int post_process_probe_trace_events(struct probe_trace_event *tevs,
-                                          int ntevs, const char *module,
-                                          bool uprobe)
+static int
+post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
+                                      int ntevs)
 {
        struct ref_reloc_sym *reloc_sym;
        char *tmp;
        int i, skipped = 0;
 
-       if (uprobe)
-               return add_exec_to_probe_trace_events(tevs, ntevs, module);
-
-       /* Note that currently ref_reloc_sym based probe is not for drivers */
-       if (module)
-               return add_module_to_probe_trace_events(tevs, ntevs, module);
-
        reloc_sym = kernel_get_ref_reloc_sym();
        if (!reloc_sym) {
                pr_warning("Relocated base symbol is not found!\n");
@@ -711,6 +705,34 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs,
        return skipped;
 }
 
+void __weak
+arch__post_process_probe_trace_events(struct perf_probe_event *pev __maybe_unused,
+                                     int ntevs __maybe_unused)
+{
+}
+
+/* Post processing the probe events */
+static int post_process_probe_trace_events(struct perf_probe_event *pev,
+                                          struct probe_trace_event *tevs,
+                                          int ntevs, const char *module,
+                                          bool uprobe)
+{
+       int ret;
+
+       if (uprobe)
+               ret = add_exec_to_probe_trace_events(tevs, ntevs, module);
+       else if (module)
+               /* Currently ref_reloc_sym based probe is not for drivers */
+               ret = add_module_to_probe_trace_events(tevs, ntevs, module);
+       else
+               ret = post_process_kernel_probe_trace_events(tevs, ntevs);
+
+       if (ret >= 0)
+               arch__post_process_probe_trace_events(pev, ntevs);
+
+       return ret;
+}
+
 /* Try to find perf_probe_event with debuginfo */
 static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
                                          struct probe_trace_event **tevs)
@@ -749,7 +771,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
 
        if (ntevs > 0) {        /* Succeeded to find trace events */
                pr_debug("Found %d probe_trace_events.\n", ntevs);
-               ret = post_process_probe_trace_events(*tevs, ntevs,
+               ret = post_process_probe_trace_events(pev, *tevs, ntevs,
                                                pev->target, pev->uprobes);
                if (ret < 0 || ret == ntevs) {
                        clear_probe_trace_events(*tevs, ntevs);
@@ -2936,8 +2958,6 @@ errout:
        return err;
 }
 
-bool __weak arch__prefers_symtab(void) { return false; }
-
 /* Concatinate two arrays */
 static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b)
 {
@@ -3158,12 +3178,6 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
        if (ret > 0 || pev->sdt)        /* SDT can be found only in the cache */
                return ret == 0 ? -ENOENT : ret; /* Found in probe cache */
 
-       if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) {
-               ret = find_probe_trace_events_from_map(pev, tevs);
-               if (ret > 0)
-                       return ret; /* Found in symbol table */
-       }
-
        /* Convert perf_probe_event with debuginfo */
        ret = try_to_find_probe_trace_events(pev, tevs);
        if (ret != 0)
index e18ea9f..f4f45db 100644 (file)
@@ -158,7 +158,6 @@ int show_line_range(struct line_range *lr, const char *module, bool user);
 int show_available_vars(struct perf_probe_event *pevs, int npevs,
                        struct strfilter *filter);
 int show_available_funcs(const char *module, struct strfilter *filter, bool user);
-bool arch__prefers_symtab(void);
 void arch__fix_tev_from_maps(struct perf_probe_event *pev,
                             struct probe_trace_event *tev, struct map *map,
                             struct symbol *sym);
@@ -173,4 +172,9 @@ int e_snprintf(char *str, size_t size, const char *format, ...)
 int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
                            struct perf_probe_arg *pvar);
 
+struct map *get_target_map(const char *target, bool user);
+
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+                                          int ntevs);
+
 #endif /*_PROBE_EVENT_H */
index 9aed9c3..9c3b9ed 100644 (file)
@@ -133,7 +133,7 @@ int probe_file__open_both(int *kfd, int *ufd, int flag)
 /* Get raw string list of current kprobe_events  or uprobe_events */
 struct strlist *probe_file__get_rawlist(int fd)
 {
-       int ret, idx;
+       int ret, idx, fddup;
        FILE *fp;
        char buf[MAX_CMDLEN];
        char *p;
@@ -143,8 +143,17 @@ struct strlist *probe_file__get_rawlist(int fd)
                return NULL;
 
        sl = strlist__new(NULL, NULL);
+       if (sl == NULL)
+               return NULL;
+
+       fddup = dup(fd);
+       if (fddup < 0)
+               goto out_free_sl;
+
+       fp = fdopen(fddup, "r");
+       if (!fp)
+               goto out_close_fddup;
 
-       fp = fdopen(dup(fd), "r");
        while (!feof(fp)) {
                p = fgets(buf, MAX_CMDLEN, fp);
                if (!p)
@@ -156,13 +165,21 @@ struct strlist *probe_file__get_rawlist(int fd)
                ret = strlist__add(sl, buf);
                if (ret < 0) {
                        pr_debug("strlist__add failed (%d)\n", ret);
-                       strlist__delete(sl);
-                       return NULL;
+                       goto out_close_fp;
                }
        }
        fclose(fp);
 
        return sl;
+
+out_close_fp:
+       fclose(fp);
+       goto out_free_sl;
+out_close_fddup:
+       close(fddup);
+out_free_sl:
+       strlist__delete(sl);
+       return NULL;
 }
 
 static struct strlist *__probe_file__get_namelist(int fd, bool include_group)
@@ -447,12 +464,17 @@ static int probe_cache__load(struct probe_cache *pcache)
 {
        struct probe_cache_entry *entry = NULL;
        char buf[MAX_CMDLEN], *p;
-       int ret = 0;
+       int ret = 0, fddup;
        FILE *fp;
 
-       fp = fdopen(dup(pcache->fd), "r");
-       if (!fp)
+       fddup = dup(pcache->fd);
+       if (fddup < 0)
+               return -errno;
+       fp = fdopen(fddup, "r");
+       if (!fp) {
+               close(fddup);
                return -EINVAL;
+       }
 
        while (!feof(fp)) {
                if (!fgets(buf, MAX_CMDLEN, fp))
index f2d9ff0..5c290c6 100644 (file)
@@ -297,10 +297,13 @@ static int convert_variable_type(Dwarf_Die *vr_die,
        char sbuf[STRERR_BUFSIZE];
        int bsize, boffs, total;
        int ret;
+       char sign;
 
        /* TODO: check all types */
-       if (cast && strcmp(cast, "string") != 0) {
+       if (cast && strcmp(cast, "string") != 0 &&
+           strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) {
                /* Non string type is OK */
+               /* and respect signedness cast */
                tvar->type = strdup(cast);
                return (tvar->type == NULL) ? -ENOMEM : 0;
        }
@@ -361,6 +364,13 @@ static int convert_variable_type(Dwarf_Die *vr_die,
                return (tvar->type == NULL) ? -ENOMEM : 0;
        }
 
+       if (cast && (strcmp(cast, "u") == 0))
+               sign = 'u';
+       else if (cast && (strcmp(cast, "s") == 0))
+               sign = 's';
+       else
+               sign = die_is_signed_type(&type) ? 's' : 'u';
+
        ret = dwarf_bytesize(&type);
        if (ret <= 0)
                /* No size ... try to use default type */
@@ -373,8 +383,7 @@ static int convert_variable_type(Dwarf_Die *vr_die,
                        dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
                ret = MAX_BASIC_TYPE_BITS;
        }
-       ret = snprintf(buf, 16, "%c%d",
-                      die_is_signed_type(&type) ? 's' : 'u', ret);
+       ret = snprintf(buf, 16, "%c%d", sign, ret);
 
 formatted:
        if (ret < 0 || ret >= 16) {
index 947d21f..3d3cb83 100644 (file)
@@ -588,7 +588,11 @@ static char *get_trace_output(struct hist_entry *he)
        } else {
                pevent_event_info(&seq, evsel->tp_format, &rec);
        }
-       return seq.buffer;
+       /*
+        * Trim the buffer, it starts at 4KB and we're not going to
+        * add anything more to this buffer.
+        */
+       return realloc(seq.buffer, seq.len + 1);
 }
 
 static int64_t
index a34321e..a811c13 100644 (file)
@@ -837,7 +837,8 @@ int dso__load_sym(struct dso *dso, struct map *map,
        sec = syms_ss->symtab;
        shdr = syms_ss->symshdr;
 
-       if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL))
+       if (elf_section_by_name(runtime_ss->elf, &runtime_ss->ehdr, &tshdr,
+                               ".text", NULL))
                dso->text_offset = tshdr.sh_addr - tshdr.sh_offset;
 
        if (runtime_ss->opdsec)
index cf5e250..783a53f 100644 (file)
@@ -66,7 +66,7 @@ static int entry(u64 ip, struct unwind_info *ui)
        if (__report_module(&al, ip, ui))
                return -1;
 
-       e->ip  = ip;
+       e->ip  = al.addr;
        e->map = al.map;
        e->sym = al.sym;
 
index 97c0f8f..20c2e57 100644 (file)
@@ -542,7 +542,7 @@ static int entry(u64 ip, struct thread *thread,
        thread__find_addr_location(thread, PERF_RECORD_MISC_USER,
                                   MAP__FUNCTION, ip, &al);
 
-       e.ip = ip;
+       e.ip = al.addr;
        e.map = al.map;
        e.sym = al.sym;
 
index 5404efa..dd48f42 100644 (file)
@@ -13,6 +13,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
 #include <linux/libnvdimm.h>
 #include <linux/vmalloc.h>
 #include <linux/device.h>
@@ -1474,6 +1475,7 @@ static int nfit_test_probe(struct platform_device *pdev)
        if (nfit_test->setup != nfit_test0_setup)
                return 0;
 
+       flush_work(&acpi_desc->work);
        nfit_test->setup_hotplug = 1;
        nfit_test->setup(nfit_test);
 
index 3c40c9d..1cc6d64 100644 (file)
@@ -8,7 +8,7 @@ ifeq ($(ARCH),powerpc)
 
 GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
 
-CFLAGS := -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
+CFLAGS := -std=gnu99 -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
 
 export CFLAGS
 
index 4f93af8..18601f6 100644 (file)
@@ -14,4 +14,20 @@ enum dma_data_direction {
        DMA_NONE = 3,
 };
 
+#define dma_alloc_coherent(d, s, hp, f) ({ \
+       void *__dma_alloc_coherent_p = kmalloc((s), (f)); \
+       *(hp) = (unsigned long)__dma_alloc_coherent_p; \
+       __dma_alloc_coherent_p; \
+})
+
+#define dma_free_coherent(d, s, p, h) kfree(p)
+
+#define dma_map_page(d, p, o, s, dir) (page_to_phys(p) + (o))
+
+#define dma_map_single(d, p, s, dir) (virt_to_phys(p))
+#define dma_mapping_error(...) (0)
+
+#define dma_unmap_single(...) do { } while (0)
+#define dma_unmap_page(...) do { } while (0)
+
 #endif
index 0338499..d9554fc 100644 (file)
@@ -20,7 +20,9 @@
 
 #define PAGE_SIZE getpagesize()
 #define PAGE_MASK (~(PAGE_SIZE-1))
+#define PAGE_ALIGN(x) ((x + PAGE_SIZE - 1) & PAGE_MASK)
 
+typedef unsigned long long phys_addr_t;
 typedef unsigned long long dma_addr_t;
 typedef size_t __kernel_size_t;
 typedef unsigned int __wsum;
@@ -57,6 +59,11 @@ static inline void *kzalloc(size_t s, gfp_t gfp)
        return p;
 }
 
+static inline void *alloc_pages_exact(size_t s, gfp_t gfp)
+{
+       return kmalloc(s, gfp);
+}
+
 static inline void kfree(void *p)
 {
        if (p >= __kfree_ignore_start && p < __kfree_ignore_end)
@@ -64,6 +71,11 @@ static inline void kfree(void *p)
        free(p);
 }
 
+static inline void free_pages_exact(void *p, size_t s)
+{
+       kfree(p);
+}
+
 static inline void *krealloc(void *p, size_t s, gfp_t gfp)
 {
        return realloc(p, s);
@@ -105,6 +117,8 @@ static inline void free_page(unsigned long addr)
 #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 
+#define WARN_ON_ONCE(cond) ((cond) && fprintf (stderr, "WARNING\n"))
+
 #define min(x, y) ({                           \
        typeof(x) _min1 = (x);                  \
        typeof(y) _min2 = (y);                  \
index 81baeac..7e1c119 100644 (file)
@@ -1,2 +1,6 @@
 #ifndef LINUX_SLAB_H
+#define GFP_KERNEL 0
+#define GFP_ATOMIC 0
+#define __GFP_NOWARN 0
+#define __GFP_ZERO 0
 #endif
index ee125e7..9377c8b 100644 (file)
@@ -3,8 +3,12 @@
 #include <linux/scatterlist.h>
 #include <linux/kernel.h>
 
+struct device {
+       void *parent;
+};
+
 struct virtio_device {
-       void *dev;
+       struct device dev;
        u64 features;
 };
 
index 57a6964..9ba1181 100644 (file)
@@ -40,6 +40,19 @@ static inline void __virtio_clear_bit(struct virtio_device *vdev,
 #define virtio_has_feature(dev, feature) \
        (__virtio_test_bit((dev), feature))
 
+/**
+ * virtio_has_iommu_quirk - determine whether this device has the iommu quirk
+ * @vdev: the device
+ */
+static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev)
+{
+       /*
+        * Note the reverse polarity of the quirk feature (compared to most
+        * other features), this is for compatibility with legacy systems.
+        */
+       return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+}
+
 static inline bool virtio_is_little_endian(struct virtio_device *vdev)
 {
        return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) ||
index 68e4f9f..bd2ad1d 100644 (file)
@@ -13,6 +13,7 @@
 #define cache_line_size() SMP_CACHE_BYTES
 #define ____cacheline_aligned_in_smp __attribute__ ((aligned (SMP_CACHE_BYTES)))
 #define unlikely(x)    (__builtin_expect(!!(x), 0))
+#define likely(x)    (__builtin_expect(!!(x), 1))
 #define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a))
 typedef pthread_spinlock_t  spinlock_t;
 
index 4fde8c7..77e6ccf 100644 (file)
@@ -33,6 +33,7 @@
 static struct timecounter *timecounter;
 static struct workqueue_struct *wqueue;
 static unsigned int host_vtimer_irq;
+static u32 host_vtimer_irq_flags;
 
 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 {
@@ -365,7 +366,7 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 
 static void kvm_timer_init_interrupt(void *info)
 {
-       enable_percpu_irq(host_vtimer_irq, 0);
+       enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
 }
 
 int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
@@ -432,6 +433,14 @@ int kvm_timer_hyp_init(void)
        }
        host_vtimer_irq = info->virtual_irq;
 
+       host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
+       if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
+           host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
+               kvm_err("Invalid trigger for IRQ%d, assuming level low\n",
+                       host_vtimer_irq);
+               host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
+       }
+
        err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
                                 "kvm guest timer", kvm_get_running_vcpus());
        if (err) {
index fb4b0a7..83777c1 100644 (file)
@@ -73,12 +73,8 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
        int i, vcpu_lock_idx = -1, ret;
        struct kvm_vcpu *vcpu;
 
-       mutex_lock(&kvm->lock);
-
-       if (irqchip_in_kernel(kvm)) {
-               ret = -EEXIST;
-               goto out;
-       }
+       if (irqchip_in_kernel(kvm))
+               return -EEXIST;
 
        /*
         * This function is also called by the KVM_CREATE_IRQCHIP handler,
@@ -87,10 +83,8 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
         * the proper checks already.
         */
        if (type == KVM_DEV_TYPE_ARM_VGIC_V2 &&
-               !kvm_vgic_global_state.can_emulate_gicv2) {
-               ret = -ENODEV;
-               goto out;
-       }
+               !kvm_vgic_global_state.can_emulate_gicv2)
+               return -ENODEV;
 
        /*
         * Any time a vcpu is run, vcpu_load is called which tries to grab the
@@ -138,9 +132,6 @@ out_unlock:
                vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
                mutex_unlock(&vcpu->mutex);
        }
-
-out:
-       mutex_unlock(&kvm->lock);
        return ret;
 }
 
index 07411cf..4660a7d 100644 (file)
@@ -51,7 +51,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid)
 
        irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL);
        if (!irq)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
        INIT_LIST_HEAD(&irq->lpi_list);
        INIT_LIST_HEAD(&irq->ap_list);
@@ -441,39 +441,63 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
  * Find the target VCPU and the LPI number for a given devid/eventid pair
  * and make this IRQ pending, possibly injecting it.
  * Must be called with the its_lock mutex held.
+ * Returns 0 on success, a positive error value for any ITS mapping
+ * related errors and negative error values for generic errors.
  */
-static void vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
-                                u32 devid, u32 eventid)
+static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
+                               u32 devid, u32 eventid)
 {
+       struct kvm_vcpu *vcpu;
        struct its_itte *itte;
 
        if (!its->enabled)
-               return;
+               return -EBUSY;
 
        itte = find_itte(its, devid, eventid);
-       /* Triggering an unmapped IRQ gets silently dropped. */
-       if (itte && its_is_collection_mapped(itte->collection)) {
-               struct kvm_vcpu *vcpu;
-
-               vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr);
-               if (vcpu && vcpu->arch.vgic_cpu.lpis_enabled) {
-                       spin_lock(&itte->irq->irq_lock);
-                       itte->irq->pending = true;
-                       vgic_queue_irq_unlock(kvm, itte->irq);
-               }
-       }
+       if (!itte || !its_is_collection_mapped(itte->collection))
+               return E_ITS_INT_UNMAPPED_INTERRUPT;
+
+       vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr);
+       if (!vcpu)
+               return E_ITS_INT_UNMAPPED_INTERRUPT;
+
+       if (!vcpu->arch.vgic_cpu.lpis_enabled)
+               return -EBUSY;
+
+       spin_lock(&itte->irq->irq_lock);
+       itte->irq->pending = true;
+       vgic_queue_irq_unlock(kvm, itte->irq);
+
+       return 0;
+}
+
+static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev)
+{
+       struct vgic_io_device *iodev;
+
+       if (dev->ops != &kvm_io_gic_ops)
+               return NULL;
+
+       iodev = container_of(dev, struct vgic_io_device, dev);
+
+       if (iodev->iodev_type != IODEV_ITS)
+               return NULL;
+
+       return iodev;
 }
 
 /*
  * Queries the KVM IO bus framework to get the ITS pointer from the given
  * doorbell address.
  * We then call vgic_its_trigger_msi() with the decoded data.
+ * According to the KVM_SIGNAL_MSI API description returns 1 on success.
  */
 int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
 {
        u64 address;
        struct kvm_io_device *kvm_io_dev;
        struct vgic_io_device *iodev;
+       int ret;
 
        if (!vgic_has_its(kvm))
                return -ENODEV;
@@ -485,15 +509,28 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
 
        kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address);
        if (!kvm_io_dev)
-               return -ENODEV;
+               return -EINVAL;
 
-       iodev = container_of(kvm_io_dev, struct vgic_io_device, dev);
+       iodev = vgic_get_its_iodev(kvm_io_dev);
+       if (!iodev)
+               return -EINVAL;
 
        mutex_lock(&iodev->its->its_lock);
-       vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data);
+       ret = vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data);
        mutex_unlock(&iodev->its->its_lock);
 
-       return 0;
+       if (ret < 0)
+               return ret;
+
+       /*
+        * KVM_SIGNAL_MSI demands a return value > 0 for success and 0
+        * if the guest has blocked the MSI. So we map any LPI mapping
+        * related error to that.
+        */
+       if (ret)
+               return 0;
+       else
+               return 1;
 }
 
 /* Requires the its_lock to be held. */
@@ -502,7 +539,8 @@ static void its_free_itte(struct kvm *kvm, struct its_itte *itte)
        list_del(&itte->itte_list);
 
        /* This put matches the get in vgic_add_lpi. */
-       vgic_put_irq(kvm, itte->irq);
+       if (itte->irq)
+               vgic_put_irq(kvm, itte->irq);
 
        kfree(itte);
 }
@@ -697,6 +735,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
        struct its_device *device;
        struct its_collection *collection, *new_coll = NULL;
        int lpi_nr;
+       struct vgic_irq *irq;
 
        device = find_its_device(its, device_id);
        if (!device)
@@ -710,6 +749,10 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
            lpi_nr >= max_lpis_propbaser(kvm->arch.vgic.propbaser))
                return E_ITS_MAPTI_PHYSICALID_OOR;
 
+       /* If there is an existing mapping, behavior is UNPREDICTABLE. */
+       if (find_itte(its, device_id, event_id))
+               return 0;
+
        collection = find_collection(its, coll_id);
        if (!collection) {
                int ret = vgic_its_alloc_collection(its, &collection, coll_id);
@@ -718,22 +761,28 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
                new_coll = collection;
        }
 
-       itte = find_itte(its, device_id, event_id);
+       itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL);
        if (!itte) {
-               itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL);
-               if (!itte) {
-                       if (new_coll)
-                               vgic_its_free_collection(its, coll_id);
-                       return -ENOMEM;
-               }
-
-               itte->event_id  = event_id;
-               list_add_tail(&itte->itte_list, &device->itt_head);
+               if (new_coll)
+                       vgic_its_free_collection(its, coll_id);
+               return -ENOMEM;
        }
 
+       itte->event_id  = event_id;
+       list_add_tail(&itte->itte_list, &device->itt_head);
+
        itte->collection = collection;
        itte->lpi = lpi_nr;
-       itte->irq = vgic_add_lpi(kvm, lpi_nr);
+
+       irq = vgic_add_lpi(kvm, lpi_nr);
+       if (IS_ERR(irq)) {
+               if (new_coll)
+                       vgic_its_free_collection(its, coll_id);
+               its_free_itte(kvm, itte);
+               return PTR_ERR(irq);
+       }
+       itte->irq = irq;
+
        update_affinity_itte(kvm, itte);
 
        /*
@@ -981,9 +1030,7 @@ static int vgic_its_cmd_handle_int(struct kvm *kvm, struct vgic_its *its,
        u32 msi_data = its_cmd_get_id(its_cmd);
        u64 msi_devid = its_cmd_get_deviceid(its_cmd);
 
-       vgic_its_trigger_msi(kvm, its, msi_devid, msi_data);
-
-       return 0;
+       return vgic_its_trigger_msi(kvm, its, msi_devid, msi_data);
 }
 
 /*
@@ -1288,13 +1335,13 @@ void vgic_enable_lpis(struct kvm_vcpu *vcpu)
                its_sync_lpi_pending_table(vcpu);
 }
 
-static int vgic_its_init_its(struct kvm *kvm, struct vgic_its *its)
+static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its)
 {
        struct vgic_io_device *iodev = &its->iodev;
        int ret;
 
-       if (its->initialized)
-               return 0;
+       if (!its->initialized)
+               return -EBUSY;
 
        if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base))
                return -ENXIO;
@@ -1311,9 +1358,6 @@ static int vgic_its_init_its(struct kvm *kvm, struct vgic_its *its)
                                      KVM_VGIC_V3_ITS_SIZE, &iodev->dev);
        mutex_unlock(&kvm->slots_lock);
 
-       if (!ret)
-               its->initialized = true;
-
        return ret;
 }
 
@@ -1435,9 +1479,6 @@ static int vgic_its_set_attr(struct kvm_device *dev,
                if (type != KVM_VGIC_ITS_ADDR_TYPE)
                        return -ENODEV;
 
-               if (its->initialized)
-                       return -EBUSY;
-
                if (copy_from_user(&addr, uaddr, sizeof(addr)))
                        return -EFAULT;
 
@@ -1453,7 +1494,9 @@ static int vgic_its_set_attr(struct kvm_device *dev,
        case KVM_DEV_ARM_VGIC_GRP_CTRL:
                switch (attr->attr) {
                case KVM_DEV_ARM_VGIC_CTRL_INIT:
-                       return vgic_its_init_its(dev->kvm, its);
+                       its->initialized = true;
+
+                       return 0;
                }
                break;
        }
@@ -1498,3 +1541,30 @@ int kvm_vgic_register_its_device(void)
        return kvm_register_device_ops(&kvm_arm_vgic_its_ops,
                                       KVM_DEV_TYPE_ARM_VGIC_ITS);
 }
+
+/*
+ * Registers all ITSes with the kvm_io_bus framework.
+ * To follow the existing VGIC initialization sequence, this has to be
+ * done as late as possible, just before the first VCPU runs.
+ */
+int vgic_register_its_iodevs(struct kvm *kvm)
+{
+       struct kvm_device *dev;
+       int ret = 0;
+
+       list_for_each_entry(dev, &kvm->devices, vm_node) {
+               if (dev->ops != &kvm_arm_vgic_its_ops)
+                       continue;
+
+               ret = vgic_register_its_iodev(kvm, dev->private);
+               if (ret)
+                       return ret;
+               /*
+                * We don't need to care about tearing down previously
+                * registered ITSes, as the kvm_io_bus framework removes
+                * them for us if the VM gets destroyed.
+                */
+       }
+
+       return ret;
+}
index ff668e0..90d8181 100644 (file)
@@ -306,16 +306,19 @@ static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu,
 {
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-       u64 propbaser = dist->propbaser;
+       u64 old_propbaser, propbaser;
 
        /* Storing a value with LPIs already enabled is undefined */
        if (vgic_cpu->lpis_enabled)
                return;
 
-       propbaser = update_64bit_reg(propbaser, addr & 4, len, val);
-       propbaser = vgic_sanitise_propbaser(propbaser);
-
-       dist->propbaser = propbaser;
+       do {
+               old_propbaser = dist->propbaser;
+               propbaser = old_propbaser;
+               propbaser = update_64bit_reg(propbaser, addr & 4, len, val);
+               propbaser = vgic_sanitise_propbaser(propbaser);
+       } while (cmpxchg64(&dist->propbaser, old_propbaser,
+                          propbaser) != old_propbaser);
 }
 
 static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu,
@@ -331,16 +334,19 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
                                     unsigned long val)
 {
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-       u64 pendbaser = vgic_cpu->pendbaser;
+       u64 old_pendbaser, pendbaser;
 
        /* Storing a value with LPIs already enabled is undefined */
        if (vgic_cpu->lpis_enabled)
                return;
 
-       pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val);
-       pendbaser = vgic_sanitise_pendbaser(pendbaser);
-
-       vgic_cpu->pendbaser = pendbaser;
+       do {
+               old_pendbaser = vgic_cpu->pendbaser;
+               pendbaser = old_pendbaser;
+               pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val);
+               pendbaser = vgic_sanitise_pendbaser(pendbaser);
+       } while (cmpxchg64(&vgic_cpu->pendbaser, old_pendbaser,
+                          pendbaser) != old_pendbaser);
 }
 
 /*
index 0506543..9f0dae3 100644 (file)
@@ -289,6 +289,14 @@ int vgic_v3_map_resources(struct kvm *kvm)
                goto out;
        }
 
+       if (vgic_has_its(kvm)) {
+               ret = vgic_register_its_iodevs(kvm);
+               if (ret) {
+                       kvm_err("Unable to register VGIC ITS MMIO regions\n");
+                       goto out;
+               }
+       }
+
        dist->ready = true;
 
 out:
index e7aeac7..e83b7fe 100644 (file)
@@ -117,17 +117,17 @@ static void vgic_irq_release(struct kref *ref)
 
 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
 {
-       struct vgic_dist *dist;
+       struct vgic_dist *dist = &kvm->arch.vgic;
 
        if (irq->intid < VGIC_MIN_LPI)
                return;
 
-       if (!kref_put(&irq->refcount, vgic_irq_release))
+       spin_lock(&dist->lpi_list_lock);
+       if (!kref_put(&irq->refcount, vgic_irq_release)) {
+               spin_unlock(&dist->lpi_list_lock);
                return;
+       };
 
-       dist = &kvm->arch.vgic;
-
-       spin_lock(&dist->lpi_list_lock);
        list_del(&irq->lpi_list);
        dist->lpi_list_count--;
        spin_unlock(&dist->lpi_list_lock);
index 1d8e21d..6c4625c 100644 (file)
@@ -84,6 +84,7 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu);
 int vgic_v3_probe(const struct gic_kvm_info *info);
 int vgic_v3_map_resources(struct kvm *kvm);
 int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
+int vgic_register_its_iodevs(struct kvm *kvm);
 bool vgic_has_its(struct kvm *kvm);
 int kvm_vgic_register_its_device(void);
 void vgic_enable_lpis(struct kvm_vcpu *vcpu);
@@ -140,6 +141,11 @@ static inline int vgic_register_redist_iodevs(struct kvm *kvm,
        return -ENODEV;
 }
 
+static inline int vgic_register_its_iodevs(struct kvm *kvm)
+{
+       return -ENODEV;
+}
+
 static inline bool vgic_has_its(struct kvm *kvm)
 {
        return false;
index cc081cc..1950782 100644 (file)
@@ -696,6 +696,11 @@ static void kvm_destroy_devices(struct kvm *kvm)
 {
        struct kvm_device *dev, *tmp;
 
+       /*
+        * We do not need to take the kvm->lock here, because nobody else
+        * has a reference to the struct kvm at this point and therefore
+        * cannot access the devices list anyhow.
+        */
        list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) {
                list_del(&dev->vm_node);
                dev->ops->destroy(dev);
@@ -2832,19 +2837,28 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
        dev->ops = ops;
        dev->kvm = kvm;
 
+       mutex_lock(&kvm->lock);
        ret = ops->create(dev, cd->type);
        if (ret < 0) {
+               mutex_unlock(&kvm->lock);
                kfree(dev);
                return ret;
        }
+       list_add(&dev->vm_node, &kvm->devices);
+       mutex_unlock(&kvm->lock);
+
+       if (ops->init)
+               ops->init(dev);
 
        ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
        if (ret < 0) {
                ops->destroy(dev);
+               mutex_lock(&kvm->lock);
+               list_del(&dev->vm_node);
+               mutex_unlock(&kvm->lock);
                return ret;
        }
 
-       list_add(&dev->vm_node, &kvm->devices);
        kvm_get_kvm(kvm);
        cd->fd = ret;
        return 0;